Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/asm/cputype.h8
-rw-r--r--tools/arch/hexagon/include/uapi/asm/bitsperlong.h27
-rw-r--r--tools/arch/loongarch/include/uapi/asm/bitsperlong.h9
-rw-r--r--tools/arch/microblaze/include/uapi/asm/bitsperlong.h2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h2
-rw-r--r--tools/arch/x86/include/asm/msr-index.h1
-rw-r--r--tools/arch/x86/include/asm/nops.h16
-rw-r--r--tools/arch/x86/kcpuid/.gitignore1
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst8
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst11
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool7
-rw-r--r--tools/bpf/bpftool/common.c9
-rw-r--r--tools/bpf/bpftool/feature.c24
-rw-r--r--tools/bpf/bpftool/iter.c2
-rw-r--r--tools/bpf/bpftool/link.c16
-rw-r--r--tools/bpf/bpftool/main.h2
-rw-r--r--tools/bpf/bpftool/map.c19
-rw-r--r--tools/bpf/bpftool/prog.c53
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c26
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c27
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/bpf/resolve_btfids/Makefile4
-rw-r--r--tools/build/Makefile.build10
-rw-r--r--tools/build/feature/Makefile2
-rw-r--r--tools/counter/.gitignore2
-rw-r--r--tools/counter/Makefile2
-rwxr-xr-xtools/hv/vmbus_testing4
-rw-r--r--tools/iio/iio_generic_buffer.c17
-rw-r--r--tools/include/nolibc/Makefile19
-rw-r--r--tools/include/nolibc/arch-aarch64.h39
-rw-r--r--tools/include/nolibc/arch-arm.h74
-rw-r--r--tools/include/nolibc/arch-i386.h48
-rw-r--r--tools/include/nolibc/arch-loongarch.h49
-rw-r--r--tools/include/nolibc/arch-mips.h64
-rw-r--r--tools/include/nolibc/arch-riscv.h51
-rw-r--r--tools/include/nolibc/arch-s390.h15
-rw-r--r--tools/include/nolibc/arch-x86_64.h42
-rw-r--r--tools/include/nolibc/arch.h2
-rw-r--r--tools/include/nolibc/compiler.h25
-rw-r--r--tools/include/nolibc/nolibc.h2
-rw-r--r--tools/include/nolibc/stackprotector.h19
-rw-r--r--tools/include/nolibc/stdint.h24
-rw-r--r--tools/include/nolibc/stdio.h95
-rw-r--r--tools/include/nolibc/stdlib.h18
-rw-r--r--tools/include/nolibc/string.h4
-rw-r--r--tools/include/nolibc/sys.h131
-rw-r--r--tools/include/nolibc/types.h14
-rw-r--r--tools/include/nolibc/unistd.h15
-rw-r--r--tools/include/uapi/asm-generic/bitsperlong.h14
-rw-r--r--tools/include/uapi/asm-generic/socket.h3
-rw-r--r--tools/include/uapi/asm-generic/unistd.h134
-rw-r--r--tools/include/uapi/asm/bitsperlong.h6
-rw-r--r--tools/include/uapi/drm/i915_drm.h95
-rw-r--r--tools/include/uapi/linux/bpf.h31
-rw-r--r--tools/include/uapi/linux/fcntl.h5
-rw-r--r--tools/include/uapi/linux/kvm.h6
-rw-r--r--tools/include/uapi/linux/mman.h14
-rw-r--r--tools/include/uapi/linux/mount.h3
-rw-r--r--tools/include/uapi/linux/prctl.h11
-rw-r--r--tools/include/uapi/linux/vhost.h31
-rw-r--r--tools/include/uapi/sound/asound.h81
-rw-r--r--tools/lib/api/fs/cgroup.c17
-rw-r--r--tools/lib/api/fs/fs.c226
-rw-r--r--tools/lib/api/fs/tracing_path.c17
-rw-r--r--tools/lib/api/io.h28
-rw-r--r--tools/lib/bpf/bpf.c17
-rw-r--r--tools/lib/bpf/bpf.h18
-rw-r--r--tools/lib/bpf/bpf_helpers.h15
-rw-r--r--tools/lib/bpf/bpf_tracing.h3
-rw-r--r--tools/lib/bpf/btf.c2
-rw-r--r--tools/lib/bpf/btf_dump.c22
-rw-r--r--tools/lib/bpf/gen_loader.c14
-rw-r--r--tools/lib/bpf/libbpf.c201
-rw-r--r--tools/lib/bpf/libbpf.h18
-rw-r--r--tools/lib/bpf/libbpf.map5
-rw-r--r--tools/lib/bpf/libbpf_probes.c2
-rw-r--r--tools/lib/bpf/libbpf_version.h2
-rw-r--r--tools/lib/bpf/usdt.c10
-rw-r--r--tools/lib/perf/cpumap.c125
-rw-r--r--tools/lib/perf/evlist.c25
-rw-r--r--tools/lib/perf/include/internal/evsel.h15
-rw-r--r--tools/lib/perf/include/perf/cpumap.h19
-rw-r--r--tools/lib/perf/include/perf/event.h3
-rw-r--r--tools/lib/subcmd/exec-cmd.c35
-rw-r--r--tools/lib/subcmd/help.c22
-rw-r--r--tools/lib/subcmd/parse-options.h8
-rw-r--r--tools/lib/subcmd/subcmd-util.h5
-rw-r--r--tools/mm/Makefile4
-rw-r--r--tools/net/ynl/Makefile19
-rw-r--r--tools/net/ynl/Makefile.deps20
-rw-r--r--tools/net/ynl/generated/Makefile50
-rw-r--r--tools/net/ynl/generated/devlink-user.c721
-rw-r--r--tools/net/ynl/generated/devlink-user.h210
-rw-r--r--tools/net/ynl/generated/ethtool-user.c6353
-rw-r--r--tools/net/ynl/generated/ethtool-user.h5531
-rw-r--r--tools/net/ynl/generated/fou-user.c328
-rw-r--r--tools/net/ynl/generated/fou-user.h337
-rw-r--r--tools/net/ynl/generated/handshake-user.c331
-rw-r--r--tools/net/ynl/generated/handshake-user.h145
-rw-r--r--tools/net/ynl/generated/netdev-user.c200
-rw-r--r--tools/net/ynl/generated/netdev-user.h85
-rw-r--r--tools/net/ynl/lib/Makefile28
-rw-r--r--tools/net/ynl/lib/nlspec.py46
-rw-r--r--tools/net/ynl/lib/ynl.c901
-rw-r--r--tools/net/ynl/lib/ynl.h237
-rw-r--r--tools/net/ynl/lib/ynl.py151
-rw-r--r--tools/net/ynl/samples/.gitignore3
-rw-r--r--tools/net/ynl/samples/Makefile30
-rw-r--r--tools/net/ynl/samples/devlink.c60
-rw-r--r--tools/net/ynl/samples/ethtool.c65
-rw-r--r--tools/net/ynl/samples/netdev.c108
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py745
-rwxr-xr-xtools/net/ynl/ynl-regen.sh6
-rw-r--r--tools/objtool/Documentation/objtool.txt10
-rw-r--r--tools/objtool/arch/powerpc/include/arch/elf.h11
-rw-r--r--tools/objtool/arch/x86/decode.c12
-rw-r--r--tools/objtool/arch/x86/include/arch/elf.h11
-rw-r--r--tools/objtool/arch/x86/special.c14
-rw-r--r--tools/objtool/builtin-check.c5
-rw-r--r--tools/objtool/check.c688
-rw-r--r--tools/objtool/elf.c523
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h1
-rw-r--r--tools/objtool/include/objtool/cfi.h1
-rw-r--r--tools/objtool/include/objtool/elf.h310
-rw-r--r--tools/objtool/include/objtool/warn.h21
-rw-r--r--tools/objtool/noreturns.h45
-rw-r--r--tools/objtool/orc_gen.c8
-rw-r--r--tools/objtool/special.c4
-rw-r--r--tools/perf/Documentation/Makefile15
-rw-r--r--tools/perf/Documentation/perf-config.txt2
-rw-r--r--tools/perf/Documentation/perf-dlfilter.txt22
-rw-r--r--tools/perf/Documentation/perf-lock.txt8
-rw-r--r--tools/perf/Documentation/perf-script.txt2
-rw-r--r--tools/perf/Documentation/perf-stat.txt31
-rw-r--r--tools/perf/Makefile.config26
-rw-r--r--tools/perf/Makefile.perf51
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c7
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c4
-rwxr-xr-xtools/perf/arch/arm64/entry/syscalls/mksyscalltbl17
-rw-r--r--tools/perf/arch/arm64/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/arm64/util/pmu.c13
-rw-r--r--tools/perf/arch/common.c18
-rw-r--r--tools/perf/arch/loongarch/annotate/instructions.c116
-rwxr-xr-xtools/perf/arch/loongarch/entry/syscalls/mksyscalltbl40
-rw-r--r--tools/perf/arch/mips/entry/syscalls/mksyscalltbl2
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl1
-rwxr-xr-xtools/perf/arch/powerpc/entry/syscalls/mksyscalltbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/powerpc/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/powerpc/util/kvm-stat.c4
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c4
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c3
-rwxr-xr-xtools/perf/arch/s390/entry/syscalls/mksyscalltbl2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c50
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rwxr-xr-xtools/perf/arch/x86/entry/syscalls/syscalltbl.sh2
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h4
-rw-r--r--tools/perf/arch/x86/tests/Build7
-rw-r--r--tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c68
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c16
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/x86/tests/hybrid.c288
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c10
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-test.c14
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c5
-rw-r--r--tools/perf/arch/x86/util/env.c19
-rw-r--r--tools/perf/arch/x86/util/env.h7
-rw-r--r--tools/perf/arch/x86/util/evlist.c29
-rw-r--r--tools/perf/arch/x86/util/evsel.c63
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c4
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c4
-rw-r--r--tools/perf/arch/x86/util/mem-events.c36
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c15
-rw-r--r--tools/perf/arch/x86/util/pmu.c12
-rw-r--r--tools/perf/arch/x86/util/topdown.c5
-rw-r--r--tools/perf/bench/epoll-ctl.c5
-rw-r--r--tools/perf/bench/epoll-wait.c5
-rw-r--r--tools/perf/bench/futex-lock-pi.c12
-rw-r--r--tools/perf/bench/futex-requeue.c12
-rw-r--r--tools/perf/bench/futex-wake-parallel.c19
-rw-r--r--tools/perf/bench/futex-wake.c12
-rw-r--r--tools/perf/bench/pmu-scan.c60
-rw-r--r--tools/perf/bench/sched-messaging.c18
-rw-r--r--tools/perf/builtin-annotate.c32
-rw-r--r--tools/perf/builtin-bench.c2
-rw-r--r--tools/perf/builtin-c2c.c31
-rw-r--r--tools/perf/builtin-config.c4
-rw-r--r--tools/perf/builtin-daemon.c46
-rw-r--r--tools/perf/builtin-diff.c24
-rw-r--r--tools/perf/builtin-ftrace.c2
-rw-r--r--tools/perf/builtin-help.c4
-rw-r--r--tools/perf/builtin-inject.c35
-rw-r--r--tools/perf/builtin-kallsyms.c1
-rw-r--r--tools/perf/builtin-kmem.c26
-rw-r--r--tools/perf/builtin-kwork.c27
-rw-r--r--tools/perf/builtin-list.c48
-rw-r--r--tools/perf/builtin-lock.c413
-rw-r--r--tools/perf/builtin-mem.c13
-rw-r--r--tools/perf/builtin-probe.c133
-rw-r--r--tools/perf/builtin-record.c42
-rw-r--r--tools/perf/builtin-report.c22
-rw-r--r--tools/perf/builtin-sched.c120
-rw-r--r--tools/perf/builtin-script.c240
-rw-r--r--tools/perf/builtin-stat.c343
-rw-r--r--tools/perf/builtin-timechart.c59
-rw-r--r--tools/perf/builtin-top.c49
-rw-r--r--tools/perf/builtin-trace.c105
-rwxr-xr-xtools/perf/check-headers.sh232
-rw-r--r--tools/perf/dlfilters/dlfilter-test-api-v2.c377
-rw-r--r--tools/perf/include/perf/perf_dlfilter.h11
-rw-r--r--tools/perf/pmu-events/Build6
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json17
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json32
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json101
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json698
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json44
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json89
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json14
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json44
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json23
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json14
-rw-r--r--tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/arm64/sbsa.json12
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/cache.json47
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/floating_point.json66
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/frontend.json188
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/marked.json194
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/memory.json91
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/metrics.json56
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/others.json209
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pipeline.json269
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/pmc.json193
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power10/translation.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json1418
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/cache.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/memory.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/metricgroups.json122
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json301
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/metricgroups.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/recommended.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/recommended.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen3/recommended.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json580
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/floating-point.json15
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json556
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json15
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json796
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json15
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json1217
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json31
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/metricgroups.json114
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/cache.json7
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/other.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json484
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json700
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/cache.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/frontend.json32
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json977
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/metricgroups.json113
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/pipeline.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/frontend.json32
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json1317
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/metricgroups.json114
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/pipeline.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json526
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json534
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/metricgroups.json107
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json224
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/metricgroups.json100
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv31
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/cache.json811
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json143
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/frontend.json410
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/memory.json142
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/other.json57
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json1121
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json126
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json257
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/cache.json894
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json105
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/frontend.json377
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/memory.json394
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json113
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/other.json242
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json801
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json1571
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json74
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/metricgroups.json100
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json222
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/metricgroups.json118
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json1422
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json308
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/floating-point.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/frontend.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/metricgroups.json113
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/pipeline.json32
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json883
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json31
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/metricgroups.json114
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json1185
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/cache.json7
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/other.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/pipeline.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json14
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-memory.json7
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/cache.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/frontend.json32
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/metricgroups.json113
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json7
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json995
-rw-r--r--tools/perf/pmu-events/empty-pmu-events.c5
-rwxr-xr-xtools/perf/pmu-events/jevents.py62
-rw-r--r--tools/perf/pmu-events/metric.py36
-rwxr-xr-xtools/perf/pmu-events/metric_test.py6
-rw-r--r--tools/perf/pmu-events/pmu-events.h4
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Context.c4
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py1
-rw-r--r--tools/perf/tests/attr.c11
-rw-r--r--tools/perf/tests/builtin-test.c5
-rw-r--r--tools/perf/tests/code-reading.c6
-rw-r--r--tools/perf/tests/cpumap.c92
-rw-r--r--tools/perf/tests/dlfilter-test.c38
-rw-r--r--tools/perf/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/tests/event_groups.c7
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c119
-rw-r--r--tools/perf/tests/expr.c44
-rw-r--r--tools/perf/tests/hists_common.c2
-rw-r--r--tools/perf/tests/hists_cumulate.c18
-rw-r--r--tools/perf/tests/hists_filter.c11
-rw-r--r--tools/perf/tests/hists_link.c20
-rw-r--r--tools/perf/tests/hists_output.c12
-rw-r--r--tools/perf/tests/make5
-rw-r--r--tools/perf/tests/maps.c2
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c5
-rw-r--r--tools/perf/tests/parse-events.c2138
-rw-r--r--tools/perf/tests/parse-metric.c9
-rw-r--r--tools/perf/tests/pe-file-parsing.c3
-rwxr-xr-xtools/perf/tests/perf-targz-src-pkg5
-rw-r--r--tools/perf/tests/pmu-events.c18
-rw-r--r--tools/perf/tests/pmu.c17
-rw-r--r--tools/perf/tests/python-use.c2
-rwxr-xr-xtools/perf/tests/shell/buildid.sh12
-rwxr-xr-xtools/perf/tests/shell/daemon.sh113
-rw-r--r--tools/perf/tests/shell/lib/perf_json_output_lint.py13
-rw-r--r--tools/perf/tests/shell/lib/perf_metric_validation.py574
-rw-r--r--tools/perf/tests/shell/lib/perf_metric_validation_rules.json398
-rw-r--r--tools/perf/tests/shell/lib/stat_output.sh169
-rwxr-xr-xtools/perf/tests/shell/lock_contention.sh106
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh10
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh4
-rwxr-xr-xtools/perf/tests/shell/stat+csv_output.sh178
-rwxr-xr-xtools/perf/tests/shell/stat+json_output.sh15
-rwxr-xr-xtools/perf/tests/shell/stat+shadow_stat.sh4
-rwxr-xr-xtools/perf/tests/shell/stat+std_output.sh108
-rwxr-xr-xtools/perf/tests/shell/stat.sh44
-rwxr-xr-xtools/perf/tests/shell/stat_all_metrics.sh6
-rwxr-xr-xtools/perf/tests/shell/stat_all_pfm.sh51
-rwxr-xr-xtools/perf/tests/shell/stat_bpf_counters.sh4
-rwxr-xr-xtools/perf/tests/shell/stat_bpf_counters_cgrp.sh28
-rwxr-xr-xtools/perf/tests/shell/stat_metrics_values.sh30
-rwxr-xr-xtools/perf/tests/shell/test_arm_callgraph_fp.sh11
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh6
-rwxr-xr-xtools/perf/tests/shell/test_arm_spe.sh2
-rwxr-xr-xtools/perf/tests/shell/test_brstack.sh12
-rwxr-xr-xtools/perf/tests/shell/test_perf_data_converter_json.sh72
-rwxr-xr-xtools/perf/tests/shell/test_task_analyzer.sh104
-rwxr-xr-xtools/perf/tests/shell/test_uprobe_from_different_cu.sh83
-rw-r--r--tools/perf/tests/switch-tracking.c14
-rw-r--r--tools/perf/tests/symbols.c1
-rw-r--r--tools/perf/tests/task-exit.c4
-rw-r--r--tools/perf/tests/tests.h4
-rw-r--r--tools/perf/tests/thread-maps-share.c13
-rw-r--r--tools/perf/tests/topology.c16
-rw-r--r--tools/perf/trace/beauty/beauty.h2
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h6
-rwxr-xr-xtools/perf/trace/beauty/move_mount_flags.sh2
-rw-r--r--tools/perf/trace/beauty/msg_flags.c14
-rw-r--r--tools/perf/trace/beauty/pid.c4
-rwxr-xr-xtools/perf/trace/beauty/tracepoints/x86_msr.sh6
-rw-r--r--tools/perf/ui/browsers/annotate.c14
-rw-r--r--tools/perf/ui/browsers/hists.c79
-rw-r--r--tools/perf/ui/hist.c5
-rw-r--r--tools/perf/ui/stdio/hist.c2
-rw-r--r--tools/perf/util/Build5
-rw-r--r--tools/perf/util/addr_location.c44
-rw-r--r--tools/perf/util/addr_location.h31
-rw-r--r--tools/perf/util/annotate.c131
-rw-r--r--tools/perf/util/annotate.h11
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c2
-rw-r--r--tools/perf/util/arm-spe.c4
-rw-r--r--tools/perf/util/block-info.c8
-rw-r--r--tools/perf/util/bpf-filter.c14
-rw-r--r--tools/perf/util/bpf_skel/.gitignore1
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/vmlinux.h (renamed from tools/perf/util/bpf_skel/vmlinux.h)10
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/cache.h2
-rw-r--r--tools/perf/util/callchain.c73
-rw-r--r--tools/perf/util/callchain.h8
-rw-r--r--tools/perf/util/config.c10
-rw-r--r--tools/perf/util/cpumap.c14
-rw-r--r--tools/perf/util/cpumap.h11
-rw-r--r--tools/perf/util/cputopo.c12
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c33
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h4
-rw-r--r--tools/perf/util/cs-etm.c300
-rw-r--r--tools/perf/util/cs-etm.h13
-rw-r--r--tools/perf/util/data-convert-json.c16
-rw-r--r--tools/perf/util/db-export.c30
-rw-r--r--tools/perf/util/dlfilter.c47
-rw-r--r--tools/perf/util/dso.c40
-rw-r--r--tools/perf/util/dso.h11
-rw-r--r--tools/perf/util/dwarf-aux.c14
-rw-r--r--tools/perf/util/env.c5
-rw-r--r--tools/perf/util/event.c49
-rw-r--r--tools/perf/util/evlist-hybrid.c162
-rw-r--r--tools/perf/util/evlist-hybrid.h15
-rw-r--r--tools/perf/util/evlist.c66
-rw-r--r--tools/perf/util/evlist.h10
-rw-r--r--tools/perf/util/evsel.c122
-rw-r--r--tools/perf/util/evsel.h48
-rw-r--r--tools/perf/util/evsel_fprintf.c28
-rw-r--r--tools/perf/util/evsel_fprintf.h1
-rw-r--r--tools/perf/util/expr.c25
-rw-r--r--tools/perf/util/expr.h1
-rw-r--r--tools/perf/util/expr.l1
-rw-r--r--tools/perf/util/expr.y94
-rw-r--r--tools/perf/util/genelf_debug.c6
-rw-r--r--tools/perf/util/header.c166
-rw-r--r--tools/perf/util/header.h4
-rw-r--r--tools/perf/util/help-unknown-cmd.c3
-rw-r--r--tools/perf/util/hist.c76
-rw-r--r--tools/perf/util/intel-bts.c2
-rw-r--r--tools/perf/util/intel-pt.c88
-rw-r--r--tools/perf/util/jitdump.c12
-rw-r--r--tools/perf/util/llvm-utils.c4
-rw-r--r--tools/perf/util/machine.c298
-rw-r--r--tools/perf/util/map.c52
-rw-r--r--tools/perf/util/map.h13
-rw-r--r--tools/perf/util/maps.c5
-rw-r--r--tools/perf/util/maps.h9
-rw-r--r--tools/perf/util/mem-events.c30
-rw-r--r--tools/perf/util/metricgroup.c142
-rw-r--r--tools/perf/util/metricgroup.h6
-rw-r--r--tools/perf/util/parse-events-hybrid.c214
-rw-r--r--tools/perf/util/parse-events-hybrid.h25
-rw-r--r--tools/perf/util/parse-events.c906
-rw-r--r--tools/perf/util/parse-events.h69
-rw-r--r--tools/perf/util/parse-events.l131
-rw-r--r--tools/perf/util/parse-events.y487
-rw-r--r--tools/perf/util/path.c35
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c186
-rw-r--r--tools/perf/util/pfm.c63
-rw-r--r--tools/perf/util/pmu-hybrid.c72
-rw-r--r--tools/perf/util/pmu-hybrid.h33
-rw-r--r--tools/perf/util/pmu.c585
-rw-r--r--tools/perf/util/pmu.h51
-rw-r--r--tools/perf/util/pmus.c573
-rw-r--r--tools/perf/util/pmus.h20
-rw-r--r--tools/perf/util/print-events.c130
-rw-r--r--tools/perf/util/print-events.h2
-rw-r--r--tools/perf/util/probe-event.c16
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/python.c26
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c15
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c49
-rw-r--r--tools/perf/util/session.c8
-rw-r--r--tools/perf/util/setup.py4
-rw-r--r--tools/perf/util/sharded_mutex.c33
-rw-r--r--tools/perf/util/sharded_mutex.h29
-rw-r--r--tools/perf/util/sort.c14
-rw-r--r--tools/perf/util/srcline.c170
-rw-r--r--tools/perf/util/srcline.h6
-rw-r--r--tools/perf/util/stat-display.c149
-rw-r--r--tools/perf/util/stat-shadow.c131
-rw-r--r--tools/perf/util/stat.c22
-rw-r--r--tools/perf/util/stat.h18
-rw-r--r--tools/perf/util/symbol-elf.c12
-rw-r--r--tools/perf/util/symbol.c146
-rw-r--r--tools/perf/util/symbol.h29
-rw-r--r--tools/perf/util/symbol_conf.h1
-rw-r--r--tools/perf/util/symbol_fprintf.c10
-rw-r--r--tools/perf/util/synthetic-events.c1
-rw-r--r--tools/perf/util/syscalltbl.c14
-rw-r--r--tools/perf/util/target.h1
-rw-r--r--tools/perf/util/thread-stack.c29
-rw-r--r--tools/perf/util/thread.c222
-rw-r--r--tools/perf/util/thread.h210
-rw-r--r--tools/perf/util/trace-event-info.c12
-rw-r--r--tools/perf/util/trace-event.h6
-rw-r--r--tools/perf/util/unwind-libdw.c35
-rw-r--r--tools/perf/util/unwind-libunwind-local.c36
-rw-r--r--tools/perf/util/unwind-libunwind.c2
-rw-r--r--tools/perf/util/vdso.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c13
-rw-r--r--tools/power/x86/intel-speed-select/isst-core-tpmi.c43
-rw-r--r--tools/spi/spidev_test.c107
-rw-r--r--tools/testing/cxl/Kbuild5
-rw-r--r--tools/testing/cxl/test/cxl.c40
-rw-r--r--tools/testing/cxl/test/mem.c367
-rw-r--r--tools/testing/cxl/test/mock.c59
-rw-r--r--tools/testing/cxl/test/mock.h3
-rw-r--r--tools/testing/kunit/configs/all_tests.config2
-rw-r--r--tools/testing/kunit/configs/arch_uml.config3
-rw-r--r--tools/testing/kunit/kunit_kernel.py6
-rw-r--r--tools/testing/kunit/mypy.ini6
-rwxr-xr-xtools/testing/kunit/run_checks.py2
-rw-r--r--tools/testing/memblock/tests/alloc_nid_api.c40
-rw-r--r--tools/testing/nvdimm/test/nfit.c6
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h29
-rw-r--r--tools/testing/radix-tree/linux/init.h1
-rw-r--r--tools/testing/radix-tree/maple.c170
-rw-r--r--tools/testing/radix-tree/multiorder.c68
-rw-r--r--tools/testing/radix-tree/regression1.c2
-rw-r--r--tools/testing/selftests/Makefile23
-rw-r--r--tools/testing/selftests/alsa/.gitignore1
-rw-r--r--tools/testing/selftests/alsa/Makefile2
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c331
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c22
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c32
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c3
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c86
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/zt_regs.c1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch6483
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/bench.c15
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c14
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_loop.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_count.c14
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage.c12
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c8
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c15
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_strncmp.c11
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c21
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_rename.sh2
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh26
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c182
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h107
-rw-r--r--tools/testing/selftests/bpf/config4
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/async_stack_depth.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c269
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_map_resize.c227
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_destroy.c221
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_multi.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c312
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c121
-rw-r--r--tools/testing/selftests/bpf/progs/async_stack_depth.c61
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ksym.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h4
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c13
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c17
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h6
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c64
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c308
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c337
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c26
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c4
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_destructive.c3
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_fail.c9
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_race.c3
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c17
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c9
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c5
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c5
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog.c145
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c22
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_inherit.c18
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_multi.c26
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c10
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h9
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c58
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c659
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c536
-rw-r--r--tools/testing/selftests/bpf/progs/vrf_socket_lookup.c89
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_hw_metadata.c4
-rw-r--r--tools/testing/selftests/bpf/test_progs.c113
-rw-r--r--tools/testing/selftests/bpf/test_progs.h1
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c194
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh10
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c268
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h12
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c143
-rw-r--r--tools/testing/selftests/bpf/veristat.c9
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c47
-rw-r--r--tools/testing/selftests/bpf/xdp_metadata.h1
-rw-r--r--tools/testing/selftests/bpf/xsk.h5
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c771
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h31
-rw-r--r--tools/testing/selftests/cachestat/.gitignore2
-rw-r--r--tools/testing/selftests/cachestat/Makefile8
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c319
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c8
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c9
-rw-r--r--tools/testing/selftests/clone3/clone3.c5
-rw-r--r--tools/testing/selftests/cpufreq/config8
-rw-r--r--tools/testing/selftests/damon/config7
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile4
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan.sh99
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh158
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh118
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/extack.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh5
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh41
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest10
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc58
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc26
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc27
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc111
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc82
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc44
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc34
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c7
-rwxr-xr-xtools/testing/selftests/gpio/gpio-sim.sh4
-rw-r--r--tools/testing/selftests/hid/tests/test_wacom_generic.py84
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh1
-rw-r--r--tools/testing/selftests/kselftest/runner.sh14
-rw-r--r--tools/testing/selftests/kselftest_harness.h11
-rw-r--r--tools/testing/selftests/kvm/Makefile19
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c53
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c32
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c96
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h7
-rw-r--r--tools/testing/selftests/kvm/include/memstress.h8
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c68
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c17
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c75
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c4
-rw-r--r--tools/testing/selftests/kvm/s390x/cmma_test.c700
-rw-r--r--tools/testing/selftests/kvm/x86_64/cpuid_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c259
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c70
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c22
-rw-r--r--tools/testing/selftests/landlock/config9
-rw-r--r--tools/testing/selftests/landlock/config.um1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c391
-rw-r--r--tools/testing/selftests/lib.mk44
-rw-r--r--tools/testing/selftests/media_tests/video_device_test.c111
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c40
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c4
-rw-r--r--tools/testing/selftests/mm/.gitignore3
-rw-r--r--tools/testing/selftests/mm/Makefile5
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/charge_reserved_hugetlb.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/check_config.sh0
-rw-r--r--tools/testing/selftests/mm/cow.c37
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c459
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c7
-rw-r--r--tools/testing/selftests/mm/hugepage-shm.c4
-rw-r--r--tools/testing/selftests/mm/hugepage-vmemmap.c4
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c8
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/hugetlb_reparenting_test.sh0
-rw-r--r--tools/testing/selftests/mm/khugepaged.c11
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c1
-rw-r--r--tools/testing/selftests/mm/madv_populate.c7
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c4
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c12
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/migration.c5
-rw-r--r--tools/testing/selftests/mm/mkdirty.c2
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c1
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c1
-rw-r--r--tools/testing/selftests/mm/mlock2.h8
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c10
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c4
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c4
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h3
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h20
-rw-r--r--tools/testing/selftests/mm/protection_keys.c13
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/run_vmtests.sh10
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/test_hmm.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/test_vmalloc.sh0
-rw-r--r--tools/testing/selftests/mm/uffd-common.c59
-rw-r--r--tools/testing/selftests/mm/uffd-common.h5
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c10
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c16
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/va_high_addr_switch.sh0
-rw-r--r--tools/testing/selftests/mm/vm_util.c86
-rw-r--r--tools/testing/selftests/mm/vm_util.h5
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/write_hugetlb_memory.sh0
-rw-r--r--tools/testing/selftests/net/.gitignore3
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile3
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c430
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh87
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh10
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile2
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh59
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh3
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/q_in_vni.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh24
-rw-r--r--tools/testing/selftests/net/forwarding/settings1
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_cfm.sh206
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh353
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh9
-rw-r--r--tools/testing/selftests/net/mptcp/config1
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh751
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c120
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh29
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c10
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh4
-rw-r--r--tools/testing/selftests/net/nettest.c46
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh35
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh1
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nolocalbypass.sh240
-rw-r--r--tools/testing/selftests/net/tls.c131
-rw-r--r--tools/testing/selftests/nolibc/.gitignore1
-rw-r--r--tools/testing/selftests/nolibc/Makefile35
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c264
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c3
-rw-r--r--tools/testing/selftests/powerpc/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/dexcr/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/dexcr/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.c132
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.h49
-rw-r--r--tools/testing/selftests/powerpc/dexcr/hashchk_test.c227
-rw-r--r--tools/testing/selftests/powerpc/dexcr/lsdexcr.c141
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h4
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h31
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h2
-rw-r--r--tools/testing/selftests/powerpc/utils.c24
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c2
-rw-r--r--tools/testing/selftests/ptp/testptp.c29
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot2
-rw-r--r--tools/testing/selftests/resctrl/Makefile2
-rw-r--r--tools/testing/selftests/resctrl/cache.c18
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c3
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h1
-rw-r--r--tools/testing/selftests/riscv/Makefile4
-rw-r--r--tools/testing/selftests/riscv/hwprobe/.gitignore1
-rw-r--r--tools/testing/selftests/riscv/vector/.gitignore3
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile19
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval_nolibc.c68
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c109
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c189
-rw-r--r--tools/testing/selftests/rseq/Makefile4
-rw-r--r--tools/testing/selftests/rseq/rseq.c30
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh7
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh115
-rw-r--r--tools/testing/selftests/tc-testing/config2
-rw-r--r--tools/testing/selftests/tc-testing/settings1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/filter.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json86
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json25
-rw-r--r--tools/testing/selftests/timers/raw_skew.c3
-rw-r--r--tools/testing/selftests/tty/.gitignore2
-rw-r--r--tools/testing/selftests/tty/Makefile5
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c88
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c12
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c8
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c4
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh30
-rw-r--r--tools/testing/vsock/Makefile2
-rw-r--r--tools/tracing/rtla/src/osnoise.c65
-rw-r--r--tools/tracing/rtla/src/osnoise.h5
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c90
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c83
-rw-r--r--tools/tracing/rtla/src/timerlat_aa.c35
-rw-r--r--tools/tracing/rtla/src/timerlat_aa.h5
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c262
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c229
-rw-r--r--tools/tracing/rtla/src/timerlat_u.c224
-rw-r--r--tools/tracing/rtla/src/timerlat_u.h18
-rw-r--r--tools/tracing/rtla/src/utils.c324
-rw-r--r--tools/tracing/rtla/src/utils.h7
-rw-r--r--tools/usb/usbip/configure.ac4
-rw-r--r--tools/usb/usbip/src/usbip_attach.c2
-rw-r--r--tools/usb/usbip/src/usbip_detach.c2
-rw-r--r--tools/usb/usbip/src/usbip_port.c2
-rw-r--r--tools/virtio/Makefile13
-rw-r--r--tools/workqueue/wq_monitor.py168
890 files changed, 65585 insertions, 17412 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 683ca3af4084..5f6f84837a49 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -126,6 +126,10 @@
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -181,6 +185,10 @@
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
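These additions follow the header's existing pattern: MIDR_CPU_MODEL() packs the implementer ID and the 12-bit part number into the MIDR_EL1 register layout, so errata code can match a core with a single masked compare. A minimal sketch of how such a constant is typically consumed; read_cpuid_id() and MIDR_CPU_MODEL_MASK are the kernel's existing helpers, and the function name here is hypothetical:

    /* Match implementer + part number, ignoring variant/revision. */
    static bool cpu_is_m2_blizzard_pro(void)
    {
            u32 midr = read_cpuid_id();     /* reads MIDR_EL1 */

            return (midr & MIDR_CPU_MODEL_MASK) == MIDR_APPLE_M2_BLIZZARD_PRO;
    }
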
diff --git a/tools/arch/hexagon/include/uapi/asm/bitsperlong.h b/tools/arch/hexagon/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 5adca0d26913..000000000000
--- a/tools/arch/hexagon/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#ifndef __ASM_HEXAGON_BITSPERLONG_H
-#define __ASM_HEXAGON_BITSPERLONG_H
-
-#define __BITS_PER_LONG 32
-
-#include <asm-generic/bitsperlong.h>
-
-#endif
diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 00b4ba1e5cdf..000000000000
--- a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef __ASM_LOONGARCH_BITSPERLONG_H
-#define __ASM_LOONGARCH_BITSPERLONG_H
-
-#define __BITS_PER_LONG (__SIZEOF_LONG__ * 8)
-
-#include <asm-generic/bitsperlong.h>
-
-#endif /* __ASM_LOONGARCH_BITSPERLONG_H */
diff --git a/tools/arch/microblaze/include/uapi/asm/bitsperlong.h b/tools/arch/microblaze/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 76da34b10f59..000000000000
--- a/tools/arch/microblaze/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/bitsperlong.h>
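All three deleted headers either hard-coded __BITS_PER_LONG or simply deferred to asm-generic, which can now derive the value from compiler built-ins for every architecture. A sketch of the fallback pattern these deletions rely on; the exact guards in asm-generic/bitsperlong.h may differ:

    /* asm-generic fallback: compute the word size from compiler
     * built-ins instead of hard-coding it per architecture.
     */
    #ifndef __BITS_PER_LONG
    # if defined(__CHAR_BIT__) && defined(__SIZEOF_LONG__)
    #  define __BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__)
    # else
    #  define __BITS_PER_LONG 32    /* conservative default */
    # endif
    #endif
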
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index cb8ca46213be..1f6d904c6481 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -14,7 +14,7 @@
* Defines x86 CPU feature bits
*/
#define NCAPINTS 21 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
+#define NBUGINTS 2 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
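Raising NBUGINTS from 1 to 2 doubles the bug-flag space from 32 to 64 bits: bug bits are numbered past the capability words, so total feature storage becomes (NCAPINTS + NBUGINTS) 32-bit words. The arithmetic, using the X86_BUG() helper from the same header:

    #define X86_BUG(x)      (NCAPINTS*32 + (x))   /* as in cpufeatures.h */

    /* With NBUGINTS == 2, valid bug bits run from X86_BUG(0) to
     * X86_BUG(NBUGINTS*32 - 1), i.e. 64 entries instead of 32.
     */
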
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3aedae61af4f..a00a53e15ab7 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -545,6 +545,7 @@
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
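This tools/ header mirrors the kernel's msr-index.h; the new define names bit 9 of DE_CFG, the Zen 2 chicken bit used by the Zenbleed workaround. A hedged sketch of how such a bit is typically set; msr_set_bit() is the kernel's read-modify-write helper, and wiring it to this specific erratum is an assumption here:

    /* Set the Zen 2 FP backup fix bit in DE_CFG; msr_set_bit()
     * writes the MSR only if the bit is not already set.
     */
    static void enable_zen2_fp_backup_fix(void)
    {
            msr_set_bit(MSR_AMD64_DE_CFG,
                        MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
    }
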
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index c5573eaa5bb9..1c1b7550fa55 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -34,6 +34,8 @@
#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
#define BYTES_NOP8 0x3e,BYTES_NOP7
+#define ASM_NOP_MAX 8
+
#else
/*
@@ -47,6 +49,9 @@
* 6: osp nopl 0x00(%eax,%eax,1)
* 7: nopl 0x00000000(%eax)
* 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: cs nopl 0x00000000(%eax,%eax,1)
+ * 10: osp cs nopl 0x00000000(%eax,%eax,1)
+ * 11: osp osp cs nopl 0x00000000(%eax,%eax,1)
*/
#define BYTES_NOP1 0x90
#define BYTES_NOP2 0x66,BYTES_NOP1
@@ -56,6 +61,15 @@
#define BYTES_NOP6 0x66,BYTES_NOP5
#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+#define BYTES_NOP9 0x2e,BYTES_NOP8
+#define BYTES_NOP10 0x66,BYTES_NOP9
+#define BYTES_NOP11 0x66,BYTES_NOP10
+
+#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9)
+#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10)
+#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11)
+
+#define ASM_NOP_MAX 11
#endif /* CONFIG_64BIT */
@@ -68,8 +82,6 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#define ASM_NOP_MAX 8
-
#ifndef __ASSEMBLY__
extern const unsigned char * const x86_nops[];
#endif
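The new long NOPs are built by stacking prefix bytes onto the 8-byte nopl: 0x2e is a CS segment override and 0x66 an operand-size override, both architecturally ignored here. Raising ASM_NOP_MAX to 11 lets padding code cover a span with fewer instructions. A sketch of the consumer pattern, modeled on the kernel's NOP-padding loops rather than quoting an exact function:

    /* Fill a buffer with the largest NOPs available; x86_nops[n]
     * points at an n-byte NOP encoding for n <= ASM_NOP_MAX.
     */
    static void fill_nops(u8 *buf, unsigned int len)
    {
            while (len > 0) {
                    unsigned int n = len > ASM_NOP_MAX ? ASM_NOP_MAX : len;

                    memcpy(buf, x86_nops[n], n);
                    buf += n;
                    len -= n;
            }
    }
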
diff --git a/tools/arch/x86/kcpuid/.gitignore b/tools/arch/x86/kcpuid/.gitignore
new file mode 100644
index 000000000000..1b8541bc8dd0
--- /dev/null
+++ b/tools/arch/x86/kcpuid/.gitignore
@@ -0,0 +1 @@
+kcpuid
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
index 416f5b35dd8f..24b7d017ec2c 100644
--- a/tools/arch/x86/kcpuid/kcpuid.c
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -517,15 +517,16 @@ static void show_range(struct cpuid_range *range)
 static inline struct cpuid_func *index_to_func(u32 index)
 {
 	struct cpuid_range *range;
+	u32 func_idx;
 
 	range = (index & 0x80000000) ? leafs_ext : leafs_basic;
-	index &= 0x7FFFFFFF;
+	func_idx = index & 0xffff;
 
-	if (((index & 0xFFFF) + 1) > (u32)range->nr) {
+	if ((func_idx + 1) > (u32)range->nr) {
 		printf("ERR: invalid input index (0x%x)\n", index);
 		return NULL;
 	}
-	return &range->funcs[index];
+	return &range->funcs[func_idx];
 }
 
 static void show_info(void)
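The bug being fixed: the old code bounds-checked only the low 16 bits of the masked index but then used all 31 bits as the array subscript. An illustration of the failure mode the patch closes; the input value is hypothetical:

    /* Pre-fix failure mode, e.g. for a malformed extended index: */
    u32 index  = 0x80010000;          /* hypothetical input       */
    u32 masked = index & 0x7FFFFFFF;  /* 0x00010000               */
    u32 check  = index & 0xFFFF;      /* 0, so the nr test passes */
    /* Old code then read range->funcs[masked], 0x10000 entries in;
     * the fix uses the same 16-bit func_idx for both the bounds
     * test and the subscript.
     */
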
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 11250c4734fe..3b7ba037af95 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -28,7 +28,7 @@ MAP COMMANDS
| **bpftool** **map** { **show** | **list** } [*MAP*]
| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \
-| [**dev** *NAME*]
+| [**offload_dev** *NAME*]
| **bpftool** **map dump** *MAP*
| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* [**key** *DATA*]
@@ -73,7 +73,7 @@ DESCRIPTION
maps. On such kernels bpftool will automatically emit this
information as well.
- **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*]
+ **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**offload_dev** *NAME*]
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
@@ -86,8 +86,8 @@ DESCRIPTION
kernel needs it to collect metadata related to the inner maps
that the new map will work with.
- Keyword **dev** expects a network interface name, and is used
- to request hardware offload for the map.
+ Keyword **offload_dev** expects a network interface name,
+ and is used to request hardware offload for the map.
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*. In case of **name**,
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 9443c524bb76..dcae81bd27ed 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -31,7 +31,7 @@ PROG COMMANDS
| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
@@ -129,7 +129,7 @@ DESCRIPTION
contain a dot character ('.'), which is reserved for future
extensions of *bpffs*.
- **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+ **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
Load bpf program(s) from binary *OBJ* and pin as *PATH*.
**bpftool prog load** pins only the first program from the
*OBJ* as *PATH*. **bpftool prog loadall** pins all programs
@@ -143,8 +143,11 @@ DESCRIPTION
to be replaced in the ELF file counting from 0, while *NAME*
allows replacing a map by name. *MAP* specifies the map to
use, referring to it by **id** or through a **pinned** file.
- If **dev** *NAME* is specified program will be loaded onto
- given networking device (offload).
+ If **offload_dev** *NAME* is specified, the program will be loaded
+ onto the given networking device (offload).
+ If **xdpmeta_dev** *NAME* is specified, the program will become
+ device-bound without offloading; this facilitates access
+ to XDP metadata.
Optional **pinmaps** argument can be provided to pin all
maps under *MAP_DIR* directory.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index e7234d1a5306..085bf18f3659 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -278,7 +278,7 @@ _bpftool()
_bpftool_get_prog_tags
return 0
;;
- dev)
+ dev|offload_dev|xdpmeta_dev)
_sysfs_get_netdevs
return 0
;;
@@ -508,7 +508,8 @@ _bpftool()
;;
*)
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
- _bpftool_once_attr 'type dev pinmaps autoattach'
+ _bpftool_once_attr 'type pinmaps autoattach'
+ _bpftool_one_of_list 'offload_dev xdpmeta_dev'
return 0
;;
esac
@@ -733,7 +734,7 @@ _bpftool()
esac
;;
*)
- _bpftool_once_attr 'type key value entries name flags dev'
+ _bpftool_once_attr 'type key value entries name flags offload_dev'
if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then
_bpftool_once_attr 'inner_map'
fi
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 1360c82ae732..cc6e6aae2447 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -68,7 +68,7 @@ void p_info(const char *fmt, ...)
va_end(ap);
}
-static bool is_bpffs(char *path)
+static bool is_bpffs(const char *path)
{
struct statfs st_fs;
@@ -244,13 +244,16 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
return fd;
}
-int mount_bpffs_for_pin(const char *name)
+int mount_bpffs_for_pin(const char *name, bool is_dir)
{
char err_str[ERR_MAX_LEN];
char *file;
char *dir;
int err = 0;
+ if (is_dir && is_bpffs(name))
+ return err;
+
file = malloc(strlen(name) + 1);
if (!file) {
p_err("mem alloc failed");
@@ -286,7 +289,7 @@ int do_pin_fd(int fd, const char *name)
{
int err;
- err = mount_bpffs_for_pin(name);
+ err = mount_bpffs_for_pin(name, false);
if (err)
return err;
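
The new early return lets callers pass a directory that is itself a bpffs mount and skip remounting; for regular pin files, bpffs is still mounted on the parent directory as before. For reference, a self-contained sketch of the is_bpffs() check whose signature is constified above (assuming BPF_FS_MAGIC from <linux/magic.h>, as on current kernels):

#include <stdbool.h>
#include <stdio.h>
#include <sys/vfs.h>
#include <linux/magic.h>

static bool is_bpffs(const char *path)
{
	struct statfs st_fs;

	if (statfs(path, &st_fs) < 0)
		return false;
	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}

int main(void)
{
	const char *path = "/sys/fs/bpf";

	printf("%s is %sa bpffs mount\n", path, is_bpffs(path) ? "" : "not ");
	return 0;
}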
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index da16e6a27ccc..0675d6a46413 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -167,12 +167,12 @@ static int get_vendor_id(int ifindex)
return strtol(buf, NULL, 0);
}
-static int read_procfs(const char *path)
+static long read_procfs(const char *path)
{
char *endptr, *line = NULL;
size_t len = 0;
FILE *fd;
- int res;
+ long res;
fd = fopen(path, "r");
if (!fd)
@@ -194,7 +194,7 @@ static int read_procfs(const char *path)
static void probe_unprivileged_disabled(void)
{
- int res;
+ long res;
/* No support for C-style output */
@@ -216,14 +216,14 @@ static void probe_unprivileged_disabled(void)
printf("Unable to retrieve required privileges for bpf() syscall\n");
break;
default:
- printf("bpf() syscall restriction has unknown value %d\n", res);
+ printf("bpf() syscall restriction has unknown value %ld\n", res);
}
}
}
static void probe_jit_enable(void)
{
- int res;
+ long res;
/* No support for C-style output */
@@ -245,7 +245,7 @@ static void probe_jit_enable(void)
printf("Unable to retrieve JIT-compiler status\n");
break;
default:
- printf("JIT-compiler status has unknown value %d\n",
+ printf("JIT-compiler status has unknown value %ld\n",
res);
}
}
@@ -253,7 +253,7 @@ static void probe_jit_enable(void)
static void probe_jit_harden(void)
{
- int res;
+ long res;
/* No support for C-style output */
@@ -275,7 +275,7 @@ static void probe_jit_harden(void)
printf("Unable to retrieve JIT hardening status\n");
break;
default:
- printf("JIT hardening status has unknown value %d\n",
+ printf("JIT hardening status has unknown value %ld\n",
res);
}
}
@@ -283,7 +283,7 @@ static void probe_jit_harden(void)
static void probe_jit_kallsyms(void)
{
- int res;
+ long res;
/* No support for C-style output */
@@ -302,14 +302,14 @@ static void probe_jit_kallsyms(void)
printf("Unable to retrieve JIT kallsyms export status\n");
break;
default:
- printf("JIT kallsyms exports status has unknown value %d\n", res);
+ printf("JIT kallsyms exports status has unknown value %ld\n", res);
}
}
}
static void probe_jit_limit(void)
{
- int res;
+ long res;
/* No support for C-style output */
@@ -322,7 +322,7 @@ static void probe_jit_limit(void)
printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n");
break;
default:
- printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res);
+ printf("Global memory limit for JIT compiler for unprivileged users is %ld bytes\n", res);
}
}
}
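
The widening from int to long is not cosmetic: read_procfs() parses with strtol(), and at least net.core.bpf_jit_limit is a long in the kernel, so its value can exceed INT_MAX on 64-bit systems. A minimal sketch of the same parsing pattern (the path is just an example):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *path = "/proc/sys/net/core/bpf_jit_limit";
	char buf[64];
	FILE *f = fopen(path, "r");
	long val;

	if (!f)
		return 1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return 1;
	}
	val = strtol(buf, NULL, 0);	/* long, so large limits survive */
	printf("%s = %ld\n", path, val);
	fclose(f);
	return 0;
}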
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 9a1d2365a297..6b0e5202ca7a 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -76,7 +76,7 @@ static int do_pin(int argc, char **argv)
goto close_obj;
}
- err = mount_bpffs_for_pin(path);
+ err = mount_bpffs_for_pin(path, false);
if (err)
goto close_link;
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index d98dbc50cf4c..2d786072ed0d 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -195,6 +195,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
show_link_attach_type_json(info->tracing.attach_type,
json_wtr);
+ jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id);
+ jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id);
break;
case BPF_LINK_TYPE_CGROUP:
jsonw_lluint_field(json_wtr, "cgroup_id",
@@ -212,7 +214,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
case BPF_LINK_TYPE_NETFILTER:
netfilter_dump_json(info, json_wtr);
break;
-
+ case BPF_LINK_TYPE_STRUCT_OPS:
+ jsonw_uint_field(json_wtr, "map_id",
+ info->struct_ops.map_id);
+ break;
default:
break;
}
@@ -245,7 +250,10 @@ static void show_link_header_plain(struct bpf_link_info *info)
else
printf("type %u ", info->type);
- printf("prog %u ", info->prog_id);
+ if (info->type == BPF_LINK_TYPE_STRUCT_OPS)
+ printf("map %u ", info->struct_ops.map_id);
+ else
+ printf("prog %u ", info->prog_id);
}
static void show_link_attach_type_plain(__u32 attach_type)
@@ -369,6 +377,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
printf("\n\tprog_type %u ", prog_info.type);
show_link_attach_type_plain(info->tracing.attach_type);
+ if (info->tracing.target_obj_id || info->tracing.target_btf_id)
+ printf("\n\ttarget_obj_id %u target_btf_id %u ",
+ info->tracing.target_obj_id,
+ info->tracing.target_btf_id);
break;
case BPF_LINK_TYPE_CGROUP:
printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index a49534d7eafa..b8bb08d10dec 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -142,7 +142,7 @@ const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(const char *path, bool quiet);
int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
-int mount_bpffs_for_pin(const char *name);
+int mount_bpffs_for_pin(const char *name, bool is_dir);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index aaeb8939e137..f98f7bbea2b1 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -139,6 +139,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
print_hex_data_json(key, info->key_size);
jsonw_name(json_wtr, "value");
print_hex_data_json(value, info->value_size);
+ if (map_is_map_of_maps(info->type))
+ jsonw_uint_field(json_wtr, "inner_map_id",
+ *(unsigned int *)value);
if (btf) {
struct btf_dumper d = {
.btf = btf,
@@ -259,8 +262,13 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
}
if (info->value_size) {
- printf("value:%c", break_names ? '\n' : ' ');
- fprint_hex(stdout, value, info->value_size, " ");
+ if (map_is_map_of_maps(info->type)) {
+ printf("inner_map_id:%c", break_names ? '\n' : ' ');
+ printf("%u ", *(unsigned int *)value);
+ } else {
+ printf("value:%c", break_names ? '\n' : ' ');
+ fprint_hex(stdout, value, info->value_size, " ");
+ }
}
printf("\n");
@@ -1279,6 +1287,11 @@ static int do_create(int argc, char **argv)
"flags"))
goto exit;
} else if (is_prefix(*argv, "dev")) {
+ p_info("Warning: 'bpftool map create [...] dev <ifname>' syntax is deprecated.\n"
+ "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map.");
+ goto offload_dev;
+ } else if (is_prefix(*argv, "offload_dev")) {
+offload_dev:
NEXT_ARG();
if (attr.map_ifindex) {
@@ -1423,7 +1436,7 @@ static int do_help(int argc, char **argv)
"Usage: %1$s %2$s { show | list } [MAP]\n"
" %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
" entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
- " [inner_map MAP] [dev NAME]\n"
+ " [inner_map MAP] [offload_dev NAME]\n"
" %1$s %2$s dump MAP\n"
" %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
" %1$s %2$s lookup MAP [key DATA]\n"
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 91b6075b2db3..8443a149dd17 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1517,12 +1517,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
struct bpf_program *prog = NULL, *pos;
unsigned int old_map_fds = 0;
const char *pinmaps = NULL;
+ __u32 xdpmeta_ifindex = 0;
+ __u32 offload_ifindex = 0;
bool auto_attach = false;
struct bpf_object *obj;
struct bpf_map *map;
const char *pinfile;
unsigned int i, j;
- __u32 ifindex = 0;
const char *file;
int idx, err;
@@ -1614,17 +1615,46 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
map_replace[old_map_fds].fd = fd;
old_map_fds++;
} else if (is_prefix(*argv, "dev")) {
+ p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n"
+ "Going further, please use 'offload_dev <ifname>' to offload program to device.\n"
+ "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'.");
+ goto offload_dev;
+ } else if (is_prefix(*argv, "offload_dev")) {
+offload_dev:
NEXT_ARG();
- if (ifindex) {
- p_err("offload device already specified");
+ if (offload_ifindex) {
+ p_err("offload_dev already specified");
+ goto err_free_reuse_maps;
+ } else if (xdpmeta_ifindex) {
+ p_err("xdpmeta_dev and offload_dev are mutually exclusive");
+ goto err_free_reuse_maps;
+ }
+ if (!REQ_ARGS(1))
+ goto err_free_reuse_maps;
+
+ offload_ifindex = if_nametoindex(*argv);
+ if (!offload_ifindex) {
+ p_err("unrecognized netdevice '%s': %s",
+ *argv, strerror(errno));
+ goto err_free_reuse_maps;
+ }
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "xdpmeta_dev")) {
+ NEXT_ARG();
+
+ if (xdpmeta_ifindex) {
+ p_err("xdpmeta_dev already specified");
+ goto err_free_reuse_maps;
+ } else if (offload_ifindex) {
+ p_err("xdpmeta_dev and offload_dev are mutually exclusive");
goto err_free_reuse_maps;
}
if (!REQ_ARGS(1))
goto err_free_reuse_maps;
- ifindex = if_nametoindex(*argv);
- if (!ifindex) {
+ xdpmeta_ifindex = if_nametoindex(*argv);
+ if (!xdpmeta_ifindex) {
p_err("unrecognized netdevice '%s': %s",
*argv, strerror(errno));
goto err_free_reuse_maps;
@@ -1671,7 +1701,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
goto err_close_obj;
}
- bpf_program__set_ifindex(pos, ifindex);
+ if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) {
+ bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY);
+ bpf_program__set_ifindex(pos, xdpmeta_ifindex);
+ } else {
+ bpf_program__set_ifindex(pos, offload_ifindex);
+ }
if (bpf_program__type(pos) != prog_type)
bpf_program__set_type(pos, prog_type);
bpf_program__set_expected_attach_type(pos, expected_attach_type);
@@ -1709,7 +1744,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
idx = 0;
bpf_object__for_each_map(map, obj) {
if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
- bpf_map__set_ifindex(map, ifindex);
+ bpf_map__set_ifindex(map, offload_ifindex);
if (j < old_map_fds && idx == map_replace[j].idx) {
err = bpf_map__reuse_fd(map, map_replace[j++].fd);
@@ -1739,7 +1774,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
goto err_close_obj;
}
- err = mount_bpffs_for_pin(pinfile);
+ err = mount_bpffs_for_pin(pinfile, !first_prog_only);
if (err)
goto err_close_obj;
@@ -2416,7 +2451,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n"
" %1$s %2$s pin PROG FILE\n"
" %1$s %2$s { load | loadall } OBJ PATH \\\n"
- " [type TYPE] [dev NAME] \\\n"
+ " [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n"
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
" [autoattach]\n"
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index eb05ea53afb1..26004f0c5a6a 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -15,6 +15,19 @@ enum bpf_obj_type {
BPF_OBJ_BTF,
};
+struct bpf_perf_link___local {
+ struct bpf_link link;
+ struct file *perf_file;
+} __attribute__((preserve_access_index));
+
+struct perf_event___local {
+ u64 bpf_cookie;
+} __attribute__((preserve_access_index));
+
+enum bpf_link_type___local {
+ BPF_LINK_TYPE_PERF_EVENT___local = 7,
+};
+
extern const void bpf_link_fops __ksym;
extern const void bpf_map_fops __ksym;
extern const void bpf_prog_fops __ksym;
@@ -41,10 +54,10 @@ static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type)
/* could be used only with BPF_LINK_TYPE_PERF_EVENT links */
static __u64 get_bpf_cookie(struct bpf_link *link)
{
- struct bpf_perf_link *perf_link;
- struct perf_event *event;
+ struct bpf_perf_link___local *perf_link;
+ struct perf_event___local *event;
- perf_link = container_of(link, struct bpf_perf_link, link);
+ perf_link = container_of(link, struct bpf_perf_link___local, link);
event = BPF_CORE_READ(perf_link, perf_file, private_data);
return BPF_CORE_READ(event, bpf_cookie);
}
@@ -84,10 +97,13 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
- if (obj_type == BPF_OBJ_LINK) {
+ if (obj_type == BPF_OBJ_LINK &&
+ bpf_core_enum_value_exists(enum bpf_link_type___local,
+ BPF_LINK_TYPE_PERF_EVENT___local)) {
struct bpf_link *link = (struct bpf_link *) file->private_data;
- if (BPF_CORE_READ(link, type) == BPF_LINK_TYPE_PERF_EVENT) {
+ if (link->type == bpf_core_enum_value(enum bpf_link_type___local,
+ BPF_LINK_TYPE_PERF_EVENT___local)) {
e.has_bpf_cookie = true;
e.bpf_cookie = get_bpf_cookie(link);
}
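
Background on the ___local types introduced above: libbpf's CO-RE machinery treats a triple-underscore suffix as a "flavor" — it is stripped when matching the type against kernel BTF — and preserve_access_index makes every field access relocatable at load time. That removes the build dependency on kernel headers defining bpf_perf_link, and the bpf_core_enum_value_exists() guard keeps the iterator working on kernels whose BTF predates BPF_LINK_TYPE_PERF_EVENT. A condensed sketch of the pattern (illustrative type and field; assumes vmlinux.h and <bpf/bpf_core_read.h> as usual for BPF programs):

/* local flavor; matched against "task_struct" in kernel BTF */
struct task_struct___local {
	int pid;
} __attribute__((preserve_access_index));

static __always_inline int read_pid(void *task)
{
	struct task_struct___local *t = task;

	return BPF_CORE_READ(t, pid);	/* offset relocated at load time */
}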
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index ce5b65e07ab1..2f80edc682f1 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -4,6 +4,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+struct bpf_perf_event_value___local {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+} __attribute__((preserve_access_index));
+
/* map of perf event fds, num_cpu * num_metric entries */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
@@ -15,14 +21,14 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(value_size, sizeof(struct bpf_perf_event_value___local));
} fentry_readings SEC(".maps");
/* accumulated readings */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(value_size, sizeof(struct bpf_perf_event_value___local));
} accum_readings SEC(".maps");
/* sample counts, one per cpu */
@@ -39,7 +45,7 @@ const volatile __u32 num_metric = 1;
SEC("fentry/XXX")
int BPF_PROG(fentry_XXX)
{
- struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
+ struct bpf_perf_event_value___local *ptrs[MAX_NUM_MATRICS];
u32 key = bpf_get_smp_processor_id();
u32 i;
@@ -53,10 +59,10 @@ int BPF_PROG(fentry_XXX)
}
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
- struct bpf_perf_event_value reading;
+ struct bpf_perf_event_value___local reading;
int err;
- err = bpf_perf_event_read_value(&events, key, &reading,
+ err = bpf_perf_event_read_value(&events, key, (void *)&reading,
sizeof(reading));
if (err)
return 0;
@@ -68,14 +74,14 @@ int BPF_PROG(fentry_XXX)
}
static inline void
-fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after)
{
- struct bpf_perf_event_value *before, diff;
+ struct bpf_perf_event_value___local *before, diff;
before = bpf_map_lookup_elem(&fentry_readings, &id);
/* only account samples with a valid fentry_reading */
if (before && before->counter) {
- struct bpf_perf_event_value *accum;
+ struct bpf_perf_event_value___local *accum;
diff.counter = after->counter - before->counter;
diff.enabled = after->enabled - before->enabled;
@@ -93,7 +99,7 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
- struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
+ struct bpf_perf_event_value___local readings[MAX_NUM_MATRICS];
u32 cpu = bpf_get_smp_processor_id();
u32 i, zero = 0;
int err;
@@ -102,7 +108,8 @@ int BPF_PROG(fexit_XXX)
/* read all events before updating the maps, to reduce error */
for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
- readings + i, sizeof(*readings));
+ (void *)(readings + i),
+ sizeof(*readings));
if (err)
return 0;
}
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index 57c3da70aa31..3ebc9fe91e0e 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -509,7 +509,7 @@ static int do_register(int argc, char **argv)
if (argc == 1)
linkdir = GET_ARG();
- if (linkdir && mount_bpffs_for_pin(linkdir)) {
+ if (linkdir && mount_bpffs_for_pin(linkdir, true)) {
p_err("can't mount bpffs for pinning");
return -1;
}
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index ac548a7baa73..4b8079f294f6 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -67,7 +67,7 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU
LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
-HOSTCFLAGS += -g \
+HOSTCFLAGS_resolve_btfids += -g \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
-I$(LIBBPF_INCLUDE) \
@@ -76,7 +76,7 @@ HOSTCFLAGS += -g \
LIBS = $(LIBELF_LIBS) -lz
-export srctree OUTPUT HOSTCFLAGS Q HOSTCC HOSTLD HOSTAR
+export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include
$(BINARY_IN): fixdep FORCE prepare | $(OUTPUT)
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 89430338a3d9..fac42486a8cf 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -117,6 +117,16 @@ $(OUTPUT)%.s: %.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_s_c)
+# bison and flex files are generated in the OUTPUT directory,
+# so they need separate rules to track their dependencies properly
+$(OUTPUT)%-bison.o: $(OUTPUT)%-bison.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,$(host)cc_o_c)
+
+$(OUTPUT)%-flex.o: $(OUTPUT)%-flex.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,$(host)cc_o_c)
+
# Gather build data:
# obj-y - list of build objects
# subdir-y - list of directories to nest
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0f0aa9b7d7b5..2cd6dbbee088 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -208,7 +208,7 @@ $(OUTPUT)test-libtraceevent.bin:
$(BUILD) -ltraceevent
$(OUTPUT)test-libtracefs.bin:
- $(BUILD) -ltracefs
+ $(BUILD) $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null) -ltracefs
$(OUTPUT)test-libcrypto.bin:
$(BUILD) -lcrypto
diff --git a/tools/counter/.gitignore b/tools/counter/.gitignore
new file mode 100644
index 000000000000..9fd290d4bf43
--- /dev/null
+++ b/tools/counter/.gitignore
@@ -0,0 +1,2 @@
+/counter_example
+/include/linux/counter.h
diff --git a/tools/counter/Makefile b/tools/counter/Makefile
index 8843f0fa6119..b2c2946f44c9 100644
--- a/tools/counter/Makefile
+++ b/tools/counter/Makefile
@@ -40,6 +40,8 @@ $(OUTPUT)counter_example: $(COUNTER_EXAMPLE)
clean:
rm -f $(ALL_PROGRAMS)
rm -rf $(OUTPUT)include/linux/counter.h
+ rm -df $(OUTPUT)include/linux
+ rm -df $(OUTPUT)include
find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
install: $(ALL_PROGRAMS)
diff --git a/tools/hv/vmbus_testing b/tools/hv/vmbus_testing
index e7212903dd1d..4467979d8f69 100755
--- a/tools/hv/vmbus_testing
+++ b/tools/hv/vmbus_testing
@@ -164,7 +164,7 @@ def recursive_file_lookup(path, file_map):
def get_all_devices_test_status(file_map):
for device in file_map:
- if (get_test_state(locate_state(device, file_map)) is 1):
+ if (get_test_state(locate_state(device, file_map)) == 1):
print("Testing = ON for: {}"
.format(device.split("/")[5]))
else:
@@ -203,7 +203,7 @@ def write_test_files(path, value):
def set_test_state(state_path, state_value, quiet):
write_test_files(state_path, state_value)
- if (get_test_state(state_path) is 1):
+ if (get_test_state(state_path) == 1):
if (not quiet):
print("Testing = ON for device: {}"
.format(state_path.split("/")[5]))
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index f8deae4e26a1..44bbf80f0cfd 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -51,9 +51,9 @@ enum autochan {
* Has the side effect of filling the channels[i].location values used
* in processing the buffer output.
**/
-static int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
+static unsigned int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
{
- int bytes = 0;
+ unsigned int bytes = 0;
int i = 0;
while (i < num_channels) {
@@ -348,7 +348,7 @@ int main(int argc, char **argv)
ssize_t read_size;
int dev_num = -1, trig_num = -1;
char *buffer_access = NULL;
- int scan_size;
+ unsigned int scan_size;
int noevents = 0;
int notrigger = 0;
char *dummy;
@@ -674,7 +674,16 @@ int main(int argc, char **argv)
}
scan_size = size_from_channelarray(channels, num_channels);
- data = malloc(scan_size * buf_len);
+
+ size_t total_buf_len = scan_size * buf_len;
+
+ if (scan_size > 0 && total_buf_len / scan_size != buf_len) {
+ ret = -EFAULT;
+ perror("Integer overflow happened when calculate scan_size * buf_len");
+ goto error;
+ }
+
+ data = malloc(total_buf_len);
if (!data) {
ret = -ENOMEM;
goto error;
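
The guard added above is the classic division check: after total_buf_len = scan_size * buf_len, an overflow occurred iff dividing back does not return buf_len (for non-zero scan_size). With GCC or Clang the same guard can also be written with the overflow builtin; a sketch:

#include <stdlib.h>

/* returns NULL instead of a truncated allocation on overflow */
static void *alloc_scan_buffer(unsigned int scan_size, unsigned long buf_len)
{
	size_t total;

	if (__builtin_mul_overflow((size_t)scan_size, buf_len, &total))
		return NULL;
	return malloc(total);
}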
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index 9839feafd38a..64d67b080744 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -25,8 +25,23 @@ endif
nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
arch_file := arch-$(nolibc_arch).h
-all_files := ctype.h errno.h nolibc.h signal.h stackprotector.h std.h stdint.h \
- stdio.h stdlib.h string.h sys.h time.h types.h unistd.h
+all_files := \
+ compiler.h \
+ ctype.h \
+ errno.h \
+ nolibc.h \
+ signal.h \
+ stackprotector.h \
+ std.h \
+ stdint.h \
+ stdlib.h \
+ string.h \
+ sys.h \
+ time.h \
+ types.h \
+ unistd.h \
+ stdio.h \
+
# install all headers needed to support a bare-metal compiler
all: headers
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 383baddef701..b8c7b14c4ca8 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_AARCH64_H
#define _NOLIBC_ARCH_AARCH64_H
+#include "compiler.h"
+
/* The struct returned by the newfstatat() syscall. Differs slightly from the
* x86_64's stat one by field ordering, so be careful.
*/
@@ -173,27 +175,30 @@ char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
/* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
- "ldr x0, [sp]\n" // argc (x0) was in the stack
- "add x1, sp, 8\n" // argv (x1) = sp
- "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
- "add x2, x2, 8\n" // + 8 (skip null)
- "add x2, x2, x1\n" // + argv
- "adrp x3, environ\n" // x3 = &environ (high bits)
- "str x2, [x3, #:lo12:environ]\n" // store envp into environ
- "mov x4, x2\n" // search for auxv (follows NULL after last env)
+#ifdef _NOLIBC_STACKPROTECTOR
+ "bl __stack_chk_init\n" /* initialize stack protector */
+#endif
+ "ldr x0, [sp]\n" /* argc (x0) was in the stack */
+ "add x1, sp, 8\n" /* argv (x1) = sp */
+ "lsl x2, x0, 3\n" /* envp (x2) = 8*argc ... */
+ "add x2, x2, 8\n" /* + 8 (skip null) */
+ "add x2, x2, x1\n" /* + argv */
+ "adrp x3, environ\n" /* x3 = &environ (high bits) */
+ "str x2, [x3, #:lo12:environ]\n" /* store envp into environ */
+ "mov x4, x2\n" /* search for auxv (follows NULL after last env) */
"0:\n"
- "ldr x5, [x4], 8\n" // x5 = *x4; x4 += 8
- "cbnz x5, 0b\n" // and stop at NULL after last env
- "adrp x3, _auxv\n" // x3 = &_auxv (high bits)
- "str x4, [x3, #:lo12:_auxv]\n" // store x4 into _auxv
- "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
- "bl main\n" // main() returns the status code, we'll exit with it.
- "mov x8, 93\n" // NR_exit == 93
+ "ldr x5, [x4], 8\n" /* x5 = *x4; x4 += 8 */
+ "cbnz x5, 0b\n" /* and stop at NULL after last env */
+ "adrp x3, _auxv\n" /* x3 = &_auxv (high bits) */
+ "str x4, [x3, #:lo12:_auxv]\n" /* store x4 into _auxv */
+ "and sp, x1, -16\n" /* sp must be 16-byte aligned in the callee */
+ "bl main\n" /* main() returns the status code, we'll exit with it. */
+ "mov x8, 93\n" /* NR_exit == 93 */
"svc #0\n"
);
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_AARCH64_H
+#endif /* _NOLIBC_ARCH_AARCH64_H */
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 42499f23e73c..bd8bf2ebd43b 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_ARM_H
#define _NOLIBC_ARCH_ARM_H
+#include "compiler.h"
+
/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
* exactly 56 bytes (stops before the unused array). In big endian, the format
* differs as devices are returned as short only.
@@ -196,41 +198,67 @@ struct sys_stat_struct {
_arg1; \
})
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ register long _arg4 __asm__ ("r3") = (long)(arg4); \
+ register long _arg5 __asm__ ("r4") = (long)(arg5); \
+ register long _arg6 __asm__ ("r5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
+ "svc #0\n" \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
/* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
- "pop {%r0}\n" // argc was in the stack
- "mov %r1, %sp\n" // argv = sp
+#ifdef _NOLIBC_STACKPROTECTOR
+ "bl __stack_chk_init\n" /* initialize stack protector */
+#endif
+ "pop {%r0}\n" /* argc was in the stack */
+ "mov %r1, %sp\n" /* argv = sp */
- "add %r2, %r0, $1\n" // envp = (argc + 1) ...
- "lsl %r2, %r2, $2\n" // * 4 ...
- "add %r2, %r2, %r1\n" // + argv
- "ldr %r3, 1f\n" // r3 = &environ (see below)
- "str %r2, [r3]\n" // store envp into environ
+ "add %r2, %r0, $1\n" /* envp = (argc + 1) ... */
+ "lsl %r2, %r2, $2\n" /* * 4 ... */
+ "add %r2, %r2, %r1\n" /* + argv */
+ "ldr %r3, 1f\n" /* r3 = &environ (see below) */
+ "str %r2, [r3]\n" /* store envp into environ */
- "mov r4, r2\n" // search for auxv (follows NULL after last env)
+ "mov r4, r2\n" /* search for auxv (follows NULL after last env) */
"0:\n"
- "mov r5, r4\n" // r5 = r4
- "add r4, r4, #4\n" // r4 += 4
- "ldr r5,[r5]\n" // r5 = *r5 = *(r4-4)
- "cmp r5, #0\n" // and stop at NULL after last env
+ "mov r5, r4\n" /* r5 = r4 */
+ "add r4, r4, #4\n" /* r4 += 4 */
+ "ldr r5,[r5]\n" /* r5 = *r5 = *(r4-4) */
+ "cmp r5, #0\n" /* and stop at NULL after last env */
"bne 0b\n"
- "ldr %r3, 2f\n" // r3 = &_auxv (low bits)
- "str r4, [r3]\n" // store r4 into _auxv
+ "ldr %r3, 2f\n" /* r3 = &_auxv (low bits) */
+ "str r4, [r3]\n" /* store r4 into _auxv */
- "mov %r3, $8\n" // AAPCS : sp must be 8-byte aligned in the
- "neg %r3, %r3\n" // callee, and bl doesn't push (lr=pc)
- "and %r3, %r3, %r1\n" // so we do sp = r1(=sp) & r3(=-8);
- "mov %sp, %r3\n" //
+ "mov %r3, $8\n" /* AAPCS : sp must be 8-byte aligned in the */
+ "neg %r3, %r3\n" /* callee, and bl doesn't push (lr=pc) */
+ "and %r3, %r3, %r1\n" /* so we do sp = r1(=sp) & r3(=-8); */
+ "mov %sp, %r3\n"
- "bl main\n" // main() returns the status code, we'll exit with it.
- "movs r7, $1\n" // NR_exit == 1
+ "bl main\n" /* main() returns the status code, we'll exit with it. */
+ "movs r7, $1\n" /* NR_exit == 1 */
"svc $0x00\n"
- ".align 2\n" // below are the pointers to a few variables
+ ".align 2\n" /* below are the pointers to a few variables */
"1:\n"
".word environ\n"
"2:\n"
@@ -239,4 +267,4 @@ void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_ARM_H
+#endif /* _NOLIBC_ARCH_ARM_H */
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index 2d98d78fd3f3..1a86f86eab5c 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_I386_H
#define _NOLIBC_ARCH_I386_H
+#include "compiler.h"
+
/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
* exactly 56 bytes (stops before the unused array).
*/
@@ -181,8 +183,6 @@ struct sys_stat_struct {
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
-#define __ARCH_SUPPORTS_STACK_PROTECTOR
-
/* startup code */
/*
* i386 System V ABI mandates:
@@ -190,35 +190,35 @@ const unsigned long *_auxv __attribute__((weak));
* 2) The deepest stack frame should be set to zero
*
*/
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"),no_stack_protector)) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
-#ifdef NOLIBC_STACKPROTECTOR
- "call __stack_chk_init\n" // initialize stack protector
+#ifdef _NOLIBC_STACKPROTECTOR
+ "call __stack_chk_init\n" /* initialize stack protector */
#endif
- "pop %eax\n" // argc (first arg, %eax)
- "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
- "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
- "mov %ecx, environ\n" // save environ
- "xor %ebp, %ebp\n" // zero the stack frame
- "mov %ecx, %edx\n" // search for auxv (follows NULL after last env)
+ "pop %eax\n" /* argc (first arg, %eax) */
+ "mov %esp, %ebx\n" /* argv[] (second arg, %ebx) */
+ "lea 4(%ebx,%eax,4),%ecx\n" /* then a NULL then envp (third arg, %ecx) */
+ "mov %ecx, environ\n" /* save environ */
+ "xor %ebp, %ebp\n" /* zero the stack frame */
+ "mov %ecx, %edx\n" /* search for auxv (follows NULL after last env) */
"0:\n"
- "add $4, %edx\n" // search for auxv using edx, it follows the
- "cmp -4(%edx), %ebp\n" // ... NULL after last env (ebp is zero here)
+ "add $4, %edx\n" /* search for auxv using edx, it follows the */
+ "cmp -4(%edx), %ebp\n" /* ... NULL after last env (ebp is zero here) */
"jnz 0b\n"
- "mov %edx, _auxv\n" // save it into _auxv
- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
- "sub $4, %esp\n" // the call instruction (args are aligned)
- "push %ecx\n" // push all registers on the stack so that we
- "push %ebx\n" // support both regparm and plain stack modes
+ "mov %edx, _auxv\n" /* save it into _auxv */
+ "and $-16, %esp\n" /* x86 ABI : esp must be 16-byte aligned before */
+ "sub $4, %esp\n" /* the call instruction (args are aligned) */
+ "push %ecx\n" /* push all registers on the stack so that we */
+ "push %ebx\n" /* support both regparm and plain stack modes */
"push %eax\n"
- "call main\n" // main() returns the status code in %eax
- "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
- "movl $1, %eax\n" // NR_exit == 1
- "int $0x80\n" // exit now
- "hlt\n" // ensure it does not
+ "call main\n" /* main() returns the status code in %eax */
+ "mov %eax, %ebx\n" /* retrieve exit code (32-bit int) */
+ "movl $1, %eax\n" /* NR_exit == 1 */
+ "int $0x80\n" /* exit now */
+ "hlt\n" /* ensure it does not */
);
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_I386_H
+#endif /* _NOLIBC_ARCH_I386_H */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 029ee3cd6baf..b0279b941178 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_LOONGARCH_H
#define _NOLIBC_ARCH_LOONGARCH_H
+#include "compiler.h"
+
/* Syscalls for LoongArch :
* - stack is 16-byte aligned
* - syscall number is passed in a7
@@ -158,7 +160,7 @@ const unsigned long *_auxv __attribute__((weak));
#define LONG_ADDI "addi.w"
#define LONG_SLL "slli.w"
#define LONG_BSTRINS "bstrins.w"
-#else // __loongarch_grlen == 64
+#else /* __loongarch_grlen == 64 */
#define LONGLOG "3"
#define SZREG "8"
#define REG_L "ld.d"
@@ -170,31 +172,34 @@ const unsigned long *_auxv __attribute__((weak));
#endif
/* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
- REG_L " $a0, $sp, 0\n" // argc (a0) was in the stack
- LONG_ADDI " $a1, $sp, "SZREG"\n" // argv (a1) = sp + SZREG
- LONG_SLL " $a2, $a0, "LONGLOG"\n" // envp (a2) = SZREG*argc ...
- LONG_ADDI " $a2, $a2, "SZREG"\n" // + SZREG (skip null)
- LONG_ADD " $a2, $a2, $a1\n" // + argv
-
- "move $a3, $a2\n" // iterate a3 over envp to find auxv (after NULL)
- "0:\n" // do {
- REG_L " $a4, $a3, 0\n" // a4 = *a3;
- LONG_ADDI " $a3, $a3, "SZREG"\n" // a3 += sizeof(void*);
- "bne $a4, $zero, 0b\n" // } while (a4);
- "la.pcrel $a4, _auxv\n" // a4 = &_auxv
- LONG_S " $a3, $a4, 0\n" // store a3 into _auxv
-
- "la.pcrel $a3, environ\n" // a3 = &environ
- LONG_S " $a2, $a3, 0\n" // store envp(a2) into environ
- LONG_BSTRINS " $sp, $zero, 3, 0\n" // sp must be 16-byte aligned
- "bl main\n" // main() returns the status code, we'll exit with it.
- "li.w $a7, 93\n" // NR_exit == 93
+#ifdef _NOLIBC_STACKPROTECTOR
+ "bl __stack_chk_init\n" /* initialize stack protector */
+#endif
+ REG_L " $a0, $sp, 0\n" /* argc (a0) was in the stack */
+ LONG_ADDI " $a1, $sp, "SZREG"\n" /* argv (a1) = sp + SZREG */
+ LONG_SLL " $a2, $a0, "LONGLOG"\n" /* envp (a2) = SZREG*argc ... */
+ LONG_ADDI " $a2, $a2, "SZREG"\n" /* + SZREG (skip null) */
+ LONG_ADD " $a2, $a2, $a1\n" /* + argv */
+
+ "move $a3, $a2\n" /* iterate a3 over envp to find auxv (after NULL) */
+ "0:\n" /* do { */
+ REG_L " $a4, $a3, 0\n" /* a4 = *a3; */
+ LONG_ADDI " $a3, $a3, "SZREG"\n" /* a3 += sizeof(void*); */
+ "bne $a4, $zero, 0b\n" /* } while (a4); */
+ "la.pcrel $a4, _auxv\n" /* a4 = &_auxv */
+ LONG_S " $a3, $a4, 0\n" /* store a3 into _auxv */
+
+ "la.pcrel $a3, environ\n" /* a3 = &environ */
+ LONG_S " $a2, $a3, 0\n" /* store envp(a2) into environ */
+ LONG_BSTRINS " $sp, $zero, 3, 0\n" /* sp must be 16-byte aligned */
+ "bl main\n" /* main() returns the status code, we'll exit with it. */
+ "li.w $a7, 93\n" /* NR_exit == 93 */
"syscall 0\n"
);
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_LOONGARCH_H
+#endif /* _NOLIBC_ARCH_LOONGARCH_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index bf83432d23ed..67c5d7997110 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_MIPS_H
#define _NOLIBC_ARCH_MIPS_H
+#include "compiler.h"
+
/* The struct returned by the stat() syscall. 88 bytes are returned by the
* syscall.
*/
@@ -180,45 +182,49 @@ char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
/* startup code, note that it's called __start on MIPS */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void)
{
__asm__ volatile (
- //".set nomips16\n"
+ /*".set nomips16\n"*/
".set push\n"
".set noreorder\n"
".option pic0\n"
- //".ent __start\n"
- //"__start:\n"
- "lw $a0,($sp)\n" // argc was in the stack
- "addiu $a1, $sp, 4\n" // argv = sp + 4
- "sll $a2, $a0, 2\n" // a2 = argc * 4
- "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
- "addiu $a2, $a2, 4\n" // ... + 4
- "lui $a3, %hi(environ)\n" // load environ into a3 (hi)
- "addiu $a3, %lo(environ)\n" // load environ into a3 (lo)
- "sw $a2,($a3)\n" // store envp(a2) into environ
-
- "move $t0, $a2\n" // iterate t0 over envp, look for NULL
- "0:" // do {
- "lw $a3, ($t0)\n" // a3=*(t0);
- "bne $a3, $0, 0b\n" // } while (a3);
- "addiu $t0, $t0, 4\n" // delayed slot: t0+=4;
- "lui $a3, %hi(_auxv)\n" // load _auxv into a3 (hi)
- "addiu $a3, %lo(_auxv)\n" // load _auxv into a3 (lo)
- "sw $t0, ($a3)\n" // store t0 into _auxv
+#ifdef _NOLIBC_STACKPROTECTOR
+ "jal __stack_chk_init\n" /* initialize stack protector */
+ "nop\n" /* delayed slot */
+#endif
+ /*".ent __start\n"*/
+ /*"__start:\n"*/
+ "lw $a0,($sp)\n" /* argc was in the stack */
+ "addiu $a1, $sp, 4\n" /* argv = sp + 4 */
+ "sll $a2, $a0, 2\n" /* a2 = argc * 4 */
+ "add $a2, $a2, $a1\n" /* envp = argv + 4*argc ... */
+ "addiu $a2, $a2, 4\n" /* ... + 4 */
+ "lui $a3, %hi(environ)\n" /* load environ into a3 (hi) */
+ "addiu $a3, %lo(environ)\n" /* load environ into a3 (lo) */
+ "sw $a2,($a3)\n" /* store envp(a2) into environ */
+
+ "move $t0, $a2\n" /* iterate t0 over envp, look for NULL */
+ "0:" /* do { */
+ "lw $a3, ($t0)\n" /* a3=*(t0); */
+ "bne $a3, $0, 0b\n" /* } while (a3); */
+ "addiu $t0, $t0, 4\n" /* delayed slot: t0+=4; */
+ "lui $a3, %hi(_auxv)\n" /* load _auxv into a3 (hi) */
+ "addiu $a3, %lo(_auxv)\n" /* load _auxv into a3 (lo) */
+ "sw $t0, ($a3)\n" /* store t0 into _auxv */
"li $t0, -8\n"
- "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
- "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
- "jal main\n" // main() returns the status code, we'll exit with it.
- "nop\n" // delayed slot
- "move $a0, $v0\n" // retrieve 32-bit exit code from v0
- "li $v0, 4001\n" // NR_exit == 4001
+ "and $sp, $sp, $t0\n" /* sp must be 8-byte aligned */
+ "addiu $sp,$sp,-16\n" /* the callee expects to save a0..a3 there! */
+ "jal main\n" /* main() returns the status code, we'll exit with it. */
+ "nop\n" /* delayed slot */
+ "move $a0, $v0\n" /* retrieve 32-bit exit code from v0 */
+ "li $v0, 4001\n" /* NR_exit == 4001 */
"syscall\n"
- //".end __start\n"
+ /*".end __start\n"*/
".set pop\n"
);
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_MIPS_H
+#endif /* _NOLIBC_ARCH_MIPS_H */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index e197fcb10ac0..cefefc2e93f1 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_RISCV_H
#define _NOLIBC_ARCH_RISCV_H
+#include "compiler.h"
+
struct sys_stat_struct {
unsigned long st_dev; /* Device. */
unsigned long st_ino; /* File serial number. */
@@ -33,9 +35,13 @@ struct sys_stat_struct {
#if __riscv_xlen == 64
#define PTRLOG "3"
#define SZREG "8"
+#define REG_L "ld"
+#define REG_S "sd"
#elif __riscv_xlen == 32
#define PTRLOG "2"
#define SZREG "4"
+#define REG_L "lw"
+#define REG_S "sw"
#endif
/* Syscalls for RISCV :
@@ -174,35 +180,38 @@ char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
/* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
".option push\n"
".option norelax\n"
"lla gp, __global_pointer$\n"
".option pop\n"
- "lw a0, 0(sp)\n" // argc (a0) was in the stack
- "add a1, sp, "SZREG"\n" // argv (a1) = sp
- "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
- "add a2, a2, "SZREG"\n" // + SZREG (skip null)
- "add a2,a2,a1\n" // + argv
-
- "add a3, a2, zero\n" // iterate a3 over envp to find auxv (after NULL)
- "0:\n" // do {
- "ld a4, 0(a3)\n" // a4 = *a3;
- "add a3, a3, "SZREG"\n" // a3 += sizeof(void*);
- "bne a4, zero, 0b\n" // } while (a4);
- "lui a4, %hi(_auxv)\n" // a4 = &_auxv (high bits)
- "sd a3, %lo(_auxv)(a4)\n" // store a3 into _auxv
-
- "lui a3, %hi(environ)\n" // a3 = &environ (high bits)
- "sd a2,%lo(environ)(a3)\n" // store envp(a2) into environ
- "andi sp,a1,-16\n" // sp must be 16-byte aligned
- "call main\n" // main() returns the status code, we'll exit with it.
- "li a7, 93\n" // NR_exit == 93
+#ifdef _NOLIBC_STACKPROTECTOR
+ "call __stack_chk_init\n" /* initialize stack protector */
+#endif
+ REG_L" a0, 0(sp)\n" /* argc (a0) was in the stack */
+ "add a1, sp, "SZREG"\n" /* argv (a1) = sp */
+ "slli a2, a0, "PTRLOG"\n" /* envp (a2) = SZREG*argc ... */
+ "add a2, a2, "SZREG"\n" /* + SZREG (skip null) */
+ "add a2,a2,a1\n" /* + argv */
+
+ "add a3, a2, zero\n" /* iterate a3 over envp to find auxv (after NULL) */
+ "0:\n" /* do { */
+ REG_L" a4, 0(a3)\n" /* a4 = *a3; */
+ "add a3, a3, "SZREG"\n" /* a3 += sizeof(void*); */
+ "bne a4, zero, 0b\n" /* } while (a4); */
+ "lui a4, %hi(_auxv)\n" /* a4 = &_auxv (high bits) */
+ REG_S" a3, %lo(_auxv)(a4)\n" /* store a3 into _auxv */
+
+ "lui a3, %hi(environ)\n" /* a3 = &environ (high bits) */
+ REG_S" a2,%lo(environ)(a3)\n"/* store envp(a2) into environ */
+ "andi sp,a1,-16\n" /* sp must be 16-byte aligned */
+ "call main\n" /* main() returns the status code, we'll exit with it. */
+ "li a7, 93\n" /* NR_exit == 93 */
"ecall\n"
);
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_RISCV_H
+#endif /* _NOLIBC_ARCH_RISCV_H */
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index 6b0e54ed543d..ed2c33b2de68 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -5,8 +5,11 @@
#ifndef _NOLIBC_ARCH_S390_H
#define _NOLIBC_ARCH_S390_H
+#include <asm/signal.h>
#include <asm/unistd.h>
+#include "compiler.h"
+
/* The struct returned by the stat() syscall, equivalent to stat64(). The
* syscall returns 116 bytes and stops in the middle of __unused.
*/
@@ -163,7 +166,7 @@ char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
/* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
"lg %r2,0(%r15)\n" /* argument count */
@@ -223,4 +226,12 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
return (void *)my_syscall1(__NR_mmap, &args);
}
#define sys_mmap sys_mmap
-#endif // _NOLIBC_ARCH_S390_H
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+ return my_syscall5(__NR_clone, 0, SIGCHLD, 0, 0, 0);
+}
+#define sys_fork sys_fork
+
+#endif /* _NOLIBC_ARCH_S390_H */
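
Why s390 needs its own sys_fork(): the s390 clone() syscall takes the child stack pointer as its first argument and the flags second, the reverse of most other architectures, so nolibc's generic clone-based fork emulation cannot be shared. Passing a NULL stack and SIGCHLD as the only flag, as above, is the standard fork(2) emulation. A hedged usage sketch (assumes a nolibc build, e.g. gcc -nostdlib -include nolibc.h):

int main(void)
{
	pid_t pid = fork();	/* resolves to sys_fork() above on s390 */

	if (pid < 0)
		return 1;	/* clone failed */
	if (pid == 0)
		return 0;	/* child */
	return 0;		/* parent: pid is the child's PID */
}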
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index f7f2a11d4c3b..1bbd95f65233 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_ARCH_X86_64_H
#define _NOLIBC_ARCH_X86_64_H
+#include "compiler.h"
+
/* The struct returned by the stat() syscall, equivalent to stat64(). The
* syscall returns 116 bytes and stops in the middle of __unused.
*/
@@ -181,8 +183,6 @@ struct sys_stat_struct {
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
-#define __ARCH_SUPPORTS_STACK_PROTECTOR
-
/* startup code */
/*
* x86-64 System V ABI mandates:
@@ -190,31 +190,31 @@ const unsigned long *_auxv __attribute__((weak));
* 2) The deepest stack frame should be zero (the %rbp).
*
*/
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
{
__asm__ volatile (
-#ifdef NOLIBC_STACKPROTECTOR
- "call __stack_chk_init\n" // initialize stack protector
+#ifdef _NOLIBC_STACKPROTECTOR
+ "call __stack_chk_init\n" /* initialize stack protector */
#endif
- "pop %rdi\n" // argc (first arg, %rdi)
- "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
- "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
- "mov %rdx, environ\n" // save environ
- "xor %ebp, %ebp\n" // zero the stack frame
- "mov %rdx, %rax\n" // search for auxv (follows NULL after last env)
+ "pop %rdi\n" /* argc (first arg, %rdi) */
+ "mov %rsp, %rsi\n" /* argv[] (second arg, %rsi) */
+ "lea 8(%rsi,%rdi,8),%rdx\n" /* then a NULL then envp (third arg, %rdx) */
+ "mov %rdx, environ\n" /* save environ */
+ "xor %ebp, %ebp\n" /* zero the stack frame */
+ "mov %rdx, %rax\n" /* search for auxv (follows NULL after last env) */
"0:\n"
- "add $8, %rax\n" // search for auxv using rax, it follows the
- "cmp -8(%rax), %rbp\n" // ... NULL after last env (rbp is zero here)
+ "add $8, %rax\n" /* search for auxv using rax, it follows the */
+ "cmp -8(%rax), %rbp\n" /* ... NULL after last env (rbp is zero here) */
"jnz 0b\n"
- "mov %rax, _auxv\n" // save it into _auxv
- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
- "call main\n" // main() returns the status code, we'll exit with it.
- "mov %eax, %edi\n" // retrieve exit code (32 bit)
- "mov $60, %eax\n" // NR_exit == 60
- "syscall\n" // really exit
- "hlt\n" // ensure it does not return
+ "mov %rax, _auxv\n" /* save it into _auxv */
+ "and $-16, %rsp\n" /* x86 ABI : esp must be 16-byte aligned before call */
+ "call main\n" /* main() returns the status code, we'll exit with it. */
+ "mov %eax, %edi\n" /* retrieve exit code (32 bit) */
+ "mov $60, %eax\n" /* NR_exit == 60 */
+ "syscall\n" /* really exit */
+ "hlt\n" /* ensure it does not return */
);
__builtin_unreachable();
}
-#endif // _NOLIBC_ARCH_X86_64_H
+#endif /* _NOLIBC_ARCH_X86_64_H */
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 2d5386a8d6aa..82b43935650f 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -7,7 +7,7 @@
* the syscall declarations and the _start code definition. This is the only
* global part. On all architectures the kernel puts everything in the stack
* before jumping to _start just above us, without any return address (_start
- * is not a function but an entry pint). So at the stack pointer we find argc.
+ * is not a function but an entry point). So at the stack pointer we find argc.
* Then argv[] begins, and ends at the first NULL. Then we have envp which
* starts and ends with a NULL as well. So envp=argv+argc+1.
*/
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
new file mode 100644
index 000000000000..beddc3665d69
--- /dev/null
+++ b/tools/include/nolibc/compiler.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * NOLIBC compiler support header
+ * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net>
+ */
+#ifndef _NOLIBC_COMPILER_H
+#define _NOLIBC_COMPILER_H
+
+#if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__)
+
+#define _NOLIBC_STACKPROTECTOR
+
+#endif /* defined(__SSP__) ... */
+
+#if defined(__has_attribute)
+# if __has_attribute(no_stack_protector)
+# define __no_stack_protector __attribute__((no_stack_protector))
+# else
+# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
+# endif
+#else
+# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
+#endif /* defined(__has_attribute) */
+
+#endif /* _NOLIBC_COMPILER_H */
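
The detection above keys off the compiler's own __SSP__* macros, so stack-protector support is now picked up automatically from CFLAGS rather than via a hand-set NOLIBC_STACKPROTECTOR, and __no_stack_protector expands to whichever opt-out the compiler supports. Usage sketch (function name is illustrative):

#include "compiler.h"

/* runs before __stack_chk_init(), so it must not use the canary */
void __no_stack_protector early_setup(void)
{
	/* ... no stack-protector instrumentation emitted here ... */
}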
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 04739a6293c4..05a228a6ee78 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -99,11 +99,11 @@
#include "sys.h"
#include "ctype.h"
#include "signal.h"
+#include "unistd.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "time.h"
-#include "unistd.h"
#include "stackprotector.h"
/* Used by programs to avoid std includes */
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index d119cbbbc256..88f7b2d098ff 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -7,13 +7,9 @@
#ifndef _NOLIBC_STACKPROTECTOR_H
#define _NOLIBC_STACKPROTECTOR_H
-#include "arch.h"
+#include "compiler.h"
-#if defined(NOLIBC_STACKPROTECTOR)
-
-#if !defined(__ARCH_SUPPORTS_STACK_PROTECTOR)
-#error "nolibc does not support stack protectors on this arch"
-#endif
+#if defined(_NOLIBC_STACKPROTECTOR)
#include "sys.h"
#include "stdlib.h"
@@ -41,13 +37,14 @@ void __stack_chk_fail_local(void)
__attribute__((weak,section(".data.nolibc_stack_chk")))
uintptr_t __stack_chk_guard;
-__attribute__((weak,no_stack_protector,section(".text.nolibc_stack_chk")))
+__attribute__((weak,section(".text.nolibc_stack_chk"))) __no_stack_protector
void __stack_chk_init(void)
{
my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0);
- /* a bit more randomness in case getrandom() fails */
- __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard;
+ /* a bit more randomness in case getrandom() fails, ensure the guard is never 0 */
+ if (__stack_chk_guard != (uintptr_t) &__stack_chk_guard)
+ __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard;
}
-#endif // defined(NOLIBC_STACKPROTECTOR)
+#endif /* defined(_NOLIBC_STACKPROTECTOR) */
-#endif // _NOLIBC_STACKPROTECTOR_H
+#endif /* _NOLIBC_STACKPROTECTOR_H */
diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
index c1ce4f5e0603..4b282435a59a 100644
--- a/tools/include/nolibc/stdint.h
+++ b/tools/include/nolibc/stdint.h
@@ -36,8 +36,8 @@ typedef ssize_t int_fast16_t;
typedef size_t uint_fast16_t;
typedef ssize_t int_fast32_t;
typedef size_t uint_fast32_t;
-typedef ssize_t int_fast64_t;
-typedef size_t uint_fast64_t;
+typedef int64_t int_fast64_t;
+typedef uint64_t uint_fast64_t;
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
@@ -84,16 +84,30 @@ typedef uint64_t uintmax_t;
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST16_MIN INTPTR_MIN
#define INT_FAST32_MIN INTPTR_MIN
-#define INT_FAST64_MIN INTPTR_MIN
+#define INT_FAST64_MIN INT64_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MAX INTPTR_MAX
#define INT_FAST32_MAX INTPTR_MAX
-#define INT_FAST64_MAX INTPTR_MAX
+#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX SIZE_MAX
#define UINT_FAST32_MAX SIZE_MAX
-#define UINT_FAST64_MAX SIZE_MAX
+#define UINT_FAST64_MAX UINT64_MAX
+
+#ifndef INT_MIN
+#define INT_MIN (-__INT_MAX__ - 1)
+#endif
+#ifndef INT_MAX
+#define INT_MAX __INT_MAX__
+#endif
+
+#ifndef LONG_MIN
+#define LONG_MIN (-__LONG_MAX__ - 1)
+#endif
+#ifndef LONG_MAX
+#define LONG_MAX __LONG_MAX__
+#endif
#endif /* _NOLIBC_STDINT_H */
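
The point of the fast64 change: ssize_t and size_t are only 32 bits on 32-bit targets, so the old typedefs made (u)int_fast64_t unable to hold 64-bit values, and the *_FAST64 limits were wrong to match. The fix can be checked at compile time; a sketch:

#include <stdint.h>

_Static_assert(sizeof(int_fast64_t) * 8 >= 64, "int_fast64_t must hold 64 bits");
_Static_assert(sizeof(uint_fast64_t) * 8 >= 64, "uint_fast64_t must hold 64 bits");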
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 6cbbb52836a0..0eef91daf289 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -21,17 +21,75 @@
#define EOF (-1)
#endif
-/* just define FILE as a non-empty type */
+/* just define FILE as a non-empty type. The value of the pointer gives
+ * the FD: FILE=~fd for fd>=0 or NULL for fd<0. This way positive FILE
+ * pointers are immediately identified as abnormal entries (i.e. possible copies
+ * of valid pointers to something else).
+ */
typedef struct FILE {
char dummy[1];
} FILE;
-/* We define the 3 common stdio files as constant invalid pointers that
- * are easily recognized.
- */
-static __attribute__((unused)) FILE* const stdin = (FILE*)-3;
-static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
-static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
+static __attribute__((unused)) FILE* const stdin = (FILE*)(intptr_t)~STDIN_FILENO;
+static __attribute__((unused)) FILE* const stdout = (FILE*)(intptr_t)~STDOUT_FILENO;
+static __attribute__((unused)) FILE* const stderr = (FILE*)(intptr_t)~STDERR_FILENO;
+
+/* provides a FILE* equivalent of fd. The mode is ignored. */
+static __attribute__((unused))
+FILE *fdopen(int fd, const char *mode __attribute__((unused)))
+{
+ if (fd < 0) {
+ SET_ERRNO(EBADF);
+ return NULL;
+ }
+ return (FILE*)(intptr_t)~fd;
+}
+
+/* provides the fd of stream. */
+static __attribute__((unused))
+int fileno(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+ return ~i;
+}
+
+/* flush a stream. */
+static __attribute__((unused))
+int fflush(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ /* NULL is valid here. */
+ if (i > 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+
+ /* Don't do anything, nolibc does not support buffering. */
+ return 0;
+}
+
+/* close a stream. */
+static __attribute__((unused))
+int fclose(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+
+ if (close(~i))
+ return EOF;
+
+ return 0;
+}
/* getc(), fgetc(), getchar() */
@@ -41,14 +99,8 @@ static __attribute__((unused))
int fgetc(FILE* stream)
{
unsigned char ch;
- int fd;
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
-
- if (read(fd, &ch, 1) <= 0)
+ if (read(fileno(stream), &ch, 1) <= 0)
return EOF;
return ch;
}
@@ -68,14 +120,8 @@ static __attribute__((unused))
int fputc(int c, FILE* stream)
{
unsigned char ch = c;
- int fd;
-
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
- if (write(fd, &ch, 1) <= 0)
+ if (write(fileno(stream), &ch, 1) <= 0)
return EOF;
return ch;
}
@@ -96,12 +142,7 @@ static __attribute__((unused))
int _fwrite(const void *buf, size_t size, FILE *stream)
{
ssize_t ret;
- int fd;
-
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
+ int fd = fileno(stream);
while (size) {
ret = write(fd, buf, size);
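
A minimal usage sketch of the ~fd encoding introduced above (illustration
only, not from this patch; assumes the nolibc headers are in use):

    FILE *f = fdopen(2, "w");      /* (FILE *)(intptr_t)~2, same as stderr */
    fputc('!', f);                 /* writes one byte to fd 2 */
    int fd  = fileno(f);           /* ~(~2) == 2 */
    int bad = fileno((FILE *)1);   /* -1, errno == EBADF: positive pointer
                                    * values can only be stray copies of
                                    * real pointers, never encoded fds */
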
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 894c955d027e..902162f80337 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -102,7 +102,7 @@ char *_getenv(const char *name, char **environ)
return NULL;
}
-static inline __attribute__((unused,always_inline))
+static __inline__ __attribute__((unused,always_inline))
char *getenv(const char *name)
{
extern char **environ;
@@ -231,7 +231,7 @@ int utoh_r(unsigned long in, char *buffer)
/* converts unsigned long <in> to a hex string using the static itoa_buffer
* and returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *utoh(unsigned long in)
{
utoh_r(in, itoa_buffer);
@@ -293,7 +293,7 @@ int itoa_r(long in, char *buffer)
/* for historical compatibility, same as above but returns the pointer to the
* buffer.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *ltoa_r(long in, char *buffer)
{
itoa_r(in, buffer);
@@ -303,7 +303,7 @@ char *ltoa_r(long in, char *buffer)
/* converts long integer <in> to a string using the static itoa_buffer and
* returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *itoa(long in)
{
itoa_r(in, itoa_buffer);
@@ -313,7 +313,7 @@ char *itoa(long in)
/* converts long integer <in> to a string using the static itoa_buffer and
* returns the pointer to that string. Same as above, for compatibility.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *ltoa(long in)
{
itoa_r(in, itoa_buffer);
@@ -323,7 +323,7 @@ char *ltoa(long in)
/* converts unsigned long integer <in> to a string using the static itoa_buffer
* and returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *utoa(unsigned long in)
{
utoa_r(in, itoa_buffer);
@@ -367,7 +367,7 @@ int u64toh_r(uint64_t in, char *buffer)
/* converts uint64_t <in> to a hex string using the static itoa_buffer and
* returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *u64toh(uint64_t in)
{
u64toh_r(in, itoa_buffer);
@@ -429,7 +429,7 @@ int i64toa_r(int64_t in, char *buffer)
/* converts int64_t <in> to a string using the static itoa_buffer and returns
* the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *i64toa(int64_t in)
{
i64toa_r(in, itoa_buffer);
@@ -439,7 +439,7 @@ char *i64toa(int64_t in)
/* converts uint64_t <in> to a string using the static itoa_buffer and returns
* the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *u64toa(uint64_t in)
{
u64toa_r(in, itoa_buffer);
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index fffdaf6ff467..0c2e06c7c477 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -90,7 +90,7 @@ void *memset(void *dst, int b, size_t len)
while (len--) {
/* prevent gcc from recognizing memset() here */
- asm volatile("");
+ __asm__ volatile("");
*(p++) = b;
}
return dst;
@@ -139,7 +139,7 @@ size_t strlen(const char *str)
size_t len;
for (len = 0; str[len]; len++)
- asm("");
+ __asm__("");
return len;
}
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 5d624dc63a42..856249a11890 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -12,15 +12,17 @@
/* system includes */
#include <asm/unistd.h>
-#include <asm/signal.h> // for SIGCHLD
+#include <asm/signal.h> /* for SIGCHLD */
#include <asm/ioctls.h>
#include <asm/mman.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <linux/time.h>
#include <linux/auxvec.h>
-#include <linux/fcntl.h> // for O_* and AT_*
-#include <linux/stat.h> // for statx()
+#include <linux/fcntl.h> /* for O_* and AT_* */
+#include <linux/stat.h> /* for statx() */
+#include <linux/reboot.h> /* for LINUX_REBOOT_* */
+#include <linux/prctl.h>
#include "arch.h"
#include "errno.h"
@@ -322,7 +324,7 @@ static __attribute__((noreturn,unused))
void sys_exit(int status)
{
my_syscall1(__NR_exit, status & 255);
- while(1); // shut the "noreturn" warnings.
+ while(1); /* shut the "noreturn" warnings. */
}
static __attribute__((noreturn,unused))
@@ -336,6 +338,7 @@ void exit(int status)
* pid_t fork(void);
*/
+#ifndef sys_fork
static __attribute__((unused))
pid_t sys_fork(void)
{
@@ -351,6 +354,7 @@ pid_t sys_fork(void)
#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
#endif
}
+#endif
static __attribute__((unused))
pid_t fork(void)
@@ -858,7 +862,7 @@ int open(const char *path, int flags, ...)
va_list args;
va_start(args, flags);
- mode = va_arg(args, mode_t);
+ mode = va_arg(args, int);
va_end(args);
}
@@ -873,6 +877,32 @@ int open(const char *path, int flags, ...)
/*
+ * int prctl(int option, unsigned long arg2, unsigned long arg3,
+ * unsigned long arg4, unsigned long arg5);
+ */
+
+static __attribute__((unused))
+int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5);
+}
+
+static __attribute__((unused))
+int prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ int ret = sys_prctl(option, arg2, arg3, arg4, arg5);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
* int pivot_root(const char *new, const char *old);
*/
@@ -909,7 +939,7 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout)
t.tv_sec = timeout / 1000;
t.tv_nsec = (timeout % 1000) * 1000000;
}
- return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
+ return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
#elif defined(__NR_poll)
return my_syscall3(__NR_poll, fds, nfds, timeout);
#else
@@ -1131,23 +1161,26 @@ int sys_stat(const char *path, struct stat *buf)
long ret;
ret = sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx);
- buf->st_dev = ((statx.stx_dev_minor & 0xff)
- | (statx.stx_dev_major << 8)
- | ((statx.stx_dev_minor & ~0xff) << 12));
- buf->st_ino = statx.stx_ino;
- buf->st_mode = statx.stx_mode;
- buf->st_nlink = statx.stx_nlink;
- buf->st_uid = statx.stx_uid;
- buf->st_gid = statx.stx_gid;
- buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
- | (statx.stx_rdev_major << 8)
- | ((statx.stx_rdev_minor & ~0xff) << 12));
- buf->st_size = statx.stx_size;
- buf->st_blksize = statx.stx_blksize;
- buf->st_blocks = statx.stx_blocks;
- buf->st_atime = statx.stx_atime.tv_sec;
- buf->st_mtime = statx.stx_mtime.tv_sec;
- buf->st_ctime = statx.stx_ctime.tv_sec;
+ buf->st_dev = ((statx.stx_dev_minor & 0xff)
+ | (statx.stx_dev_major << 8)
+ | ((statx.stx_dev_minor & ~0xff) << 12));
+ buf->st_ino = statx.stx_ino;
+ buf->st_mode = statx.stx_mode;
+ buf->st_nlink = statx.stx_nlink;
+ buf->st_uid = statx.stx_uid;
+ buf->st_gid = statx.stx_gid;
+ buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
+ | (statx.stx_rdev_major << 8)
+ | ((statx.stx_rdev_minor & ~0xff) << 12));
+ buf->st_size = statx.stx_size;
+ buf->st_blksize = statx.stx_blksize;
+ buf->st_blocks = statx.stx_blocks;
+ buf->st_atim.tv_sec = statx.stx_atime.tv_sec;
+ buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec;
+ buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec;
+ buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
+ buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec;
+ buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
return ret;
}
#else
@@ -1165,19 +1198,22 @@ int sys_stat(const char *path, struct stat *buf)
#else
#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
#endif
- buf->st_dev = stat.st_dev;
- buf->st_ino = stat.st_ino;
- buf->st_mode = stat.st_mode;
- buf->st_nlink = stat.st_nlink;
- buf->st_uid = stat.st_uid;
- buf->st_gid = stat.st_gid;
- buf->st_rdev = stat.st_rdev;
- buf->st_size = stat.st_size;
- buf->st_blksize = stat.st_blksize;
- buf->st_blocks = stat.st_blocks;
- buf->st_atime = stat.st_atime;
- buf->st_mtime = stat.st_mtime;
- buf->st_ctime = stat.st_ctime;
+ buf->st_dev = stat.st_dev;
+ buf->st_ino = stat.st_ino;
+ buf->st_mode = stat.st_mode;
+ buf->st_nlink = stat.st_nlink;
+ buf->st_uid = stat.st_uid;
+ buf->st_gid = stat.st_gid;
+ buf->st_rdev = stat.st_rdev;
+ buf->st_size = stat.st_size;
+ buf->st_blksize = stat.st_blksize;
+ buf->st_blocks = stat.st_blocks;
+ buf->st_atim.tv_sec = stat.st_atime;
+ buf->st_atim.tv_nsec = stat.st_atime_nsec;
+ buf->st_mtim.tv_sec = stat.st_mtime;
+ buf->st_mtim.tv_nsec = stat.st_mtime_nsec;
+ buf->st_ctim.tv_sec = stat.st_ctime;
+ buf->st_ctim.tv_nsec = stat.st_ctime_nsec;
return ret;
}
#endif
@@ -1365,6 +1401,29 @@ ssize_t write(int fd, const void *buf, size_t count)
return ret;
}
+
+/*
+ * int memfd_create(const char *name, unsigned int flags);
+ */
+
+static __attribute__((unused))
+int sys_memfd_create(const char *name, unsigned int flags)
+{
+ return my_syscall2(__NR_memfd_create, name, flags);
+}
+
+static __attribute__((unused))
+int memfd_create(const char *name, unsigned int flags)
+{
+ ssize_t ret = sys_memfd_create(name, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
/* make sure to include all global symbols */
#include "nolibc.h"
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index aedd7d9e3f64..f96e28bff4ba 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -86,14 +86,6 @@
#define SEEK_CUR 1
#define SEEK_END 2
-/* cmd for reboot() */
-#define LINUX_REBOOT_MAGIC1 0xfee1dead
-#define LINUX_REBOOT_MAGIC2 0x28121969
-#define LINUX_REBOOT_CMD_HALT 0xcdef0123
-#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
-#define LINUX_REBOOT_CMD_RESTART 0x01234567
-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
-
/* Macros used on waitpid()'s return status */
#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
#define WIFEXITED(status) (((status) & 0x7f) == 0)
@@ -206,9 +198,9 @@ struct stat {
off_t st_size; /* total size, in bytes */
blksize_t st_blksize; /* blocksize for file system I/O */
blkcnt_t st_blocks; /* number of 512B blocks allocated */
- time_t st_atime; /* time of last access */
- time_t st_mtime; /* time of last modification */
- time_t st_ctime; /* time of last status change */
+ union { time_t st_atime; struct timespec st_atim; }; /* time of last access */
+ union { time_t st_mtime; struct timespec st_mtim; }; /* time of last modification */
+ union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */
};
/* WARNING, it only deals with the 4096 first majors and 256 first minors */
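
With the anonymous unions above, st_atime aliases st_atim.tv_sec (tv_sec
is the first member of struct timespec), so existing callers keep working
while new ones gain the nanosecond fields. A sketch:

    struct stat st;

    if (stat("/etc/hostname", &st) == 0) {
        time_t secs = st.st_atime;          /* legacy name */
        time_t same = st.st_atim.tv_sec;    /* same storage */
        long   nsec = st.st_atim.tv_nsec;   /* newly available */
    }
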
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index ac7d53d986cd..0e832e10a0b2 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -56,6 +56,21 @@ int tcsetpgrp(int fd, pid_t pid)
return ioctl(fd, TIOCSPGRP, &pid);
}
+#define _syscall(N, ...) \
+({ \
+ long _ret = my_syscall##N(__VA_ARGS__); \
+ if (_ret < 0) { \
+ SET_ERRNO(-_ret); \
+ _ret = -1; \
+ } \
+ _ret; \
+})
+
+#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
+#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
+#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
+#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
+
/* make sure to include all global symbols */
#include "nolibc.h"
diff --git a/tools/include/uapi/asm-generic/bitsperlong.h b/tools/include/uapi/asm-generic/bitsperlong.h
index 23e6c416b85f..352cb81947b8 100644
--- a/tools/include/uapi/asm-generic/bitsperlong.h
+++ b/tools/include/uapi/asm-generic/bitsperlong.h
@@ -1,6 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_GENERIC_BITS_PER_LONG
#define _UAPI__ASM_GENERIC_BITS_PER_LONG
+#ifndef __BITS_PER_LONG
+/*
+ * To stay safe and avoid regressions, only unify the uapi
+ * bitsperlong.h for the archs whose newer toolchains provide the
+ * definitions of __CHAR_BIT__ and __SIZEOF_LONG__.
+ * See the following link for more info:
+ * https://lore.kernel.org/linux-arch/b9624545-2c80-49a1-ac3c-39264a591f7b@app.fastmail.com/
+ */
+#if defined(__CHAR_BIT__) && defined(__SIZEOF_LONG__)
+#define __BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__)
+#else
/*
* There seems to be no way of detecting this automatically from user
* space, so 64 bit architectures should override this in their
@@ -8,8 +20,8 @@
* both 32 and 64 bit user space must not rely on CONFIG_64BIT
* to decide it, but rather check a compiler provided macro.
*/
-#ifndef __BITS_PER_LONG
#define __BITS_PER_LONG 32
#endif
+#endif
#endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */
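
On toolchains that define both builtin macros, __BITS_PER_LONG is now
derived rather than hardcoded; a compile-time sanity check makes the
relationship visible (sketch only, assuming CHAR_BIT == 8):

    #include <asm-generic/bitsperlong.h>
    _Static_assert(__BITS_PER_LONG == 8 * sizeof(long),
                   "__BITS_PER_LONG must match the ABI's long width");
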
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 8756df13be50..54d9c8bf7c55 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -121,6 +121,9 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
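
A hedged usage sketch of the new option (assumes a connected AF_UNIX
socket `sock` on a kernel exposing SO_PEERPIDFD; the value returned is a
pidfd referring to the peer process):

    int pidfd;
    socklen_t len = sizeof(pidfd);

    if (getsockopt(sock, SOL_SOCKET, SO_PEERPIDFD, &pidfd, &len) == 0) {
        /* usable wherever a pidfd is accepted, e.g. waitid(P_PIDFD, ...) */
        close(pidfd);
    }
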
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 45fa180cc56a..fd6c1cb585db 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -38,12 +38,12 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy)
__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
#define __NR_io_cancel 3
__SYSCALL(__NR_io_cancel, sys_io_cancel)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_io_getevents 4
__SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents)
#endif
-/* fs/xattr.c */
#define __NR_setxattr 5
__SYSCALL(__NR_setxattr, sys_setxattr)
#define __NR_lsetxattr 6
@@ -68,58 +68,38 @@ __SYSCALL(__NR_removexattr, sys_removexattr)
__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
#define __NR_fremovexattr 16
__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
-
-/* fs/dcache.c */
#define __NR_getcwd 17
__SYSCALL(__NR_getcwd, sys_getcwd)
-
-/* fs/cookies.c */
#define __NR_lookup_dcookie 18
__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie)
-
-/* fs/eventfd.c */
#define __NR_eventfd2 19
__SYSCALL(__NR_eventfd2, sys_eventfd2)
-
-/* fs/eventpoll.c */
#define __NR_epoll_create1 20
__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
#define __NR_epoll_ctl 21
__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
#define __NR_epoll_pwait 22
__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait)
-
-/* fs/fcntl.c */
#define __NR_dup 23
__SYSCALL(__NR_dup, sys_dup)
#define __NR_dup3 24
__SYSCALL(__NR_dup3, sys_dup3)
#define __NR3264_fcntl 25
__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64)
-
-/* fs/inotify_user.c */
#define __NR_inotify_init1 26
__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
#define __NR_inotify_add_watch 27
__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
#define __NR_inotify_rm_watch 28
__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
-
-/* fs/ioctl.c */
#define __NR_ioctl 29
__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl)
-
-/* fs/ioprio.c */
#define __NR_ioprio_set 30
__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
#define __NR_ioprio_get 31
__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
-
-/* fs/locks.c */
#define __NR_flock 32
__SYSCALL(__NR_flock, sys_flock)
-
-/* fs/namei.c */
#define __NR_mknodat 33
__SYSCALL(__NR_mknodat, sys_mknodat)
#define __NR_mkdirat 34
@@ -130,25 +110,21 @@ __SYSCALL(__NR_unlinkat, sys_unlinkat)
__SYSCALL(__NR_symlinkat, sys_symlinkat)
#define __NR_linkat 37
__SYSCALL(__NR_linkat, sys_linkat)
+
#ifdef __ARCH_WANT_RENAMEAT
/* renameat is superseded with flags by renameat2 */
#define __NR_renameat 38
__SYSCALL(__NR_renameat, sys_renameat)
#endif /* __ARCH_WANT_RENAMEAT */
-/* fs/namespace.c */
#define __NR_umount2 39
__SYSCALL(__NR_umount2, sys_umount)
#define __NR_mount 40
__SYSCALL(__NR_mount, sys_mount)
#define __NR_pivot_root 41
__SYSCALL(__NR_pivot_root, sys_pivot_root)
-
-/* fs/nfsctl.c */
#define __NR_nfsservctl 42
__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
-
-/* fs/open.c */
#define __NR3264_statfs 43
__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \
compat_sys_statfs64)
@@ -161,7 +137,6 @@ __SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \
#define __NR3264_ftruncate 46
__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \
compat_sys_ftruncate64)
-
#define __NR_fallocate 47
__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate)
#define __NR_faccessat 48
@@ -186,20 +161,12 @@ __SYSCALL(__NR_openat, sys_openat)
__SYSCALL(__NR_close, sys_close)
#define __NR_vhangup 58
__SYSCALL(__NR_vhangup, sys_vhangup)
-
-/* fs/pipe.c */
#define __NR_pipe2 59
__SYSCALL(__NR_pipe2, sys_pipe2)
-
-/* fs/quota.c */
#define __NR_quotactl 60
__SYSCALL(__NR_quotactl, sys_quotactl)
-
-/* fs/readdir.c */
#define __NR_getdents64 61
__SYSCALL(__NR_getdents64, sys_getdents64)
-
-/* fs/read_write.c */
#define __NR3264_lseek 62
__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
#define __NR_read 63
@@ -218,12 +185,9 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
#define __NR_pwritev 70
__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
-
-/* fs/sendfile.c */
#define __NR3264_sendfile 71
__SYSCALL(__NR3264_sendfile, sys_sendfile64)
-/* fs/select.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_pselect6 72
__SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32)
@@ -231,21 +195,17 @@ __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_psel
__SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32)
#endif
-/* fs/signalfd.c */
#define __NR_signalfd4 74
__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
-
-/* fs/splice.c */
#define __NR_vmsplice 75
__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_splice 76
__SYSCALL(__NR_splice, sys_splice)
#define __NR_tee 77
__SYSCALL(__NR_tee, sys_tee)
-
-/* fs/stat.c */
#define __NR_readlinkat 78
__SYSCALL(__NR_readlinkat, sys_readlinkat)
+
#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
#define __NR3264_fstatat 79
__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
@@ -253,13 +213,13 @@ __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
#endif
-/* fs/sync.c */
#define __NR_sync 81
__SYSCALL(__NR_sync, sys_sync)
#define __NR_fsync 82
__SYSCALL(__NR_fsync, sys_fsync)
#define __NR_fdatasync 83
__SYSCALL(__NR_fdatasync, sys_fdatasync)
+
#ifdef __ARCH_WANT_SYNC_FILE_RANGE2
#define __NR_sync_file_range2 84
__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \
@@ -270,9 +230,9 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
compat_sys_sync_file_range)
#endif
-/* fs/timerfd.c */
#define __NR_timerfd_create 85
__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timerfd_settime 86
__SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \
@@ -282,45 +242,35 @@ __SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \
sys_timerfd_gettime)
#endif
-/* fs/utimes.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_utimensat 88
__SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat)
#endif
-/* kernel/acct.c */
#define __NR_acct 89
__SYSCALL(__NR_acct, sys_acct)
-
-/* kernel/capability.c */
#define __NR_capget 90
__SYSCALL(__NR_capget, sys_capget)
#define __NR_capset 91
__SYSCALL(__NR_capset, sys_capset)
-
-/* kernel/exec_domain.c */
#define __NR_personality 92
__SYSCALL(__NR_personality, sys_personality)
-
-/* kernel/exit.c */
#define __NR_exit 93
__SYSCALL(__NR_exit, sys_exit)
#define __NR_exit_group 94
__SYSCALL(__NR_exit_group, sys_exit_group)
#define __NR_waitid 95
__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid)
-
-/* kernel/fork.c */
#define __NR_set_tid_address 96
__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
#define __NR_unshare 97
__SYSCALL(__NR_unshare, sys_unshare)
-/* kernel/futex.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_futex 98
__SC_3264(__NR_futex, sys_futex_time32, sys_futex)
#endif
+
#define __NR_set_robust_list 99
__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
compat_sys_set_robust_list)
@@ -328,43 +278,40 @@ __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
compat_sys_get_robust_list)
-/* kernel/hrtimer.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_nanosleep 101
__SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep)
#endif
-/* kernel/itimer.c */
#define __NR_getitimer 102
__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer)
#define __NR_setitimer 103
__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer)
-
-/* kernel/kexec.c */
#define __NR_kexec_load 104
__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load)
-
-/* kernel/module.c */
#define __NR_init_module 105
__SYSCALL(__NR_init_module, sys_init_module)
#define __NR_delete_module 106
__SYSCALL(__NR_delete_module, sys_delete_module)
-
-/* kernel/posix-timers.c */
#define __NR_timer_create 107
__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timer_gettime 108
__SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime)
#endif
+
#define __NR_timer_getoverrun 109
__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timer_settime 110
__SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime)
#endif
+
#define __NR_timer_delete 111
__SYSCALL(__NR_timer_delete, sys_timer_delete)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_settime 112
__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime)
@@ -377,15 +324,10 @@ __SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \
sys_clock_nanosleep)
#endif
-/* kernel/printk.c */
#define __NR_syslog 116
__SYSCALL(__NR_syslog, sys_syslog)
-
-/* kernel/ptrace.c */
#define __NR_ptrace 117
__SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace)
-
-/* kernel/sched/core.c */
#define __NR_sched_setparam 118
__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
#define __NR_sched_setscheduler 119
@@ -406,13 +348,13 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield)
__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
#define __NR_sched_get_priority_min 126
__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_sched_rr_get_interval 127
__SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \
sys_sched_rr_get_interval)
#endif
-/* kernel/signal.c */
#define __NR_restart_syscall 128
__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
#define __NR_kill 129
@@ -431,18 +373,18 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
__SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
#define __NR_rt_sigpending 136
__SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_rt_sigtimedwait 137
__SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \
sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
#endif
+
#define __NR_rt_sigqueueinfo 138
__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
compat_sys_rt_sigqueueinfo)
#define __NR_rt_sigreturn 139
__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn)
-
-/* kernel/sys.c */
#define __NR_setpriority 140
__SYSCALL(__NR_setpriority, sys_setpriority)
#define __NR_getpriority 141
@@ -507,7 +449,6 @@ __SYSCALL(__NR_prctl, sys_prctl)
#define __NR_getcpu 168
__SYSCALL(__NR_getcpu, sys_getcpu)
-/* kernel/time.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_gettimeofday 169
__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
@@ -517,7 +458,6 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
__SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
#endif
-/* kernel/sys.c */
#define __NR_getpid 172
__SYSCALL(__NR_getpid, sys_getpid)
#define __NR_getppid 173
@@ -534,12 +474,11 @@ __SYSCALL(__NR_getegid, sys_getegid)
__SYSCALL(__NR_gettid, sys_gettid)
#define __NR_sysinfo 179
__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
-
-/* ipc/mqueue.c */
#define __NR_mq_open 180
__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
#define __NR_mq_unlink 181
__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_mq_timedsend 182
__SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
@@ -547,12 +486,11 @@ __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
__SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \
sys_mq_timedreceive)
#endif
+
#define __NR_mq_notify 184
__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
#define __NR_mq_getsetattr 185
__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr)
-
-/* ipc/msg.c */
#define __NR_msgget 186
__SYSCALL(__NR_msgget, sys_msgget)
#define __NR_msgctl 187
@@ -561,20 +499,18 @@ __SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl)
__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv)
#define __NR_msgsnd 189
__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
-
-/* ipc/sem.c */
#define __NR_semget 190
__SYSCALL(__NR_semget, sys_semget)
#define __NR_semctl 191
__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_semtimedop 192
__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop)
#endif
+
#define __NR_semop 193
__SYSCALL(__NR_semop, sys_semop)
-
-/* ipc/shm.c */
#define __NR_shmget 194
__SYSCALL(__NR_shmget, sys_shmget)
#define __NR_shmctl 195
@@ -583,8 +519,6 @@ __SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl)
__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat)
#define __NR_shmdt 197
__SYSCALL(__NR_shmdt, sys_shmdt)
-
-/* net/socket.c */
#define __NR_socket 198
__SYSCALL(__NR_socket, sys_socket)
#define __NR_socketpair 199
@@ -615,40 +549,30 @@ __SYSCALL(__NR_shutdown, sys_shutdown)
__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg)
#define __NR_recvmsg 212
__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg)
-
-/* mm/filemap.c */
#define __NR_readahead 213
__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead)
-
-/* mm/nommu.c, also with MMU */
#define __NR_brk 214
__SYSCALL(__NR_brk, sys_brk)
#define __NR_munmap 215
__SYSCALL(__NR_munmap, sys_munmap)
#define __NR_mremap 216
__SYSCALL(__NR_mremap, sys_mremap)
-
-/* security/keys/keyctl.c */
#define __NR_add_key 217
__SYSCALL(__NR_add_key, sys_add_key)
#define __NR_request_key 218
__SYSCALL(__NR_request_key, sys_request_key)
#define __NR_keyctl 219
__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
-
-/* arch/example/kernel/sys_example.c */
#define __NR_clone 220
__SYSCALL(__NR_clone, sys_clone)
#define __NR_execve 221
__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
-
#define __NR3264_mmap 222
__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
-/* mm/fadvise.c */
#define __NR3264_fadvise64 223
__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64)
-/* mm/, CONFIG_MMU only */
+/* CONFIG_MMU only */
#ifndef __ARCH_NOMMU
#define __NR_swapon 224
__SYSCALL(__NR_swapon, sys_swapon)
@@ -691,6 +615,7 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_accept4 242
__SYSCALL(__NR_accept4, sys_accept4)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_recvmmsg 243
__SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32)
@@ -706,6 +631,7 @@ __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recv
#define __NR_wait4 260
__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
#endif
+
#define __NR_prlimit64 261
__SYSCALL(__NR_prlimit64, sys_prlimit64)
#define __NR_fanotify_init 262
@@ -716,10 +642,12 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
#define __NR_open_by_handle_at 265
__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_adjtime 266
__SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime)
#endif
+
#define __NR_syncfs 267
__SYSCALL(__NR_syncfs, sys_syncfs)
#define __NR_setns 268
@@ -770,15 +698,19 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_io_pgetevents 292
__SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents)
#endif
+
#define __NR_rseq 293
__SYSCALL(__NR_rseq, sys_rseq)
#define __NR_kexec_file_load 294
__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
+
/* 295 through 402 are unassigned to sync up with generic numbers, don't use */
+
#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32
#define __NR_clock_gettime64 403
__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
@@ -844,13 +776,14 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
__SYSCALL(__NR_fspick, sys_fspick)
#define __NR_pidfd_open 434
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
+
#ifdef __ARCH_WANT_SYS_CLONE3
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
#endif
+
#define __NR_close_range 436
__SYSCALL(__NR_close_range, sys_close_range)
-
#define __NR_openat2 437
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
@@ -865,7 +798,6 @@ __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
#define __NR_quotactl_fd 443
__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
-
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
#define __NR_landlock_add_rule 445
@@ -877,17 +809,19 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
#define __NR_memfd_secret 447
__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
#endif
+
#define __NR_process_mrelease 448
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-
#define __NR_set_mempolicy_home_node 450
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+#define __NR_cachestat 451
+__SYSCALL(__NR_cachestat, sys_cachestat)
+
#undef __NR_syscalls
-#define __NR_syscalls 451
+#define __NR_syscalls 452
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
index da5206517158..c65267afc341 100644
--- a/tools/include/uapi/asm/bitsperlong.h
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -1,8 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if defined(__i386__) || defined(__x86_64__)
#include "../../../arch/x86/include/uapi/asm/bitsperlong.h"
-#elif defined(__aarch64__)
-#include "../../../arch/arm64/include/uapi/asm/bitsperlong.h"
#elif defined(__powerpc__)
#include "../../../arch/powerpc/include/uapi/asm/bitsperlong.h"
#elif defined(__s390__)
@@ -13,12 +11,8 @@
#include "../../../arch/mips/include/uapi/asm/bitsperlong.h"
#elif defined(__ia64__)
#include "../../../arch/ia64/include/uapi/asm/bitsperlong.h"
-#elif defined(__riscv)
-#include "../../../arch/riscv/include/uapi/asm/bitsperlong.h"
#elif defined(__alpha__)
#include "../../../arch/alpha/include/uapi/asm/bitsperlong.h"
-#elif defined(__loongarch__)
-#include "../../../arch/loongarch/include/uapi/asm/bitsperlong.h"
#else
#include <asm-generic/bitsperlong.h>
#endif
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..7000e5910a1d 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
-#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+/*
+ * Top 4 bits of every non-engine counter are GT id.
+ */
+#define __I915_PMU_GT_SHIFT (60)
+
+#define ___I915_PMU_OTHER(gt, x) \
+ (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
+ ((__u64)(gt) << __I915_PMU_GT_SHIFT))
+
+#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
+#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
+#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
+#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
+#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
+#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4)
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -659,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
- * * 1 if HuC firmware is authenticated and running.
+ * * 1 if HuC firmware is loaded and fully authenticated,
+ * * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
@@ -771,6 +787,25 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
+/*
+ * Query the status of PXP support in i915.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ * -ENODEV = PXP support is not available on the GPU device or in the
+ * kernel due to missing component drivers or kernel configs.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of
+ * the following values:
+ * 1 = PXP feature is supported and is ready for use.
+ * 2 = PXP feature is supported but should be ready soon (pending
+ * initialization of non-i915 system dependencies).
+ *
+ * NOTE: When param is supported (positive return values), user space should
+ * still refer to the GEM PXP context-creation UAPI header specs to be
+ * aware of possible failures due to the system state machine at the time.
+ */
+#define I915_PARAM_PXP_STATUS 58
+
/* Must be kept compact -- no holes and well documented */
/**
@@ -2096,6 +2131,21 @@ struct drm_i915_gem_context_param {
*
* -ENODEV: feature not available
* -EPERM: trying to mark a recoverable or not bannable context as protected
+ * -ENXIO: A dependency such as a component driver or firmware is not yet
+ * loaded so user space may need to attempt again. Depending on the
+ * device, this error may be reported if protected context creation is
+ * attempted very early after kernel start because the internal timeout
+ * waiting for such dependencies is not guaranteed to be larger than
+ * required (numbers differ depending on system and kernel config):
+ * - ADL/RPL: dependencies may take up to 3 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * - MTL: dependencies may take up to 8 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * NOTE: such dependencies happen once, so a subsequent call to create a
+ * protected context after a prior successful call will not experience
+ * such timeouts and will not return -ENXIO (unless the driver is reloaded,
+ * or, depending on the device, resumes from a suspended state).
+ * -EIO: The firmware did not succeed in creating the protected context.
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
/* Must be kept compact -- no holes and well documented */
@@ -3630,9 +3680,13 @@ struct drm_i915_gem_create_ext {
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
+ *
+ * For I915_GEM_CREATE_EXT_SET_PAT usage see
+ * struct drm_i915_gem_create_ext_set_pat.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions;
};
@@ -3747,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
};
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ * .pat_index = 0,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = PAGE_SIZE,
+ * .extensions = (uintptr_t)&set_pat_ext,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+ /**
+ * @pat_index: PAT index to be set
+ * PAT index is a bit field in Page Table Entry to control caching
+ * behaviors for GPU accesses. The definition of PAT index is
+ * platform dependent and can be found in hardware specifications,
+ */
+ __u32 pat_index;
+ /** @rsvd: reserved for future use */
+ __u32 rsvd;
+};
+
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
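
Following the ___I915_PMU_OTHER() definition above, a per-GT counter is
simply the GT-0 config with the GT id placed in the top 4 bits; a sketch:

    __u64 gt0 = __I915_PMU_RC6_RESIDENCY(0);   /* == I915_PMU_RC6_RESIDENCY */
    __u64 gt1 = __I915_PMU_RC6_RESIDENCY(1);   /* == gt0 | (1ULL << 60) */
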
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c994ff5b157c..60a9d59beeab 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1273,6 +1273,9 @@ enum {
/* Create a map that will be registered/unregistered by the backing bpf_link */
BPF_F_LINK = (1U << 13),
+
+/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
+ BPF_F_PATH_FD = (1U << 14),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1421,6 +1424,13 @@ union bpf_attr {
__aligned_u64 pathname;
__u32 bpf_fd;
__u32 file_flags;
+ /* Same as dirfd in openat() syscall; see openat(2)
+ * manpage for details of path FD and pathname semantics;
+ * path_fd should be accompanied by the BPF_F_PATH_FD flag set in
+ * file_flags field, otherwise it should be set to zero;
+ * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed.
+ */
+ __s32 path_fd;
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
@@ -3168,6 +3178,10 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
* rules.
+ * **BPF_FIB_LOOKUP_TBID**
+ * Used with BPF_FIB_LOOKUP_DIRECT.
+ * Use the routing table ID present in *params*->tbid
+ * for the fib lookup.
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
@@ -6822,6 +6836,7 @@ enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
};
enum {
@@ -6882,9 +6897,19 @@ struct bpf_fib_lookup {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
- /* output */
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
+ union {
+ struct {
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ };
+ /* input: when accompanied by the
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID' flags, a
+ * specific routing table to use for the fib lookup.
+ */
+ __u32 tbid;
+ };
+
__u8 smac[6]; /* ETH_ALEN */
__u8 dmac[6]; /* ETH_ALEN */
};
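
A sketch of pinning relative to a directory fd with the new flag
(illustration only; `prog_fd` is assumed to be an already loaded program):

    union bpf_attr attr = {};
    int dirfd = open("/sys/fs/bpf", O_RDONLY | O_DIRECTORY);

    attr.pathname   = (__u64)(uintptr_t)"my_prog";   /* relative to dirfd */
    attr.bpf_fd     = prog_fd;
    attr.path_fd    = dirfd;
    attr.file_flags = BPF_F_PATH_FD;                 /* marks path_fd valid */
    syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
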
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index e8c07da58c9f..6c80f96049bd 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -112,4 +112,9 @@
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
+ compare object identity and may not
+ be usable with open_by_handle_at(2) */
+
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 737318b1c1d9..f089ab290978 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
#define KVM_CAP_COUNTER_OFFSET 227
+#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
+#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1442,6 +1444,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_ARM_PV_TIME,
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
+ KVM_DEV_TYPE_RISCV_AIA,
+#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA
KVM_DEV_TYPE_MAX,
};
@@ -1613,7 +1617,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
/*
- * vcpu version available with KVM_ENABLE_CAP
+ * vcpu version available with KVM_CAP_ENABLE_CAP
* vm version available with KVM_CAP_ENABLE_CAP_VM
*/
#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index f55bc680b5b0..a246e11988d5 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -4,6 +4,7 @@
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
+#include <linux/types.h>
#define MREMAP_MAYMOVE 1
#define MREMAP_FIXED 2
@@ -41,4 +42,17 @@
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+struct cachestat_range {
+ __u64 off;
+ __u64 len;
+};
+
+struct cachestat {
+ __u64 nr_cache;
+ __u64 nr_dirty;
+ __u64 nr_writeback;
+ __u64 nr_evicted;
+ __u64 nr_recently_evicted;
+};
+
#endif /* _UAPI_LINUX_MMAN_H */
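
A hedged sketch of filling the new structures through the cachestat
syscall added earlier in this series (__NR_cachestat, 451); `fd` is any
open file:

    struct cachestat_range range = { .off = 0, .len = 4096 };
    struct cachestat cs;

    if (syscall(__NR_cachestat, fd, &range, &cs, 0) == 0)
        printf("cached %llu dirty %llu writeback %llu\n",
               (unsigned long long)cs.nr_cache,
               (unsigned long long)cs.nr_dirty,
               (unsigned long long)cs.nr_writeback);
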
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index 4d93967f8aea..8eb0d7b758d2 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -74,7 +74,8 @@
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
-#define MOVE_MOUNT__MASK 0x00000177
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
/*
* fsopen() flags.
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index f23d9a16507f..3c36aeade991 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -294,4 +294,15 @@ struct prctl_mm_map {
#define PR_SET_MEMORY_MERGE 67
#define PR_GET_MEMORY_MERGE 68
+
+#define PR_RISCV_V_SET_CONTROL 69
+#define PR_RISCV_V_GET_CONTROL 70
+# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0
+# define PR_RISCV_V_VSTATE_CTRL_OFF 1
+# define PR_RISCV_V_VSTATE_CTRL_ON 2
+# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4)
+# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3
+# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
+# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+
#endif /* _LINUX_PRCTL_H */
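
A sketch of driving the new pair; reading the masks above suggests bits
0-1 select the current state, bits 2-3 the state after execve(), and bit
4 inheritance (this layout is an assumption from the mask names):

    /* enable vector for the current task and keep it across execve() */
    prctl(PR_RISCV_V_SET_CONTROL,
          PR_RISCV_V_VSTATE_CTRL_ON |
          (PR_RISCV_V_VSTATE_CTRL_ON << 2) |    /* assumed "next" field */
          PR_RISCV_V_VSTATE_CTRL_INHERIT, 0, 0, 0);
    long ctrl = prctl(PR_RISCV_V_GET_CONTROL, 0, 0, 0, 0);
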
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 92e1b700b51c..f5c48b61ab62 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -45,6 +45,25 @@
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
/* Specify an eventfd file descriptor to signal on log write. */
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to 1 or more of its virtqueues using
+ * the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit caller's cgroups and namespaces,
+ * and will share the caller's memory space. The new thread will also be
+ * counted against the caller's RLIMIT_NPROC value.
+ *
+ * The worker's ID used in other commands will be returned in
+ * vhost_worker_state.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
+ * virtqueue. If userspace is not able to call this for workers its created,
+ * the kernel will free all the device's workers when the device is closed.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
/* Ring setup. */
/* Set number of descriptors in ring. This parameter can not
@@ -70,6 +89,18 @@
#define VHOST_VRING_BIG_ENDIAN 1
#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues.
+ *
+ * This will replace the virtqueue's existing worker. If the replaced worker
+ * is no longer attached to any virtqueues, it can be freed with
+ * VHOST_FREE_WORKER.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
+ struct vhost_vring_worker)
+/* Return the vring worker's ID */
+#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \
+ struct vhost_vring_worker)
/* The following ioctls use eventfd file descriptors to signal and poll
* for events. */
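
A sketch of the worker lifecycle the comments above describe (the
vhost_worker_state/vhost_vring_worker field names are assumptions from
the vhost uapi; `vhost_fd` is an owned device):

    struct vhost_worker_state state = { 0 };
    struct vhost_vring_worker vw = { 0 };

    ioctl(vhost_fd, VHOST_NEW_WORKER, &state);    /* kernel fills worker id */
    vw.index     = 1;                             /* virtqueue to rebind */
    vw.worker_id = state.worker_id;
    ioctl(vhost_fd, VHOST_ATTACH_VRING_WORKER, &vw);
    /* ... later, once no virtqueue uses it: */
    ioctl(vhost_fd, VHOST_FREE_WORKER, &state);
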
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 0aa955aa8246..f9939da41122 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -274,6 +274,7 @@ typedef int __bitwise snd_pcm_subformat_t;
#define SNDRV_PCM_INFO_DOUBLE 0x00000004 /* Double buffering needed for PCM start/stop */
#define SNDRV_PCM_INFO_BATCH 0x00000010 /* double buffering */
#define SNDRV_PCM_INFO_SYNC_APPLPTR 0x00000020 /* need the explicit sync of appl_ptr update */
+#define SNDRV_PCM_INFO_PERFECT_DRAIN 0x00000040 /* silencing at the end of stream is not required */
#define SNDRV_PCM_INFO_INTERLEAVED 0x00000100 /* channels are interleaved */
#define SNDRV_PCM_INFO_NONINTERLEAVED 0x00000200 /* channels are not interleaved */
#define SNDRV_PCM_INFO_COMPLEX 0x00000400 /* complex frame organization (mmap only) */
@@ -383,6 +384,9 @@ typedef int snd_pcm_hw_param_t;
#define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0) /* avoid rate resampling */
#define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER (1<<1) /* export buffer */
#define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP (1<<2) /* disable period wakeups */
+#define SNDRV_PCM_HW_PARAMS_NO_DRAIN_SILENCE (1<<3) /* suppress the filling of
+ * silence samples during drain
+ */
struct snd_interval {
unsigned int min, max;
@@ -708,7 +712,7 @@ enum {
* Raw MIDI section - /dev/snd/midi??
*/
-#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 2)
+#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4)
enum {
SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -719,6 +723,7 @@ enum {
#define SNDRV_RAWMIDI_INFO_OUTPUT 0x00000001
#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002
#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004
+#define SNDRV_RAWMIDI_INFO_UMP 0x00000008
struct snd_rawmidi_info {
unsigned int device; /* RO/WR (control): device number */
@@ -779,6 +784,72 @@ struct snd_rawmidi_status {
};
#endif
+/* UMP EP info flags */
+#define SNDRV_UMP_EP_INFO_STATIC_BLOCKS 0x01
+
+/* UMP EP Protocol / JRTS capability bits */
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI_MASK 0x0300
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI1 0x0100 /* MIDI 1.0 */
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI2 0x0200 /* MIDI 2.0 */
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_MASK 0x0003
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_TX 0x0001 /* JRTS Transmit */
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_RX 0x0002 /* JRTS Receive */
+
+/* UMP Endpoint information */
+struct snd_ump_endpoint_info {
+ int card; /* card number */
+ int device; /* device number */
+ unsigned int flags; /* additional info */
+ unsigned int protocol_caps; /* protocol capabilities */
+ unsigned int protocol; /* current protocol */
+ unsigned int num_blocks; /* # of function blocks */
+ unsigned short version; /* UMP major/minor version */
+ unsigned short family_id; /* MIDI device family ID */
+ unsigned short model_id; /* MIDI family model ID */
+ unsigned int manufacturer_id; /* MIDI manufacturer ID */
+ unsigned char sw_revision[4]; /* software revision */
+ unsigned short padding;
+ unsigned char name[128]; /* endpoint name string */
+ unsigned char product_id[128]; /* unique product id string */
+ unsigned char reserved[32];
+} __packed;
+
+/* UMP direction */
+#define SNDRV_UMP_DIR_INPUT 0x01
+#define SNDRV_UMP_DIR_OUTPUT 0x02
+#define SNDRV_UMP_DIR_BIDIRECTION 0x03
+
+/* UMP block info flags */
+#define SNDRV_UMP_BLOCK_IS_MIDI1 (1U << 0) /* MIDI 1.0 port w/o restrict */
+#define SNDRV_UMP_BLOCK_IS_LOWSPEED (1U << 1) /* 31.25Kbps B/W MIDI1 port */
+
+/* UMP block user-interface hint */
+#define SNDRV_UMP_BLOCK_UI_HINT_UNKNOWN 0x00
+#define SNDRV_UMP_BLOCK_UI_HINT_RECEIVER 0x01
+#define SNDRV_UMP_BLOCK_UI_HINT_SENDER 0x02
+#define SNDRV_UMP_BLOCK_UI_HINT_BOTH 0x03
+
+/* UMP groups and blocks */
+#define SNDRV_UMP_MAX_GROUPS 16
+#define SNDRV_UMP_MAX_BLOCKS 32
+
+/* UMP Block information */
+struct snd_ump_block_info {
+ int card; /* card number */
+ int device; /* device number */
+ unsigned char block_id; /* block ID (R/W) */
+ unsigned char direction; /* UMP direction */
+ unsigned char active; /* Activeness */
+ unsigned char first_group; /* first group ID */
+ unsigned char num_groups; /* number of groups */
+ unsigned char midi_ci_version; /* MIDI-CI support version */
+ unsigned char sysex8_streams; /* max number of sysex8 streams */
+ unsigned char ui_hint; /* user interface hint */
+ unsigned int flags; /* various info flags */
+ unsigned char name[128]; /* block name string */
+ unsigned char reserved[32];
+} __packed;
+
#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int)
#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info)
#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int)
@@ -786,6 +857,9 @@ struct snd_rawmidi_status {
#define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status)
#define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int)
#define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int)
+/* Additional ioctls for UMP rawmidi devices */
+#define SNDRV_UMP_IOCTL_ENDPOINT_INFO _IOR('W', 0x40, struct snd_ump_endpoint_info)
+#define SNDRV_UMP_IOCTL_BLOCK_INFO _IOR('W', 0x41, struct snd_ump_block_info)
/*
* Timer section - /dev/snd/timer
@@ -961,7 +1035,7 @@ struct snd_timer_tread {
* *
****************************************************************************/
-#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8)
+#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 9)
struct snd_ctl_card_info {
int card; /* card number */
@@ -1122,6 +1196,9 @@ struct snd_ctl_tlv {
#define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int)
#define SNDRV_CTL_IOCTL_RAWMIDI_INFO _IOWR('U', 0x41, struct snd_rawmidi_info)
#define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int)
+#define SNDRV_CTL_IOCTL_UMP_NEXT_DEVICE _IOWR('U', 0x43, int)
+#define SNDRV_CTL_IOCTL_UMP_ENDPOINT_INFO _IOWR('U', 0x44, struct snd_ump_endpoint_info)
+#define SNDRV_CTL_IOCTL_UMP_BLOCK_INFO _IOWR('U', 0x45, struct snd_ump_block_info)
#define SNDRV_CTL_IOCTL_POWER _IOWR('U', 0xd0, int)
#define SNDRV_CTL_IOCTL_POWER_STATE _IOR('U', 0xd1, int)
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
index 1573dae4259d..250629a09423 100644
--- a/tools/lib/api/fs/cgroup.c
+++ b/tools/lib/api/fs/cgroup.c
@@ -14,7 +14,7 @@ struct cgroupfs_cache_entry {
};
/* just cache last used one */
-static struct cgroupfs_cache_entry cached;
+static struct cgroupfs_cache_entry *cached;
int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
{
@@ -24,9 +24,9 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
char *p, *path;
char mountpoint[PATH_MAX];
- if (!strcmp(cached.subsys, subsys)) {
- if (strlen(cached.mountpoint) < maxlen) {
- strcpy(buf, cached.mountpoint);
+ if (cached && !strcmp(cached->subsys, subsys)) {
+ if (strlen(cached->mountpoint) < maxlen) {
+ strcpy(buf, cached->mountpoint);
return 0;
}
return -1;
@@ -91,8 +91,13 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
free(line);
fclose(fp);
- strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1);
- strcpy(cached.mountpoint, mountpoint);
+ if (!cached)
+ cached = calloc(1, sizeof(*cached));
+
+ if (cached) {
+ strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1);
+ strcpy(cached->mountpoint, mountpoint);
+ }
if (mountpoint[0] && strlen(mountpoint) < maxlen) {
strcpy(buf, mountpoint);
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 82f53d81a7a7..5cb0eeec2c8a 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
@@ -10,6 +11,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <pthread.h>
#include <unistd.h>
#include <sys/mount.h>
@@ -43,7 +45,7 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-static const char * const sysfs__fs_known_mountpoints[] = {
+static const char * const sysfs__known_mountpoints[] = {
"/sys",
0,
};
@@ -86,87 +88,89 @@ static const char * const bpf_fs__known_mountpoints[] = {
};
struct fs {
- const char *name;
- const char * const *mounts;
- char path[PATH_MAX];
- bool found;
- bool checked;
- long magic;
-};
-
-enum {
- FS__SYSFS = 0,
- FS__PROCFS = 1,
- FS__DEBUGFS = 2,
- FS__TRACEFS = 3,
- FS__HUGETLBFS = 4,
- FS__BPF_FS = 5,
+ const char * const name;
+ const char * const * const mounts;
+ char *path;
+ pthread_mutex_t mount_mutex;
+ const long magic;
};
#ifndef TRACEFS_MAGIC
#define TRACEFS_MAGIC 0x74726163
#endif
-static struct fs fs__entries[] = {
- [FS__SYSFS] = {
- .name = "sysfs",
- .mounts = sysfs__fs_known_mountpoints,
- .magic = SYSFS_MAGIC,
- .checked = false,
- },
- [FS__PROCFS] = {
- .name = "proc",
- .mounts = procfs__known_mountpoints,
- .magic = PROC_SUPER_MAGIC,
- .checked = false,
- },
- [FS__DEBUGFS] = {
- .name = "debugfs",
- .mounts = debugfs__known_mountpoints,
- .magic = DEBUGFS_MAGIC,
- .checked = false,
- },
- [FS__TRACEFS] = {
- .name = "tracefs",
- .mounts = tracefs__known_mountpoints,
- .magic = TRACEFS_MAGIC,
- .checked = false,
- },
- [FS__HUGETLBFS] = {
- .name = "hugetlbfs",
- .mounts = hugetlbfs__known_mountpoints,
- .magic = HUGETLBFS_MAGIC,
- .checked = false,
- },
- [FS__BPF_FS] = {
- .name = "bpf",
- .mounts = bpf_fs__known_mountpoints,
- .magic = BPF_FS_MAGIC,
- .checked = false,
- },
-};
+static void fs__init_once(struct fs *fs);
+static const char *fs__mountpoint(const struct fs *fs);
+static const char *fs__mount(struct fs *fs);
+
+#define FS(lower_name, fs_name, upper_name) \
+static struct fs fs__##lower_name = { \
+ .name = #fs_name, \
+ .mounts = lower_name##__known_mountpoints, \
+ .magic = upper_name##_MAGIC, \
+ .mount_mutex = PTHREAD_MUTEX_INITIALIZER, \
+}; \
+ \
+static void lower_name##_init_once(void) \
+{ \
+ struct fs *fs = &fs__##lower_name; \
+ \
+ fs__init_once(fs); \
+} \
+ \
+const char *lower_name##__mountpoint(void) \
+{ \
+ static pthread_once_t init_once = PTHREAD_ONCE_INIT; \
+ struct fs *fs = &fs__##lower_name; \
+ \
+ pthread_once(&init_once, lower_name##_init_once); \
+ return fs__mountpoint(fs); \
+} \
+ \
+const char *lower_name##__mount(void) \
+{ \
+ const char *mountpoint = lower_name##__mountpoint(); \
+ struct fs *fs = &fs__##lower_name; \
+ \
+ if (mountpoint) \
+ return mountpoint; \
+ \
+ return fs__mount(fs); \
+} \
+ \
+bool lower_name##__configured(void) \
+{ \
+ return lower_name##__mountpoint() != NULL; \
+}
+
+FS(sysfs, sysfs, SYSFS);
+FS(procfs, procfs, PROC_SUPER);
+FS(debugfs, debugfs, DEBUGFS);
+FS(tracefs, tracefs, TRACEFS);
+FS(hugetlbfs, hugetlbfs, HUGETLBFS);
+FS(bpf_fs, bpf, BPF_FS);
static bool fs__read_mounts(struct fs *fs)
{
- bool found = false;
char type[100];
FILE *fp;
+ char path[PATH_MAX + 1];
fp = fopen("/proc/mounts", "r");
if (fp == NULL)
- return NULL;
+ return false;
- while (!found &&
- fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
- fs->path, type) == 2) {
+ while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+ path, type) == 2) {
- if (strcmp(type, fs->name) == 0)
- found = true;
+ if (strcmp(type, fs->name) == 0) {
+ fs->path = strdup(path);
+ fclose(fp);
+ return fs->path != NULL;
+ }
}
-
fclose(fp);
- fs->checked = true;
- return fs->found = found;
+ return false;
}
static int fs__valid_mount(const char *fs, long magic)
@@ -188,8 +192,9 @@ static bool fs__check_mounts(struct fs *fs)
ptr = fs->mounts;
while (*ptr) {
if (fs__valid_mount(*ptr, fs->magic) == 0) {
- fs->found = true;
- strcpy(fs->path, *ptr);
+ fs->path = strdup(*ptr);
+ if (!fs->path)
+ return false;
return true;
}
ptr++;
@@ -227,43 +232,26 @@ static bool fs__env_override(struct fs *fs)
if (!override_path)
return false;
- fs->found = true;
- fs->checked = true;
- strncpy(fs->path, override_path, sizeof(fs->path) - 1);
- fs->path[sizeof(fs->path) - 1] = '\0';
+ fs->path = strdup(override_path);
+ if (!fs->path)
+ return false;
return true;
}
-static const char *fs__get_mountpoint(struct fs *fs)
+static void fs__init_once(struct fs *fs)
{
- if (fs__env_override(fs))
- return fs->path;
-
- if (fs__check_mounts(fs))
- return fs->path;
-
- if (fs__read_mounts(fs))
- return fs->path;
-
- return NULL;
+ if (!fs__env_override(fs) &&
+ !fs__check_mounts(fs) &&
+ !fs__read_mounts(fs)) {
+ assert(!fs->path);
+ } else {
+ assert(fs->path);
+ }
}
-static const char *fs__mountpoint(int idx)
+static const char *fs__mountpoint(const struct fs *fs)
{
- struct fs *fs = &fs__entries[idx];
-
- if (fs->found)
- return (const char *)fs->path;
-
- /* the mount point was already checked for the mount point
- * but and did not exist, so return NULL to avoid scanning again.
- * This makes the found and not found paths cost equivalent
- * in case of multiple calls.
- */
- if (fs->checked)
- return NULL;
-
- return fs__get_mountpoint(fs);
+ return fs->path;
}
static const char *mount_overload(struct fs *fs)
@@ -278,45 +266,29 @@ static const char *mount_overload(struct fs *fs)
return getenv(upper_name) ?: *fs->mounts;
}
-static const char *fs__mount(int idx)
+static const char *fs__mount(struct fs *fs)
{
- struct fs *fs = &fs__entries[idx];
const char *mountpoint;
- if (fs__mountpoint(idx))
- return (const char *)fs->path;
+ pthread_mutex_lock(&fs->mount_mutex);
- mountpoint = mount_overload(fs);
+ /* Re-check whether the path was found, under the mutex, to avoid races with other mount() callers. */
+ mountpoint = fs__mountpoint(fs);
+ if (mountpoint)
+ goto out;
- if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
- return NULL;
-
- return fs__check_mounts(fs) ? fs->path : NULL;
-}
+ mountpoint = mount_overload(fs);
-#define FS(name, idx) \
-const char *name##__mountpoint(void) \
-{ \
- return fs__mountpoint(idx); \
-} \
- \
-const char *name##__mount(void) \
-{ \
- return fs__mount(idx); \
-} \
- \
-bool name##__configured(void) \
-{ \
- return name##__mountpoint() != NULL; \
+ if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 &&
+ fs__valid_mount(mountpoint, fs->magic) == 0) {
+ fs->path = strdup(mountpoint);
+ mountpoint = fs->path;
+ }
+out:
+ pthread_mutex_unlock(&fs->mount_mutex);
+ return mountpoint;
}
-FS(sysfs, FS__SYSFS);
-FS(procfs, FS__PROCFS);
-FS(debugfs, FS__DEBUGFS);
-FS(tracefs, FS__TRACEFS);
-FS(hugetlbfs, FS__HUGETLBFS);
-FS(bpf_fs, FS__BPF_FS);
-
int filename__read_int(const char *filename, int *value)
{
char line[64];
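
The FS() macro above layers two synchronization mechanisms: pthread_once() makes the initial mountpoint discovery run exactly once, and a per-filesystem mutex serializes mounting, with a re-check of the path under the lock. A reduced sketch of that combination, independent of the fs struct here (the try_* helpers are hypothetical):

	#include <pthread.h>
	#include <stddef.h>

	extern char *try_find_existing_mountpoint(void);	/* hypothetical */
	extern char *try_mount(void);				/* hypothetical */

	static pthread_once_t init_once = PTHREAD_ONCE_INIT;
	static pthread_mutex_t mount_mutex = PTHREAD_MUTEX_INITIALIZER;
	static char *path;	/* discovered or created mountpoint, NULL if none */

	static void discover(void)
	{
		/* runs exactly once even with concurrent callers */
		path = try_find_existing_mountpoint();
	}

	const char *get_mountpoint(void)
	{
		pthread_once(&init_once, discover);
		return path;
	}

	const char *get_or_mount(void)
	{
		const char *p = get_mountpoint();

		if (p)
			return p;
		pthread_mutex_lock(&mount_mutex);
		/* re-check under the lock: another thread may have mounted it */
		if (!path)
			path = try_mount();
		p = path;
		pthread_mutex_unlock(&mount_mutex);
		return p;
	}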
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 7ba3e81274e8..30745f35d0d2 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -13,17 +13,12 @@
#include "tracing_path.h"
-static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
static char tracing_path[PATH_MAX] = "/sys/kernel/tracing";
-static char tracing_events_path[PATH_MAX] = "/sys/kernel/tracing/events";
static void __tracing_path_set(const char *tracing, const char *mountpoint)
{
- snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
mountpoint, tracing);
- snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
- mountpoint, tracing, "events");
}
static const char *tracing_path_tracefs_mount(void)
@@ -149,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
/* sdt markers */
if (!strncmp(filename, "sdt_", 4)) {
snprintf(buf, size,
- "Error:\tFile %s/%s not found.\n"
+ "Error:\tFile %s/events/%s not found.\n"
"Hint:\tSDT event cannot be directly recorded on.\n"
"\tPlease first use 'perf probe %s:%s' before recording it.\n",
- tracing_events_path, filename, sys, name);
+ tracing_path, filename, sys, name);
} else {
snprintf(buf, size,
- "Error:\tFile %s/%s not found.\n"
+ "Error:\tFile %s/events/%s not found.\n"
"Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
- tracing_events_path, filename);
+ tracing_path, filename);
}
break;
}
@@ -169,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
break;
case EACCES: {
snprintf(buf, size,
- "Error:\tNo permissions to read %s/%s\n"
+ "Error:\tNo permissions to read %s/events/%s\n"
"Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
- tracing_events_path, filename, tracing_path_mount());
+ tracing_path, filename, tracing_path_mount());
}
break;
default:
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
index d5e8cf0dada0..9fc429d2852d 100644
--- a/tools/lib/api/io.h
+++ b/tools/lib/api/io.h
@@ -8,6 +8,7 @@
#define __API_IO__
#include <errno.h>
+#include <poll.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -23,6 +24,8 @@ struct io {
char *end;
/* Currently accessed data pointer. */
char *data;
+ /* Read timeout, 0 implies no timeout. */
+ int timeout_ms;
/* Set true when at the end of file or on a read error. */
bool eof;
};
@@ -35,6 +38,7 @@ static inline void io__init(struct io *io, int fd,
io->buf = buf;
io->end = buf;
io->data = buf;
+ io->timeout_ms = 0;
io->eof = false;
}
@@ -47,7 +51,29 @@ static inline int io__get_char(struct io *io)
return -1;
if (ptr == io->end) {
- ssize_t n = read(io->fd, io->buf, io->buf_len);
+ ssize_t n;
+
+ if (io->timeout_ms != 0) {
+ struct pollfd pfds[] = {
+ {
+ .fd = io->fd,
+ .events = POLLIN,
+ },
+ };
+
+ n = poll(pfds, 1, io->timeout_ms);
+ if (n == 0)
+ errno = ETIMEDOUT;
+ if (n > 0 && !(pfds[0].revents & POLLIN)) {
+ errno = EIO;
+ n = -1;
+ }
+ if (n <= 0) {
+ io->eof = true;
+ return -1;
+ }
+ }
+ n = read(io->fd, io->buf, io->buf_len);
if (n <= 0) {
io->eof = true;
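
The io__get_char() change implements the timeout by polling the descriptor before each buffer refill rather than changing the read itself. The same idea in isolation, assuming a blocking file descriptor:

	#include <errno.h>
	#include <poll.h>
	#include <unistd.h>

	/* Read up to len bytes, failing with ETIMEDOUT if no data arrives
	 * within timeout_ms; timeout_ms == 0 means wait forever (plain read).
	 */
	static ssize_t read_timeout(int fd, void *buf, size_t len, int timeout_ms)
	{
		if (timeout_ms != 0) {
			struct pollfd pfd = { .fd = fd, .events = POLLIN };
			int n = poll(&pfd, 1, timeout_ms);

			if (n == 0) {
				errno = ETIMEDOUT;
				return -1;
			}
			if (n < 0)
				return -1;
			if (!(pfd.revents & POLLIN)) {
				errno = EIO;	/* POLLERR/POLLHUP without data */
				return -1;
			}
		}
		return read(fd, buf, len);
	}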
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 128ac723c4ea..ed86b37d8024 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -572,20 +572,30 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co
(void *)keys, (void *)values, count, opts);
}
-int bpf_obj_pin(int fd, const char *pathname)
+int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
+ const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
union bpf_attr attr;
int ret;
+ if (!OPTS_VALID(opts, bpf_obj_pin_opts))
+ return libbpf_err(-EINVAL);
+
memset(&attr, 0, attr_sz);
+ attr.path_fd = OPTS_GET(opts, path_fd, 0);
attr.pathname = ptr_to_u64((void *)pathname);
+ attr.file_flags = OPTS_GET(opts, file_flags, 0);
attr.bpf_fd = fd;
ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);
return libbpf_err_errno(ret);
}
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ return bpf_obj_pin_opts(fd, pathname, NULL);
+}
+
int bpf_obj_get(const char *pathname)
{
return bpf_obj_get_opts(pathname, NULL);
@@ -593,7 +603,7 @@ int bpf_obj_get(const char *pathname)
int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
+ const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
union bpf_attr attr;
int fd;
@@ -601,6 +611,7 @@ int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
+ attr.path_fd = OPTS_GET(opts, path_fd, 0);
attr.pathname = ptr_to_u64((void *)pathname);
attr.file_flags = OPTS_GET(opts, file_flags, 0);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index a2c091389b18..9aa0ee473754 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -284,16 +284,30 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values
__u32 *count,
const struct bpf_map_batch_opts *opts);
-struct bpf_obj_get_opts {
+struct bpf_obj_pin_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 file_flags;
+ int path_fd;
size_t :0;
};
-#define bpf_obj_get_opts__last_field file_flags
+#define bpf_obj_pin_opts__last_field path_fd
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname,
+ const struct bpf_obj_pin_opts *opts);
+
+struct bpf_obj_get_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+
+ __u32 file_flags;
+ int path_fd;
+
+ size_t :0;
+};
+#define bpf_obj_get_opts__last_field path_fd
+
LIBBPF_API int bpf_obj_get(const char *pathname);
LIBBPF_API int bpf_obj_get_opts(const char *pathname,
const struct bpf_obj_get_opts *opts);
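
The new path_fd option gives pin/get the usual *at()-style semantics: pathname is resolved relative to path_fd instead of the current directory. A usage sketch, assuming the BPF_F_PATH_FD file flag introduced by the same kernel series and eliding error handling:

	#include <fcntl.h>
	#include <unistd.h>
	#include <bpf/bpf.h>

	int pin_map_at(int map_fd)
	{
		int dir_fd = open("/sys/fs/bpf", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
		LIBBPF_OPTS(bpf_obj_pin_opts, opts,
			.file_flags = BPF_F_PATH_FD,	/* tell the kernel to honor path_fd */
			.path_fd = dir_fd,
		);
		/* "my_map" is resolved relative to dir_fd */
		int err = bpf_obj_pin_opts(map_fd, "my_map", &opts);

		close(dir_fd);
		return err;
	}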
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 929a3baca8ef..bbab9ad9dc5a 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -77,16 +77,21 @@
/*
* Helper macros to manipulate data structures
*/
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
-#endif
-#ifndef container_of
+
+/* offsetof() definition that uses __builtin_offsetof() might not preserve field
+ * offset CO-RE relocation properly, so force-redefine offsetof() using the
+ * old-school approach, which works correctly with CO-RE
+ */
+#undef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+
+/* redefined container_of() to ensure we use the above offsetof() macro */
+#undef container_of
#define container_of(ptr, type, member) \
({ \
void *__mptr = (void *)(ptr); \
((type *)(__mptr - offsetof(type, member))); \
})
-#endif
/*
* Compiler (optimization) barrier.
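
The force-redefined offsetof() uses the classic null-pointer idiom: the address of MEMBER inside a TYPE object placed at address 0 is numerically the member's byte offset, a form the CO-RE relocation machinery recognizes. A plain-C illustration:

	struct example {
		int a;		/* offset 0 */
		long b;		/* offset 8 on common 64-bit ABIs */
	};

	#define my_offsetof(type, member) ((unsigned long)&((type *)0)->member)

	/* my_offsetof(struct example, b) evaluates to 8 on such ABIs;
	 * container_of() subtracts exactly this offset from a member
	 * pointer to recover the enclosing struct pointer.
	 */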
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 6fb3d0f9af17..be076a4041ab 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -351,6 +351,7 @@ struct pt_regs___arm64 {
* https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions
*/
+/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
#define __PT_PARM1_REG a0
#define __PT_PARM2_REG a1
@@ -383,7 +384,7 @@ struct pt_regs___arm64 {
* https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf
*/
-/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */
+/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */
#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
#define __PT_PARM1_REG scratch.r0
#define __PT_PARM2_REG scratch.r1
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 0a2c079244b6..8484b563b53d 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1064,7 +1064,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
int err = 0;
long sz;
- f = fopen(path, "rb");
+ f = fopen(path, "rbe");
if (!f) {
err = -errno;
goto err_out;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 580985ee5545..4d9f30bf7f01 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -2250,9 +2250,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
const struct btf_type *t,
__u32 id,
const void *data,
- __u8 bits_offset)
+ __u8 bits_offset,
+ __u8 bit_sz)
{
- __s64 size = btf__resolve_size(d->btf, id);
+ __s64 size;
+
+ if (bit_sz) {
+ /* bits_offset is at most 7. bit_sz is at most 128. */
+ __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8;
+
+ /* When bit_sz is non-zero, this is called from
+ * btf_dump_struct_data(), which only cares about a
+ * negative error value.
+ * Return nr_bytes in the success case to keep it
+ * consistent with the regular integer case below.
+ */
+ return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes;
+ }
+
+ size = btf__resolve_size(d->btf, id);
if (size < 0 || size >= INT_MAX) {
pr_warn("unexpected size [%zu] for id [%u]\n",
@@ -2407,7 +2423,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
{
int size, err = 0;
- size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz);
if (size < 0)
return size;
err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
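
To make the bitfield bound concrete, nr_bytes rounds the bit span up to whole bytes, so with the documented maxima (bits_offset = 7, bit_sz = 128) the check covers 17 bytes:

	/* nr_bytes = (bits_offset + bit_sz + 7) / 8, i.e. round up to bytes */
	/* bits_offset = 3, bit_sz = 5   ->  (3 + 5 + 7) / 8   = 1 byte     */
	/* bits_offset = 7, bit_sz = 9   ->  (7 + 9 + 7) / 8   = 2 bytes    */
	/* bits_offset = 7, bit_sz = 128 ->  (7 + 128 + 7) / 8 = 17 bytes   */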
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 83e8e3bfd8ff..cf3323fd47b8 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -703,17 +703,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo
/* obtain fd in BPF_REG_9 */
emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
- /* jump to fd_array store if fd denotes module BTF */
- emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
- /* set the default value for off */
- emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
- /* skip BTF fd store for vmlinux BTF */
- emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
/* load fd_array slot pointer */
emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
- /* store BTF fd in slot */
+ /* store BTF fd in slot, 0 for vmlinux */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
+ /* jump to insn[insn_idx].off store if fd denotes module BTF */
+ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
+ /* set the default value for off */
+ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+ /* skip BTF fd store for vmlinux BTF */
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
/* store index into insn[insn_idx].off */
emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
log:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a27f6e9ccce7..13b5623b720f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1501,16 +1501,36 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return map;
}
-static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
- long page_sz = sysconf(_SC_PAGE_SIZE);
+ const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t map_sz;
- map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
+ map_sz = (size_t)roundup(value_sz, 8) * max_entries;
map_sz = roundup(map_sz, page_sz);
return map_sz;
}
+static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
+{
+ void *mmaped;
+
+ if (!map->mmaped)
+ return -EINVAL;
+
+ if (old_sz == new_sz)
+ return 0;
+
+ mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (mmaped == MAP_FAILED)
+ return -errno;
+
+ memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
+ munmap(map->mmaped, old_sz);
+ map->mmaped = mmaped;
+ return 0;
+}
+
static char *internal_map_name(struct bpf_object *obj, const char *real_name)
{
char map_name[BPF_OBJ_NAME_LEN], *p;
@@ -1609,6 +1629,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
{
struct bpf_map_def *def;
struct bpf_map *map;
+ size_t mmap_sz;
int err;
map = bpf_object__add_map(obj);
@@ -1643,7 +1664,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map->name, map->sec_idx, map->sec_offset, def->map_flags);
- map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+ mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (map->mmaped == MAP_FAILED) {
err = -errno;
@@ -1953,9 +1975,9 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
return -ENAMETOOLONG;
/* gzopen also accepts uncompressed files. */
- file = gzopen(buf, "r");
+ file = gzopen(buf, "re");
if (!file)
- file = gzopen("/proc/config.gz", "r");
+ file = gzopen("/proc/config.gz", "re");
if (!file) {
pr_warn("failed to open system Kconfig\n");
@@ -4330,7 +4352,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
memset(info, 0, sizeof(*info));
- fp = fopen(file, "r");
+ fp = fopen(file, "re");
if (!fp) {
err = -errno;
pr_warn("failed to open %s: %d. No procfs support?\n", file,
@@ -4393,18 +4415,17 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
if (!new_name)
return libbpf_err(-errno);
- new_fd = open("/", O_RDONLY | O_CLOEXEC);
+ /*
+ * Like dup(), but make sure the new FD is >= 3 and has O_CLOEXEC set.
+ * This is similar to what we do in ensure_good_fd(), but without
+ * closing the original FD.
+ */
+ new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (new_fd < 0) {
err = -errno;
goto err_free_new_name;
}
- new_fd = dup3(fd, new_fd, O_CLOEXEC);
- if (new_fd < 0) {
- err = -errno;
- goto err_close_new_fd;
- }
-
err = zclose(map->fd);
if (err) {
err = -errno;
@@ -6136,7 +6157,11 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
if (main_prog == subprog)
return 0;
relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
- if (!relos)
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (!relos && new_cnt)
return -ENOMEM;
if (subprog->nr_reloc)
memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
@@ -7434,7 +7459,7 @@ int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
int ret, err = 0;
FILE *f;
- f = fopen("/proc/kallsyms", "r");
+ f = fopen("/proc/kallsyms", "re");
if (!f) {
err = -errno;
pr_warn("failed to open /proc/kallsyms: %d\n", err);
@@ -8295,7 +8320,10 @@ static void bpf_map__destroy(struct bpf_map *map)
map->init_slots_sz = 0;
if (map->mmaped) {
- munmap(map->mmaped, bpf_map_mmap_sz(map));
+ size_t mmap_sz;
+
+ mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ munmap(map->mmaped, mmap_sz);
map->mmaped = NULL;
}
@@ -8328,6 +8356,7 @@ void bpf_object__close(struct bpf_object *obj)
bpf_object__elf_finish(obj);
bpf_object_unload(obj);
btf__free(obj->btf);
+ btf__free(obj->btf_vmlinux);
btf_ext__free(obj->btf_ext);
for (i = 0; i < obj->nr_maps; i++)
@@ -8504,7 +8533,8 @@ int bpf_program__set_insns(struct bpf_program *prog,
return -EBUSY;
insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
- if (!insns) {
+ /* NULL is a valid return from reallocarray if the new count is zero */
+ if (!insns && new_insn_cnt) {
pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
return -ENOMEM;
}
@@ -8534,13 +8564,31 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
return prog->type;
}
+static size_t custom_sec_def_cnt;
+static struct bpf_sec_def *custom_sec_defs;
+static struct bpf_sec_def custom_fallback_def;
+static bool has_custom_fallback_def;
+static int last_custom_sec_def_handler_id;
+
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
if (prog->obj->loaded)
return libbpf_err(-EBUSY);
+ /* if type is not changed, do nothing */
+ if (prog->type == type)
+ return 0;
+
prog->type = type;
- prog->sec_def = NULL;
+
+ /* If a program type was changed, we need to reset associated SEC()
+ * handler, as it will be invalid now. The only exception is a generic
+ * fallback handler, which by definition is program type-agnostic and
+ * is a catch-all custom handler, optionally set by the application,
+ * so should be able to handle any type of BPF program.
+ */
+ if (prog->sec_def != &custom_fallback_def)
+ prog->sec_def = NULL;
return 0;
}
@@ -8716,13 +8764,6 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
};
-static size_t custom_sec_def_cnt;
-static struct bpf_sec_def *custom_sec_defs;
-static struct bpf_sec_def custom_fallback_def;
-static bool has_custom_fallback_def;
-
-static int last_custom_sec_def_handler_id;
-
int libbpf_register_prog_handler(const char *sec,
enum bpf_prog_type prog_type,
enum bpf_attach_type exp_attach_type,
@@ -8802,7 +8843,11 @@ int libbpf_unregister_prog_handler(int handler_id)
/* try to shrink the array, but it's ok if we couldn't */
sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
- if (sec_defs)
+ /* if new count is zero, reallocarray can return a valid NULL result;
+ * in this case the previous pointer will be freed, so we *have to*
+ * reassign old pointer to the new value (even if it's NULL)
+ */
+ if (sec_defs || custom_sec_def_cnt == 0)
custom_sec_defs = sec_defs;
return 0;
@@ -9413,10 +9458,103 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
return map->def.value_size;
}
+static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
+{
+ struct btf *btf;
+ struct btf_type *datasec_type, *var_type;
+ struct btf_var_secinfo *var;
+ const struct btf_type *array_type;
+ const struct btf_array *array;
+ int vlen, element_sz, new_array_id;
+ __u32 nr_elements;
+
+ /* check btf existence */
+ btf = bpf_object__btf(map->obj);
+ if (!btf)
+ return -ENOENT;
+
+ /* verify map is datasec */
+ datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
+ if (!btf_is_datasec(datasec_type)) {
+ pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ /* verify datasec has at least one var */
+ vlen = btf_vlen(datasec_type);
+ if (vlen == 0) {
+ pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ /* verify last var in the datasec is an array */
+ var = &btf_var_secinfos(datasec_type)[vlen - 1];
+ var_type = btf_type_by_id(btf, var->type);
+ array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
+ if (!btf_is_array(array_type)) {
+ pr_warn("map '%s': cannot be resized, last var must be an array\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ /* verify request size aligns with array */
+ array = btf_array(array_type);
+ element_sz = btf__resolve_size(btf, array->type);
+ if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
+ pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
+ bpf_map__name(map), element_sz, size);
+ return -EINVAL;
+ }
+
+ /* create a new array based on the existing array, but with new length */
+ nr_elements = (size - var->offset) / element_sz;
+ new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
+ if (new_array_id < 0)
+ return new_array_id;
+
+ /* adding a new btf type invalidates existing pointers to btf objects,
+ * so refresh pointers before proceeding
+ */
+ datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
+ var = &btf_var_secinfos(datasec_type)[vlen - 1];
+ var_type = btf_type_by_id(btf, var->type);
+
+ /* finally update btf info */
+ datasec_type->size = size;
+ var->size = size - var->offset;
+ var_type->type = new_array_id;
+
+ return 0;
+}
+
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
if (map->fd >= 0)
return libbpf_err(-EBUSY);
+
+ if (map->mmaped) {
+ int err;
+ size_t mmap_old_sz, mmap_new_sz;
+
+ mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
+ mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
+ err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
+ if (err) {
+ pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
+ bpf_map__name(map), err);
+ return err;
+ }
+ err = map_btf_datasec_resize(map, size);
+ if (err && err != -ENOENT) {
+ pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
+ bpf_map__name(map), err);
+ map->btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ }
+ }
+
map->def.value_size = size;
return 0;
}
@@ -9442,7 +9580,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
return 0;
}
-const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
{
if (!map->mmaped)
return NULL;
@@ -9958,7 +10096,7 @@ static int parse_uint_from_file(const char *file, const char *fmt)
int err, ret;
FILE *f;
- f = fopen(file, "r");
+ f = fopen(file, "re");
if (!f) {
err = -errno;
pr_debug("failed to open '%s': %s\n", file,
@@ -12694,7 +12832,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
for (i = 0; i < s->map_cnt; i++) {
struct bpf_map *map = *s->maps[i].map;
- size_t mmap_sz = bpf_map_mmap_sz(map);
+ size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
int prot, map_fd = bpf_map__fd(map);
void **mmaped = s->maps[i].mmaped;
@@ -12721,8 +12859,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
* as per normal clean up procedure, so we don't need to worry
* about it from skeleton's clean up perspective.
*/
- *mmaped = mmap(map->mmaped, mmap_sz, prot,
- MAP_SHARED | MAP_FIXED, map_fd, 0);
+ *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
if (*mmaped == MAP_FAILED) {
err = -errno;
*mmaped = NULL;
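
One detail worth noting from the bpf_map__reuse_fd() hunk above: F_DUPFD_CLOEXEC duplicates a descriptor onto the lowest free number greater than or equal to its third argument and sets the close-on-exec flag atomically, replacing the earlier open("/") + dup3() pair. In isolation:

	#include <fcntl.h>

	/* Duplicate fd, guaranteeing the copy is >= 3 (clear of stdio) and
	 * close-on-exec; the original fd is left open and untouched.
	 */
	static int dup_good_fd(int fd)
	{
		return fcntl(fd, F_DUPFD_CLOEXEC, 3);	/* new fd, or -1 with errno */
	}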
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0b7362397ea3..754da73c643b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -869,8 +869,22 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
/* get/set map key size */
LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
-/* get/set map value size */
+/* get map value size */
LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+/**
+ * @brief **bpf_map__set_value_size()** sets map value size.
+ * @param map the BPF map instance
+ * @return 0, on success; negative error, otherwise
+ *
+ * There is a special case for maps with associated memory-mapped regions, like
+ * the global data section maps (bss, data, rodata). When this function is used
+ * on such a map, the mapped region is resized. Afterward, an attempt is made to
+ * adjust the corresponding BTF info. This attempt is best-effort and can only
+ * succeed if the last variable of the data section map is an array. The array
+ * BTF type is replaced by a new BTF array type with a different length.
+ * Any previously existing pointers returned from bpf_map__initial_value() or
+ * corresponding data section skeleton pointer must be reinitialized.
+ */
LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
/* get map key/value BTF type IDs */
LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
@@ -884,7 +898,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
/**
* @brief **bpf_map__is_internal()** tells the caller whether or not the
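
Putting the doc comment above into practice, a sketch of resizing a global-data map whose datasec ends in an array (the names and sizes are illustrative; the call must happen before the object is loaded, and any pointer obtained earlier is stale after the resize):

	#include <errno.h>
	#include <bpf/libbpf.h>

	static int grow_data_array(struct bpf_object *obj)
	{
		struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data");
		size_t sz;
		int err;

		if (!map)
			return -ENOENT;

		err = bpf_map__set_value_size(map, 1024 * sizeof(int));
		if (err)
			return err;

		/* re-fetch the mapped region; the old pointer is invalid now */
		return bpf_map__initial_value(map, &sz) ? 0 : -EINVAL;
	}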
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a5aa3a383d69..7521a2fb7626 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -391,3 +391,8 @@ LIBBPF_1.2.0 {
bpf_map_get_info_by_fd;
bpf_prog_get_info_by_fd;
} LIBBPF_1.1.0;
+
+LIBBPF_1.3.0 {
+ global:
+ bpf_obj_pin_opts;
+} LIBBPF_1.2.0;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index b7d443129f1c..9c4db90b92b6 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -38,7 +38,7 @@ static __u32 get_ubuntu_kernel_version(void)
if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0)
return 0;
- f = fopen(ubuntu_kver_file, "r");
+ f = fopen(ubuntu_kver_file, "re");
if (!f)
return 0;
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
index 1fd2eeac5cfc..290411ddb39e 100644
--- a/tools/lib/bpf/libbpf_version.h
+++ b/tools/lib/bpf/libbpf_version.h
@@ -4,6 +4,6 @@
#define __LIBBPF_VERSION_H
#define LIBBPF_MAJOR_VERSION 1
-#define LIBBPF_MINOR_VERSION 2
+#define LIBBPF_MINOR_VERSION 3
#endif /* __LIBBPF_VERSION_H */
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 086eef355ab3..37455d00b239 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -466,7 +466,7 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs,
proceed:
sprintf(line, "/proc/%d/maps", pid);
- f = fopen(line, "r");
+ f = fopen(line, "re");
if (!f) {
err = -errno;
pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n",
@@ -852,8 +852,11 @@ static int bpf_link_usdt_detach(struct bpf_link *link)
* system is so exhausted on memory, it's the least of user's
* concerns, probably.
* So just do our best here to return those IDs to usdt_manager.
+ * Another edge case where we can legitimately get NULL is when
+ * new_cnt is zero, so we need to handle that without treating
+ * it as an allocation failure.
*/
- if (new_free_ids) {
+ if (new_free_ids || new_cnt == 0) {
memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids,
usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids));
man->free_spec_ids = new_free_ids;
@@ -954,8 +957,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
spec_map_fd = bpf_map__fd(man->specs_map);
ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map);
- /* TODO: perform path resolution similar to uprobe's */
- fd = open(path, O_RDONLY);
+ fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd < 0) {
err = -errno;
pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err);
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 1229b18bcdb1..2a5a29217374 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -99,6 +99,11 @@ static int cmp_cpu(const void *a, const void *b)
return cpu_a->cpu - cpu_b->cpu;
}
+static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+{
+ return RC_CHK_ACCESS(cpus)->map[idx];
+}
+
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
@@ -111,8 +116,12 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || RC_CHK_ACCESS(cpus)->map[i].cpu != RC_CHK_ACCESS(cpus)->map[i - 1].cpu)
- RC_CHK_ACCESS(cpus)->map[j++].cpu = RC_CHK_ACCESS(cpus)->map[i].cpu;
+ if (i == 0 ||
+ __perf_cpu_map__cpu(cpus, i).cpu !=
+ __perf_cpu_map__cpu(cpus, i - 1).cpu) {
+ RC_CHK_ACCESS(cpus)->map[j++].cpu =
+ __perf_cpu_map__cpu(cpus, i).cpu;
+ }
}
perf_cpu_map__set_nr(cpus, j);
assert(j <= nr_cpus);
@@ -269,26 +278,31 @@ out:
return cpus;
}
+static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
+{
+ return RC_CHK_ACCESS(cpus)->nr;
+}
+
struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
struct perf_cpu result = {
.cpu = -1
};
- if (cpus && idx < RC_CHK_ACCESS(cpus)->nr)
- return RC_CHK_ACCESS(cpus)->map[idx];
+ if (cpus && idx < __perf_cpu_map__nr(cpus))
+ return __perf_cpu_map__cpu(cpus, idx);
return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
- return cpus ? RC_CHK_ACCESS(cpus)->nr : 1;
+ return cpus ? __perf_cpu_map__nr(cpus) : 1;
}
bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
- return map ? RC_CHK_ACCESS(map)->map[0].cpu == -1 : true;
+ return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
@@ -299,10 +313,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
return -1;
low = 0;
- high = RC_CHK_ACCESS(cpus)->nr;
+ high = __perf_cpu_map__nr(cpus);
while (low < high) {
int idx = (low + high) / 2;
- struct perf_cpu cpu_at_idx = RC_CHK_ACCESS(cpus)->map[idx];
+ struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);
if (cpu_at_idx.cpu == cpu.cpu)
return idx;
@@ -321,6 +335,32 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
return perf_cpu_map__idx(cpus, cpu) != -1;
}
+bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
+{
+ int nr;
+
+ if (lhs == rhs)
+ return true;
+
+ if (!lhs || !rhs)
+ return false;
+
+ nr = __perf_cpu_map__nr(lhs);
+ if (nr != __perf_cpu_map__nr(rhs))
+ return false;
+
+ for (int idx = 0; idx < nr; idx++) {
+ if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
+ return false;
+ }
+ return true;
+}
+
+bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
+{
+ return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
struct perf_cpu result = {
@@ -328,7 +368,9 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
};
// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return RC_CHK_ACCESS(map)->nr > 0 ? RC_CHK_ACCESS(map)->map[RC_CHK_ACCESS(map)->nr - 1] : result;
+ return __perf_cpu_map__nr(map) > 0
+ ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
+ : result;
}
/** Is 'b' a subset of 'a'. */
@@ -336,15 +378,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
{
if (a == b || !b)
return true;
- if (!a || RC_CHK_ACCESS(b)->nr > RC_CHK_ACCESS(a)->nr)
+ if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
return false;
- for (int i = 0, j = 0; i < RC_CHK_ACCESS(a)->nr; i++) {
- if (RC_CHK_ACCESS(a)->map[i].cpu > RC_CHK_ACCESS(b)->map[j].cpu)
+ for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
+ if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
return false;
- if (RC_CHK_ACCESS(a)->map[i].cpu == RC_CHK_ACCESS(b)->map[j].cpu) {
+ if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
j++;
- if (j == RC_CHK_ACCESS(b)->nr)
+ if (j == __perf_cpu_map__nr(b))
return true;
}
}
@@ -374,27 +416,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
return perf_cpu_map__get(other);
}
- tmp_len = RC_CHK_ACCESS(orig)->nr + RC_CHK_ACCESS(other)->nr;
+ tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
- while (i < RC_CHK_ACCESS(orig)->nr && j < RC_CHK_ACCESS(other)->nr) {
- if (RC_CHK_ACCESS(orig)->map[i].cpu <= RC_CHK_ACCESS(other)->map[j].cpu) {
- if (RC_CHK_ACCESS(orig)->map[i].cpu == RC_CHK_ACCESS(other)->map[j].cpu)
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
+ if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
j++;
- tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++];
+ tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
} else
- tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++];
+ tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
}
- while (i < RC_CHK_ACCESS(orig)->nr)
- tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++];
+ while (i < __perf_cpu_map__nr(orig))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
- while (j < RC_CHK_ACCESS(other)->nr)
- tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++];
+ while (j < __perf_cpu_map__nr(other))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
assert(k <= tmp_len);
merged = cpu_map__trim_new(k, tmp_cpus);
@@ -402,3 +444,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
perf_cpu_map__put(orig);
return merged;
}
+
+struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other)
+{
+ struct perf_cpu *tmp_cpus;
+ int tmp_len;
+ int i, j, k;
+ struct perf_cpu_map *merged = NULL;
+
+ if (perf_cpu_map__is_subset(other, orig))
+ return perf_cpu_map__get(orig);
+ if (perf_cpu_map__is_subset(orig, other))
+ return perf_cpu_map__get(other);
+
+ tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
+ if (!tmp_cpus)
+ return NULL;
+
+ i = j = k = 0;
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+ i++;
+ else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+ j++;
+ else {
+ j++;
+ tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+ }
+ }
+ if (k)
+ merged = cpu_map__trim_new(k, tmp_cpus);
+ free(tmp_cpus);
+ return merged;
+}
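
perf_cpu_map__intersect() reuses the sorted two-pointer walk of the merge routine but keeps only CPUs present in both maps, returning NULL for an empty intersection. A usage sketch (the CPU list strings are arbitrary examples):

	#include <perf/cpumap.h>

	void intersect_example(void)
	{
		struct perf_cpu_map *a = perf_cpu_map__new("0-3");
		struct perf_cpu_map *b = perf_cpu_map__new("2-5");
		/* both holds {2, 3}; a and b keep their own references */
		struct perf_cpu_map *both = perf_cpu_map__intersect(a, b);

		perf_cpu_map__put(a);
		perf_cpu_map__put(b);
		perf_cpu_map__put(both);	/* put(NULL) is a safe no-op */
	}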
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 81e8b5fcd8ba..b8b066d0dc5e 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -36,18 +36,33 @@ void perf_evlist__init(struct perf_evlist *evlist)
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
struct perf_evsel *evsel)
{
- /*
- * We already have cpus for evsel (via PMU sysfs) so
- * keep it, if there's no target cpu list defined.
- */
if (evsel->system_wide) {
+ /* System wide: set the cpu map of the evsel to all online CPUs. */
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__new(NULL);
+ } else if (evlist->has_user_cpus && evsel->is_pmu_core) {
+ /*
+ * User requested CPUs on a core PMU, ensure the requested CPUs
+ * are valid by intersecting with those of the PMU.
+ */
+ perf_cpu_map__put(evsel->cpus);
+ evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
} else if (!evsel->own_cpus || evlist->has_user_cpus ||
- (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) {
+ (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
+ /*
+ * The PMU didn't specify a default cpu map, this isn't a core
+ * event, and either the user requested CPUs or the evlist user
+ * requested CPUs have the "any CPU" (aka dummy) CPU value; in
+ * that case use the user requested CPUs rather than the PMU
+ * ones.
+ */
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
+ /*
+ * No user requested cpu map but the PMU cpu map doesn't match
+ * the evsel's. Reset it back to the PMU cpu map.
+ */
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}
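
Read top to bottom, the rewritten propagation resolves an evsel's cpu map with a fixed precedence, roughly:

	1. system_wide set                       -> all online CPUs
	2. user CPUs and a core PMU              -> user CPUs intersected with the PMU's
	3. no PMU default cpu map, or user CPUs
	   given, or "any CPU" requested for an
	   event not requiring a CPU             -> the user requested CPUs
	4. otherwise                             -> the PMU's own_cpus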
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index a99a75d9e78f..5cd220a61962 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -41,7 +41,14 @@ struct perf_sample_id {
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
+ /** The commonly used cpu map of CPUs the event should be opened upon, etc. */
struct perf_cpu_map *cpus;
+ /**
+ * The cpu map read from the PMU. For core PMUs this is the list of all
+ * CPUs the event can be opened upon. For other PMUs this is the default
+ * cpu map for opening the event on, for example, the first CPU on a
+ * socket for an uncore event.
+ */
struct perf_cpu_map *own_cpus;
struct perf_thread_map *threads;
struct xyarray *fd;
@@ -55,9 +62,9 @@ struct perf_evsel {
int nr_members;
/*
* system_wide is for events that need to be on every CPU, irrespective
- * of user requested CPUs or threads. Map propagation will set cpus to
- * this event's own_cpus, whereby they will contribute to evlist
- * all_cpus.
+ * of user requested CPUs or threads. The main example of this is the
+ * dummy event. Map propagation will set cpus for this event to all CPUs,
+ * as software PMU events like dummy have an empty CPU map.
*/
bool system_wide;
/*
@@ -65,6 +72,8 @@ struct perf_evsel {
* i.e. it cannot be the 'any CPU' value of -1.
*/
bool requires_cpu;
+ /** Is the PMU for the event a core one? Affects the handling of own_cpus. */
+ bool is_pmu_core;
int idx;
};
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 3f43f770cdac..e38d859a384d 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -11,8 +11,16 @@ struct perf_cpu {
int cpu;
};
+struct perf_cache {
+ int cache_lvl;
+ int cache;
+};
+
struct perf_cpu_map;
+/**
+ * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
+ */
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
@@ -20,12 +28,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
+/**
+ * perf_cpu_map__empty - is the map empty, or does it hold only the "any CPU"/dummy value?
+ */
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
+LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
+ const struct perf_cpu_map *rhs);
+/**
+ * perf_cpu_map__has_any_cpu - Does the map contain the "any CPU"/dummy -1 value?
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 51b9338f4c11..ba2dcf64f4e6 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -380,7 +380,8 @@ enum {
PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
PERF_STAT_CONFIG_TERM__INTERVAL = 1,
PERF_STAT_CONFIG_TERM__SCALE = 2,
- PERF_STAT_CONFIG_TERM__MAX = 3,
+ PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3,
+ PERF_STAT_CONFIG_TERM__MAX = 4,
};
struct perf_record_stat_config_entry {
diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c
index 5dbea456973e..7739b5217cf6 100644
--- a/tools/lib/subcmd/exec-cmd.c
+++ b/tools/lib/subcmd/exec-cmd.c
@@ -36,38 +36,40 @@ static int is_absolute_path(const char *path)
return path[0] == '/';
}
-static const char *get_pwd_cwd(void)
+static const char *get_pwd_cwd(char *buf, size_t sz)
{
- static char cwd[PATH_MAX + 1];
char *pwd;
struct stat cwd_stat, pwd_stat;
- if (getcwd(cwd, PATH_MAX) == NULL)
+ if (getcwd(buf, sz) == NULL)
return NULL;
pwd = getenv("PWD");
- if (pwd && strcmp(pwd, cwd)) {
- stat(cwd, &cwd_stat);
+ if (pwd && strcmp(pwd, buf)) {
+ stat(buf, &cwd_stat);
if (!stat(pwd, &pwd_stat) &&
pwd_stat.st_dev == cwd_stat.st_dev &&
pwd_stat.st_ino == cwd_stat.st_ino) {
- strlcpy(cwd, pwd, PATH_MAX);
+ strlcpy(buf, pwd, sz);
}
}
- return cwd;
+ return buf;
}
-static const char *make_nonrelative_path(const char *path)
+static const char *make_nonrelative_path(char *buf, size_t sz, const char *path)
{
- static char buf[PATH_MAX + 1];
-
if (is_absolute_path(path)) {
- if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+ if (strlcpy(buf, path, sz) >= sz)
die("Too long path: %.*s", 60, path);
} else {
- const char *cwd = get_pwd_cwd();
+ const char *cwd = get_pwd_cwd(buf, sz);
+
if (!cwd)
die("Cannot determine the current working directory");
- if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+
+ if (strlen(cwd) + strlen(path) + 2 >= sz)
die("Too long path: %.*s", 60, path);
+
+ strcat(buf, "/");
+ strcat(buf, path);
}
return buf;
}
@@ -133,8 +135,11 @@ static void add_path(char **out, const char *path)
if (path && *path) {
if (is_absolute_path(path))
astrcat(out, path);
- else
- astrcat(out, make_nonrelative_path(path));
+ else {
+ char buf[PATH_MAX];
+
+ astrcat(out, make_nonrelative_path(buf, sizeof(buf), path));
+ }
astrcat(out, ":");
}
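
get_pwd_cwd() keeps the $PWD spelling (which may go through symlinks) only when it names the same directory object as getcwd()'s result. That identity check, in isolation:

	#include <sys/stat.h>

	/* Return 1 if pwd and cwd refer to the same directory inode. */
	static int same_dir(const char *pwd, const char *cwd)
	{
		struct stat a, b;

		return stat(pwd, &a) == 0 && stat(cwd, &b) == 0 &&
		       a.st_dev == b.st_dev && a.st_ino == b.st_ino;
	}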
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index bf02d62a3b2b..adfbae27dc36 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -16,6 +16,8 @@
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
{
struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+ if (!ent)
+ return;
ent->len = len;
memcpy(ent->name, name, len);
@@ -66,7 +68,13 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
if (cmp < 0) {
- cmds->names[cj++] = cmds->names[ci++];
+ if (ci == cj) {
+ ci++;
+ cj++;
+ } else {
+ zfree(&cmds->names[cj]);
+ cmds->names[cj++] = cmds->names[ci++];
+ }
} else if (cmp == 0) {
ci++;
ei++;
@@ -74,10 +82,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
ei++;
}
}
-
- while (ci < cmds->cnt)
- cmds->names[cj++] = cmds->names[ci++];
-
+ if (ci != cj) {
+ while (ci < cmds->cnt) {
+ zfree(&cmds->names[cj]);
+ cmds->names[cj++] = cmds->names[ci++];
+ }
+ }
+ for (ci = cj; ci < cmds->cnt; ci++)
+ zfree(&cmds->names[ci]);
cmds->cnt = cj;
}
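
The exclude_cmds() change plugs a leak in the compaction: excluded entries are now freed lazily, either when a kept entry overwrites their slot or in a final sweep over the tail. The idiom reduced to plain malloc'd strings (zfree() is assumed to free and NULL its argument, as in the kernel's tools headers):

	#include <stdlib.h>

	#define zfree(pp) do { free(*(pp)); *(pp) = NULL; } while (0)

	/* Keep entries for which keep() is true, compacting left and
	 * freeing everything that is dropped or overwritten.
	 */
	static size_t compact(char **names, size_t cnt, int (*keep)(const char *))
	{
		size_t ci, cj = 0;

		for (ci = 0; ci < cnt; ci++) {
			if (!keep(names[ci]))
				continue;	/* freed when overwritten, or in the sweep */
			if (ci != cj) {
				zfree(&names[cj]);	/* free the dropped entry in this slot */
				names[cj] = names[ci];
				names[ci] = NULL;	/* ownership moved */
			}
			cj++;
		}
		for (ci = cj; ci < cnt; ci++)
			zfree(&names[ci]);	/* free any dropped tail entries */
		return cj;	/* new count */
	}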
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 41b9b942504d..8e9147358a28 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -6,10 +6,6 @@
#include <stdbool.h>
#include <stdint.h>
-#ifndef NORETURN
-#define NORETURN __attribute__((__noreturn__))
-#endif
-
enum parse_opt_type {
/* special types */
OPTION_END,
@@ -183,9 +179,9 @@ extern int parse_options_subcommand(int argc, const char **argv,
const char *const subcommands[],
const char *usagestr[], int flags);
-extern NORETURN void usage_with_options(const char * const *usagestr,
+extern __noreturn void usage_with_options(const char * const *usagestr,
const struct option *options);
-extern NORETURN __attribute__((format(printf,3,4)))
+extern __noreturn __attribute__((format(printf,3,4)))
void usage_with_options_msg(const char * const *usagestr,
const struct option *options,
const char *fmt, ...);
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
index b2aec04fce8f..dfac76e35ac7 100644
--- a/tools/lib/subcmd/subcmd-util.h
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -5,8 +5,7 @@
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
-
-#define NORETURN __attribute__((__noreturn__))
+#include <linux/compiler.h>
static inline void report(const char *prefix, const char *err, va_list params)
{
@@ -15,7 +14,7 @@ static inline void report(const char *prefix, const char *err, va_list params)
fprintf(stderr, " %s%s\n", prefix, msg);
}
-static NORETURN inline void die(const char *err, ...)
+static __noreturn inline void die(const char *err, ...)
{
va_list params;
diff --git a/tools/mm/Makefile b/tools/mm/Makefile
index 6c1da51f4177..1c5606cc3334 100644
--- a/tools/mm/Makefile
+++ b/tools/mm/Makefile
@@ -8,8 +8,8 @@ TARGETS=page-types slabinfo page_owner_sort
LIB_DIR = ../lib/api
LIBS = $(LIB_DIR)/libapi.a
-CFLAGS += -Wall -Wextra -I../lib/
-LDFLAGS += $(LIBS)
+CFLAGS += -Wall -Wextra -I../lib/ -pthread
+LDFLAGS += $(LIBS) -pthread
all: $(TARGETS)
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
new file mode 100644
index 000000000000..d664b36deb5b
--- /dev/null
+++ b/tools/net/ynl/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+SUBDIRS = lib generated samples
+
+all: $(SUBDIRS)
+
+$(SUBDIRS):
+ @if [ -f "$@/Makefile" ] ; then \
+ $(MAKE) -C $@ ; \
+ fi
+
+clean hardclean:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -f "$$dir/Makefile" ] ; then \
+ $(MAKE) -C $$dir $@; \
+ fi \
+ done
+
+.PHONY: clean all $(SUBDIRS)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
new file mode 100644
index 000000000000..f842bc66b967
--- /dev/null
+++ b/tools/net/ynl/Makefile.deps
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Try to include uAPI headers from the kernel uapi/ path.
+# Most code under tools/ requires the respective kernel uAPI headers
+# to be copied to tools/include. The duplication is annoying.
+# All the family headers should be self-contained. We avoid the copying
+# by selectively including just the uAPI header of the family directly
+# from the kernel sources.
+
+UAPI_PATH:=../../../../include/uapi/
+
+# scripts/headers_install.sh strips "_UAPI" from header guards so we
+# need the explicit -D matching what's in /usr, to avoid multiple definitions.
+
+get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
+
+CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
+CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
+CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
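
For example, $(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) expands to (the doubled slash from UAPI_PATH's trailing slash is harmless):

	-D_LINUX_NETDEV_H -include ../../../../include/uapi//linux/netdev.h

Defining the guard macro up front turns any later #include of the installed system header into a no-op, while -include pulls in the in-tree uAPI copy first.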
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
new file mode 100644
index 000000000000..f8817d2e56e4
--- /dev/null
+++ b/tools/net/ynl/generated/Makefile
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CC=gcc
+CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \
+ -I../lib/ -idirafter $(UAPI_PATH)
+ifeq ("$(DEBUG)","1")
+ CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+
+include ../Makefile.deps
+
+YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \
+ --exclude-op stats-get
+
+TOOL:=../ynl-gen-c.py
+
+GENS:=ethtool devlink handshake fou netdev
+SRCS=$(patsubst %,%-user.c,${GENS})
+HDRS=$(patsubst %,%-user.h,${GENS})
+OBJS=$(patsubst %,%-user.o,${GENS})
+
+all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) regen
+
+protos.a: $(OBJS)
+ @echo -e "\tAR $@"
+ @ar rcs $@ $(OBJS)
+
+%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+ @echo -e "\tGEN $@"
+ @$(TOOL) --mode user --header --spec $< $(YNL_GEN_ARG_$*) > $@
+
+%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+ @echo -e "\tGEN $@"
+ @$(TOOL) --mode user --source --spec $< $(YNL_GEN_ARG_$*) > $@
+
+%-user.o: %-user.c %-user.h
+ @echo -e "\tCC $@"
+ @$(COMPILE.c) $(CFLAGS_$*) -o $@ $<
+
+clean:
+ rm -f *.o
+
+hardclean: clean
+ rm -f *.c *.h *.a
+
+regen:
+ @../ynl-regen.sh
+
+.PHONY: all clean hardclean regen
+.DEFAULT_GOAL := all
diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c
new file mode 100644
index 000000000000..939bd45feaca
--- /dev/null
+++ b/tools/net/ynl/generated/devlink-user.c
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include <string.h>
+#include "devlink-user.h"
+#include "ynl.h"
+#include <linux/devlink.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const devlink_op_strmap[] = {
+ [3] = "get",
+ [DEVLINK_CMD_INFO_GET] = "info-get",
+};
+
+const char *devlink_op_str(int op)
+{
+ if (op < 0 || op >= (int)MNL_ARRAY_SIZE(devlink_op_strmap))
+ return NULL;
+ return devlink_op_strmap[op];
+}
+
+/* Policies */
+struct ynl_policy_attr devlink_dl_info_version_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, },
+};
+
+struct ynl_policy_nest devlink_dl_info_version_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_dl_info_version_policy,
+};
+
+struct ynl_policy_attr devlink_dl_reload_stats_entry_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, },
+ [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest devlink_dl_reload_stats_entry_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_dl_reload_stats_entry_policy,
+};
+
+struct ynl_policy_attr devlink_dl_reload_act_stats_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, },
+};
+
+struct ynl_policy_nest devlink_dl_reload_act_stats_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_dl_reload_act_stats_policy,
+};
+
+struct ynl_policy_attr devlink_dl_reload_act_info_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, },
+ [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, },
+};
+
+struct ynl_policy_nest devlink_dl_reload_act_info_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_dl_reload_act_info_policy,
+};
+
+struct ynl_policy_attr devlink_dl_reload_stats_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, },
+};
+
+struct ynl_policy_nest devlink_dl_reload_stats_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_dl_reload_stats_policy,
+};
+
+struct ynl_policy_attr devlink_dl_dev_stats_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, },
+ [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, },
+};
+
+struct ynl_policy_nest devlink_dl_dev_stats_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_dl_dev_stats_policy,
+};
+
+struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .name = "bus-name", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .name = "port-index", .type = YNL_PT_U32, },
+ [DEVLINK_ATTR_INFO_DRIVER_NAME] = { .name = "info-driver-name", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = { .name = "info-serial-number", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_INFO_VERSION_FIXED] = { .name = "info-version-fixed", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, },
+ [DEVLINK_ATTR_INFO_VERSION_RUNNING] = { .name = "info-version-running", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, },
+ [DEVLINK_ATTR_INFO_VERSION_STORED] = { .name = "info-version-stored", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, },
+ [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, },
+ [DEVLINK_ATTR_RELOAD_FAILED] = { .name = "reload-failed", .type = YNL_PT_U8, },
+ [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, },
+ [DEVLINK_ATTR_DEV_STATS] = { .name = "dev-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_dev_stats_nest, },
+ [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, },
+ [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, },
+ [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, },
+ [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, },
+ [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, },
+ [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, },
+ [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, },
+};
+
+struct ynl_policy_nest devlink_nest = {
+ .max_attr = DEVLINK_ATTR_MAX,
+ .table = devlink_policy,
+};
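+
+/* Each policy table above is indexed by attribute type. The top-level
+ * devlink_nest is attached as the request policy before each send
+ * (ys->req_policy) and as the reply parsing policy (rsp_policy); the
+ * nested tables are reached through their .nest pointers while walking
+ * replies.
+ */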
+
+/* Common nested types */
+void devlink_dl_info_version_free(struct devlink_dl_info_version *obj)
+{
+ free(obj->info_version_name);
+ free(obj->info_version_value);
+}
+
+int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct devlink_dl_info_version *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_INFO_VERSION_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.info_version_name_len = len;
+ dst->info_version_name = malloc(len + 1);
+ memcpy(dst->info_version_name, mnl_attr_get_str(attr), len);
+ dst->info_version_name[len] = 0;
+ } else if (type == DEVLINK_ATTR_INFO_VERSION_VALUE) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.info_version_value_len = len;
+ dst->info_version_value = malloc(len + 1);
+ memcpy(dst->info_version_value, mnl_attr_get_str(attr), len);
+ dst->info_version_value[len] = 0;
+ }
+ }
+
+ return 0;
+}
+
+void
+devlink_dl_reload_stats_entry_free(struct devlink_dl_reload_stats_entry *obj)
+{
+}
+
+int devlink_dl_reload_stats_entry_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct devlink_dl_reload_stats_entry *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_RELOAD_STATS_LIMIT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reload_stats_limit = 1;
+ dst->reload_stats_limit = mnl_attr_get_u8(attr);
+ } else if (type == DEVLINK_ATTR_RELOAD_STATS_VALUE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reload_stats_value = 1;
+ dst->reload_stats_value = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return 0;
+}
+
+void devlink_dl_reload_act_stats_free(struct devlink_dl_reload_act_stats *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_reload_stats_entry; i++)
+ devlink_dl_reload_stats_entry_free(&obj->reload_stats_entry[i]);
+ free(obj->reload_stats_entry);
+}
+
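+/* The array-valued attribute below is parsed in two passes: a first walk
+ * of the nest only counts DEVLINK_ATTR_RELOAD_STATS_ENTRY instances, then
+ * a single calloc()'d array is filled by a second walk. The same pattern
+ * repeats for the other multi-instance attributes in this file.
+ */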
+int devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct devlink_dl_reload_act_stats *dst = yarg->data;
+ unsigned int n_reload_stats_entry = 0;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->reload_stats_entry)
+ return ynl_error_parse(yarg, "attribute already present (dl-reload-act-stats.reload-stats-entry)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_RELOAD_STATS_ENTRY) {
+ n_reload_stats_entry++;
+ }
+ }
+
+ if (n_reload_stats_entry) {
+ dst->reload_stats_entry = calloc(n_reload_stats_entry, sizeof(*dst->reload_stats_entry));
+ dst->n_reload_stats_entry = n_reload_stats_entry;
+ i = 0;
+ parg.rsp_policy = &devlink_dl_reload_stats_entry_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_STATS_ENTRY) {
+ parg.data = &dst->reload_stats_entry[i];
+ if (devlink_dl_reload_stats_entry_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void devlink_dl_reload_act_info_free(struct devlink_dl_reload_act_info *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_reload_action_stats; i++)
+ devlink_dl_reload_act_stats_free(&obj->reload_action_stats[i]);
+ free(obj->reload_action_stats);
+}
+
+int devlink_dl_reload_act_info_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct devlink_dl_reload_act_info *dst = yarg->data;
+ unsigned int n_reload_action_stats = 0;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->reload_action_stats)
+ return ynl_error_parse(yarg, "attribute already present (dl-reload-act-info.reload-action-stats)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_RELOAD_ACTION) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reload_action = 1;
+ dst->reload_action = mnl_attr_get_u8(attr);
+ } else if (type == DEVLINK_ATTR_RELOAD_ACTION_STATS) {
+ n_reload_action_stats++;
+ }
+ }
+
+ if (n_reload_action_stats) {
+ dst->reload_action_stats = calloc(n_reload_action_stats, sizeof(*dst->reload_action_stats));
+ dst->n_reload_action_stats = n_reload_action_stats;
+ i = 0;
+ parg.rsp_policy = &devlink_dl_reload_act_stats_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_STATS) {
+ parg.data = &dst->reload_action_stats[i];
+ if (devlink_dl_reload_act_stats_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void devlink_dl_reload_stats_free(struct devlink_dl_reload_stats *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_reload_action_info; i++)
+ devlink_dl_reload_act_info_free(&obj->reload_action_info[i]);
+ free(obj->reload_action_info);
+}
+
+int devlink_dl_reload_stats_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct devlink_dl_reload_stats *dst = yarg->data;
+ unsigned int n_reload_action_info = 0;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->reload_action_info)
+ return ynl_error_parse(yarg, "attribute already present (dl-reload-stats.reload-action-info)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_RELOAD_ACTION_INFO) {
+ n_reload_action_info++;
+ }
+ }
+
+ if (n_reload_action_info) {
+ dst->reload_action_info = calloc(n_reload_action_info, sizeof(*dst->reload_action_info));
+ dst->n_reload_action_info = n_reload_action_info;
+ i = 0;
+ parg.rsp_policy = &devlink_dl_reload_act_info_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_INFO) {
+ parg.data = &dst->reload_action_info[i];
+ if (devlink_dl_reload_act_info_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void devlink_dl_dev_stats_free(struct devlink_dl_dev_stats *obj)
+{
+ devlink_dl_reload_stats_free(&obj->reload_stats);
+ devlink_dl_reload_stats_free(&obj->remote_reload_stats);
+}
+
+int devlink_dl_dev_stats_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct devlink_dl_dev_stats *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_RELOAD_STATS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reload_stats = 1;
+
+ parg.rsp_policy = &devlink_dl_reload_stats_nest;
+ parg.data = &dst->reload_stats;
+ if (devlink_dl_reload_stats_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == DEVLINK_ATTR_REMOTE_RELOAD_STATS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.remote_reload_stats = 1;
+
+ parg.rsp_policy = &devlink_dl_reload_stats_nest;
+ parg.data = &dst->remote_reload_stats;
+ if (devlink_dl_reload_stats_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return 0;
+}
+
+/* ============== DEVLINK_CMD_GET ============== */
+/* DEVLINK_CMD_GET - do */
+void devlink_get_req_free(struct devlink_get_req *req)
+{
+ free(req->bus_name);
+ free(req->dev_name);
+ free(req);
+}
+
+void devlink_get_rsp_free(struct devlink_get_rsp *rsp)
+{
+ free(rsp->bus_name);
+ free(rsp->dev_name);
+ devlink_dl_dev_stats_free(&rsp->dev_stats);
+ free(rsp);
+}
+
+int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct devlink_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_BUS_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.bus_name_len = len;
+ dst->bus_name = malloc(len + 1);
+ memcpy(dst->bus_name, mnl_attr_get_str(attr), len);
+ dst->bus_name[len] = 0;
+ } else if (type == DEVLINK_ATTR_DEV_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.dev_name_len = len;
+ dst->dev_name = malloc(len + 1);
+ memcpy(dst->dev_name, mnl_attr_get_str(attr), len);
+ dst->dev_name[len] = 0;
+ } else if (type == DEVLINK_ATTR_RELOAD_FAILED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reload_failed = 1;
+ dst->reload_failed = mnl_attr_get_u8(attr);
+ } else if (type == DEVLINK_ATTR_RELOAD_ACTION) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reload_action = 1;
+ dst->reload_action = mnl_attr_get_u8(attr);
+ } else if (type == DEVLINK_ATTR_DEV_STATS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.dev_stats = 1;
+
+ parg.rsp_policy = &devlink_dl_dev_stats_nest;
+ parg.data = &dst->dev_stats;
+ if (devlink_dl_dev_stats_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct devlink_get_rsp *
+devlink_get(struct ynl_sock *ys, struct devlink_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct devlink_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_GET, 1);
+ ys->req_policy = &devlink_nest;
+ yrs.yarg.rsp_policy = &devlink_nest;
+
+ if (req->_present.bus_name_len)
+ mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name);
+ if (req->_present.dev_name_len)
+ mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = devlink_get_rsp_parse;
+ yrs.rsp_cmd = 3; /* 3 == DEVLINK_CMD_NEW, the expected reply op */
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ devlink_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* DEVLINK_CMD_GET - dump */
+void devlink_get_list_free(struct devlink_get_list *rsp)
+{
+ struct devlink_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ free(rsp->obj.bus_name);
+ free(rsp->obj.dev_name);
+ devlink_dl_dev_stats_free(&rsp->obj.dev_stats);
+ free(rsp);
+ }
+}
+
+struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct devlink_get_list);
+ yds.cb = devlink_get_rsp_parse;
+ yds.rsp_cmd = 3; /* DEVLINK_CMD_NEW */
+ yds.rsp_policy = &devlink_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_GET, 1);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ devlink_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== DEVLINK_CMD_INFO_GET ============== */
+/* DEVLINK_CMD_INFO_GET - do */
+void devlink_info_get_req_free(struct devlink_info_get_req *req)
+{
+ free(req->bus_name);
+ free(req->dev_name);
+ free(req);
+}
+
+void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp)
+{
+ unsigned int i;
+
+ free(rsp->bus_name);
+ free(rsp->dev_name);
+ free(rsp->info_driver_name);
+ free(rsp->info_serial_number);
+ for (i = 0; i < rsp->n_info_version_fixed; i++)
+ devlink_dl_info_version_free(&rsp->info_version_fixed[i]);
+ free(rsp->info_version_fixed);
+ for (i = 0; i < rsp->n_info_version_running; i++)
+ devlink_dl_info_version_free(&rsp->info_version_running[i]);
+ free(rsp->info_version_running);
+ for (i = 0; i < rsp->n_info_version_stored; i++)
+ devlink_dl_info_version_free(&rsp->info_version_stored[i]);
+ free(rsp->info_version_stored);
+ free(rsp);
+}
+
+int devlink_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ unsigned int n_info_version_running = 0;
+ unsigned int n_info_version_stored = 0;
+ unsigned int n_info_version_fixed = 0;
+ struct ynl_parse_arg *yarg = data;
+ struct devlink_info_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ if (dst->info_version_fixed)
+ return ynl_error_parse(yarg, "attribute already present (devlink.info-version-fixed)");
+ if (dst->info_version_running)
+ return ynl_error_parse(yarg, "attribute already present (devlink.info-version-running)");
+ if (dst->info_version_stored)
+ return ynl_error_parse(yarg, "attribute already present (devlink.info-version-stored)");
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == DEVLINK_ATTR_BUS_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.bus_name_len = len;
+ dst->bus_name = malloc(len + 1);
+ memcpy(dst->bus_name, mnl_attr_get_str(attr), len);
+ dst->bus_name[len] = 0;
+ } else if (type == DEVLINK_ATTR_DEV_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.dev_name_len = len;
+ dst->dev_name = malloc(len + 1);
+ memcpy(dst->dev_name, mnl_attr_get_str(attr), len);
+ dst->dev_name[len] = 0;
+ } else if (type == DEVLINK_ATTR_INFO_DRIVER_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.info_driver_name_len = len;
+ dst->info_driver_name = malloc(len + 1);
+ memcpy(dst->info_driver_name, mnl_attr_get_str(attr), len);
+ dst->info_driver_name[len] = 0;
+ } else if (type == DEVLINK_ATTR_INFO_SERIAL_NUMBER) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.info_serial_number_len = len;
+ dst->info_serial_number = malloc(len + 1);
+ memcpy(dst->info_serial_number, mnl_attr_get_str(attr), len);
+ dst->info_serial_number[len] = 0;
+ } else if (type == DEVLINK_ATTR_INFO_VERSION_FIXED) {
+ n_info_version_fixed++;
+ } else if (type == DEVLINK_ATTR_INFO_VERSION_RUNNING) {
+ n_info_version_running++;
+ } else if (type == DEVLINK_ATTR_INFO_VERSION_STORED) {
+ n_info_version_stored++;
+ }
+ }
+
+ if (n_info_version_fixed) {
+ dst->info_version_fixed = calloc(n_info_version_fixed, sizeof(*dst->info_version_fixed));
+ dst->n_info_version_fixed = n_info_version_fixed;
+ i = 0;
+ parg.rsp_policy = &devlink_dl_info_version_nest;
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_FIXED) {
+ parg.data = &dst->info_version_fixed[i];
+ if (devlink_dl_info_version_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+ if (n_info_version_running) {
+ dst->info_version_running = calloc(n_info_version_running, sizeof(*dst->info_version_running));
+ dst->n_info_version_running = n_info_version_running;
+ i = 0;
+ parg.rsp_policy = &devlink_dl_info_version_nest;
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_RUNNING) {
+ parg.data = &dst->info_version_running[i];
+ if (devlink_dl_info_version_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+ if (n_info_version_stored) {
+ dst->info_version_stored = calloc(n_info_version_stored, sizeof(*dst->info_version_stored));
+ dst->n_info_version_stored = n_info_version_stored;
+ i = 0;
+ parg.rsp_policy = &devlink_dl_info_version_nest;
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_STORED) {
+ parg.data = &dst->info_version_stored[i];
+ if (devlink_dl_info_version_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct devlink_info_get_rsp *
+devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct devlink_info_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_INFO_GET, 1);
+ ys->req_policy = &devlink_nest;
+ yrs.yarg.rsp_policy = &devlink_nest;
+
+ if (req->_present.bus_name_len)
+ mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name);
+ if (req->_present.dev_name_len)
+ mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = devlink_info_get_rsp_parse;
+ yrs.rsp_cmd = DEVLINK_CMD_INFO_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ devlink_info_get_rsp_free(rsp);
+ return NULL;
+}
+
+const struct ynl_family ynl_devlink_family = {
+ .name = "devlink",
+};
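+
+/* Illustrative bring-up sketch (placeholder code, assuming the ynl
+ * library API from tools/net/ynl/lib/ynl.h): the family object is what
+ * callers pass to the core to resolve the genetlink family id.
+ *
+ *	struct ynl_error yerr;
+ *	struct ynl_sock *ys;
+ *
+ *	ys = ynl_sock_create(&ynl_devlink_family, &yerr);
+ *	if (!ys)
+ *		fprintf(stderr, "YNL: %s\n", yerr.msg);
+ */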
diff --git a/tools/net/ynl/generated/devlink-user.h b/tools/net/ynl/generated/devlink-user.h
new file mode 100644
index 000000000000..a008b99b6e24
--- /dev/null
+++ b/tools/net/ynl/generated/devlink-user.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_DEVLINK_GEN_H
+#define _LINUX_DEVLINK_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/devlink.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_devlink_family;
+
+/* Enums */
+const char *devlink_op_str(int op);
+
+/* Common nested types */
+struct devlink_dl_info_version {
+ struct {
+ __u32 info_version_name_len;
+ __u32 info_version_value_len;
+ } _present;
+
+ char *info_version_name;
+ char *info_version_value;
+};
+
+struct devlink_dl_reload_stats_entry {
+ struct {
+ __u32 reload_stats_limit:1;
+ __u32 reload_stats_value:1;
+ } _present;
+
+ __u8 reload_stats_limit;
+ __u32 reload_stats_value;
+};
+
+struct devlink_dl_reload_act_stats {
+ unsigned int n_reload_stats_entry;
+ struct devlink_dl_reload_stats_entry *reload_stats_entry;
+};
+
+struct devlink_dl_reload_act_info {
+ struct {
+ __u32 reload_action:1;
+ } _present;
+
+ __u8 reload_action;
+ unsigned int n_reload_action_stats;
+ struct devlink_dl_reload_act_stats *reload_action_stats;
+};
+
+struct devlink_dl_reload_stats {
+ unsigned int n_reload_action_info;
+ struct devlink_dl_reload_act_info *reload_action_info;
+};
+
+struct devlink_dl_dev_stats {
+ struct {
+ __u32 reload_stats:1;
+ __u32 remote_reload_stats:1;
+ } _present;
+
+ struct devlink_dl_reload_stats reload_stats;
+ struct devlink_dl_reload_stats remote_reload_stats;
+};
+
+/* ============== DEVLINK_CMD_GET ============== */
+/* DEVLINK_CMD_GET - do */
+struct devlink_get_req {
+ struct {
+ __u32 bus_name_len;
+ __u32 dev_name_len;
+ } _present;
+
+ char *bus_name;
+ char *dev_name;
+};
+
+static inline struct devlink_get_req *devlink_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct devlink_get_req));
+}
+void devlink_get_req_free(struct devlink_get_req *req);
+
+static inline void
+devlink_get_req_set_bus_name(struct devlink_get_req *req, const char *bus_name)
+{
+ free(req->bus_name);
+ req->_present.bus_name_len = strlen(bus_name);
+ req->bus_name = malloc(req->_present.bus_name_len + 1);
+ memcpy(req->bus_name, bus_name, req->_present.bus_name_len);
+ req->bus_name[req->_present.bus_name_len] = 0;
+}
+static inline void
+devlink_get_req_set_dev_name(struct devlink_get_req *req, const char *dev_name)
+{
+ free(req->dev_name);
+ req->_present.dev_name_len = strlen(dev_name);
+ req->dev_name = malloc(req->_present.dev_name_len + 1);
+ memcpy(req->dev_name, dev_name, req->_present.dev_name_len);
+ req->dev_name[req->_present.dev_name_len] = 0;
+}
+
+struct devlink_get_rsp {
+ struct {
+ __u32 bus_name_len;
+ __u32 dev_name_len;
+ __u32 reload_failed:1;
+ __u32 reload_action:1;
+ __u32 dev_stats:1;
+ } _present;
+
+ char *bus_name;
+ char *dev_name;
+ __u8 reload_failed;
+ __u8 reload_action;
+ struct devlink_dl_dev_stats dev_stats;
+};
+
+void devlink_get_rsp_free(struct devlink_get_rsp *rsp);
+
+/*
+ * Get devlink instances.
+ */
+struct devlink_get_rsp *
+devlink_get(struct ynl_sock *ys, struct devlink_get_req *req);
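+
+/* Illustrative "do" request sketch (placeholder bus/dev values, error
+ * handling elided): allocate a request, fill the handle, execute, then
+ * free both sides. devlink_get() returns NULL on failure.
+ *
+ *	struct devlink_get_req *req = devlink_get_req_alloc();
+ *	struct devlink_get_rsp *rsp;
+ *
+ *	devlink_get_req_set_bus_name(req, "pci");
+ *	devlink_get_req_set_dev_name(req, "0000:00:00.0");
+ *	rsp = devlink_get(ys, req);
+ *	devlink_get_req_free(req);
+ *	if (rsp) {
+ *		printf("%s/%s\n", rsp->bus_name, rsp->dev_name);
+ *		devlink_get_rsp_free(rsp);
+ *	}
+ */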
+
+/* DEVLINK_CMD_GET - dump */
+struct devlink_get_list {
+ struct devlink_get_list *next;
+ struct devlink_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void devlink_get_list_free(struct devlink_get_list *rsp);
+
+struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys);
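+
+/* Illustrative dump sketch: the list is terminated by YNL_LIST_END rather
+ * than NULL (mirroring the walk in devlink_get_list_free() above), and
+ * devlink_get_dump() returns NULL on failure.
+ *
+ *	struct devlink_get_list *rsp = devlink_get_dump(ys);
+ *	struct devlink_get_list *pos = rsp;
+ *
+ *	while (pos && (void *)pos != YNL_LIST_END) {
+ *		printf("%s/%s\n", pos->obj.bus_name, pos->obj.dev_name);
+ *		pos = pos->next;
+ *	}
+ *	if (rsp)
+ *		devlink_get_list_free(rsp);
+ */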
+
+/* ============== DEVLINK_CMD_INFO_GET ============== */
+/* DEVLINK_CMD_INFO_GET - do */
+struct devlink_info_get_req {
+ struct {
+ __u32 bus_name_len;
+ __u32 dev_name_len;
+ } _present;
+
+ char *bus_name;
+ char *dev_name;
+};
+
+static inline struct devlink_info_get_req *devlink_info_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct devlink_info_get_req));
+}
+void devlink_info_get_req_free(struct devlink_info_get_req *req);
+
+static inline void
+devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req,
+ const char *bus_name)
+{
+ free(req->bus_name);
+ req->_present.bus_name_len = strlen(bus_name);
+ req->bus_name = malloc(req->_present.bus_name_len + 1);
+ memcpy(req->bus_name, bus_name, req->_present.bus_name_len);
+ req->bus_name[req->_present.bus_name_len] = 0;
+}
+static inline void
+devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req,
+ const char *dev_name)
+{
+ free(req->dev_name);
+ req->_present.dev_name_len = strlen(dev_name);
+ req->dev_name = malloc(req->_present.dev_name_len + 1);
+ memcpy(req->dev_name, dev_name, req->_present.dev_name_len);
+ req->dev_name[req->_present.dev_name_len] = 0;
+}
+
+struct devlink_info_get_rsp {
+ struct {
+ __u32 bus_name_len;
+ __u32 dev_name_len;
+ __u32 info_driver_name_len;
+ __u32 info_serial_number_len;
+ } _present;
+
+ char *bus_name;
+ char *dev_name;
+ char *info_driver_name;
+ char *info_serial_number;
+ unsigned int n_info_version_fixed;
+ struct devlink_dl_info_version *info_version_fixed;
+ unsigned int n_info_version_running;
+ struct devlink_dl_info_version *info_version_running;
+ unsigned int n_info_version_stored;
+ struct devlink_dl_info_version *info_version_stored;
+};
+
+void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp);
+
+/*
+ * Get device information, such as the driver name and the hardware and
+ * firmware versions.
+ */
+struct devlink_info_get_rsp *
+devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req);
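+
+/* Illustrative sketch: version attributes arrive as counted arrays of
+ * name/value pairs, e.g. (error handling elided):
+ *
+ *	struct devlink_info_get_rsp *rsp = devlink_info_get(ys, req);
+ *	unsigned int i;
+ *
+ *	for (i = 0; rsp && i < rsp->n_info_version_running; i++)
+ *		printf("%s: %s\n",
+ *		       rsp->info_version_running[i].info_version_name,
+ *		       rsp->info_version_running[i].info_version_value);
+ *	if (rsp)
+ *		devlink_info_get_rsp_free(rsp);
+ */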
+
+#endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/tools/net/ynl/generated/ethtool-user.c b/tools/net/ynl/generated/ethtool-user.c
new file mode 100644
index 000000000000..74b883a14958
--- /dev/null
+++ b/tools/net/ynl/generated/ethtool-user.c
@@ -0,0 +1,6353 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/ethtool.yaml */
+/* YNL-GEN user source */
+/* YNL-ARG --user-header linux/ethtool_netlink.h --exclude-op stats-get */
+
+#include <stdlib.h>
+#include <string.h>
+#include "ethtool-user.h"
+#include "ynl.h"
+#include <linux/ethtool.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+#include "linux/ethtool_netlink.h"
+
+/* Enums */
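+/* ethtool_op_strmap maps message ids to the operation names from the
+ * spec; ethtool_op_str() below bounds-checks the id and returns NULL when
+ * no name is present (several entries are initialized by raw message id).
+ */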
+static const char * const ethtool_op_strmap[] = {
+ [ETHTOOL_MSG_STRSET_GET] = "strset-get",
+ [ETHTOOL_MSG_LINKINFO_GET] = "linkinfo-get",
+ [3] = "linkinfo-ntf",
+ [ETHTOOL_MSG_LINKMODES_GET] = "linkmodes-get",
+ [5] = "linkmodes-ntf",
+ [ETHTOOL_MSG_LINKSTATE_GET] = "linkstate-get",
+ [ETHTOOL_MSG_DEBUG_GET] = "debug-get",
+ [8] = "debug-ntf",
+ [ETHTOOL_MSG_WOL_GET] = "wol-get",
+ [10] = "wol-ntf",
+ [ETHTOOL_MSG_FEATURES_GET] = "features-get",
+ [ETHTOOL_MSG_FEATURES_SET] = "features-set",
+ [13] = "features-ntf",
+ [14] = "privflags-get",
+ [15] = "privflags-ntf",
+ [16] = "rings-get",
+ [17] = "rings-ntf",
+ [18] = "channels-get",
+ [19] = "channels-ntf",
+ [20] = "coalesce-get",
+ [21] = "coalesce-ntf",
+ [22] = "pause-get",
+ [23] = "pause-ntf",
+ [24] = "eee-get",
+ [25] = "eee-ntf",
+ [26] = "tsinfo-get",
+ [27] = "cable-test-ntf",
+ [28] = "cable-test-tdr-ntf",
+ [29] = "tunnel-info-get",
+ [30] = "fec-get",
+ [31] = "fec-ntf",
+ [32] = "module-eeprom-get",
+ [34] = "phc-vclocks-get",
+ [35] = "module-get",
+ [36] = "module-ntf",
+ [37] = "pse-get",
+ [ETHTOOL_MSG_RSS_GET] = "rss-get",
+ [ETHTOOL_MSG_PLCA_GET_CFG] = "plca-get-cfg",
+ [40] = "plca-get-status",
+ [41] = "plca-ntf",
+ [ETHTOOL_MSG_MM_GET] = "mm-get",
+ [43] = "mm-ntf",
+};
+
+const char *ethtool_op_str(int op)
+{
+ if (op < 0 || op >= (int)MNL_ARRAY_SIZE(ethtool_op_strmap))
+ return NULL;
+ return ethtool_op_strmap[op];
+}
+
+static const char * const ethtool_udp_tunnel_type_strmap[] = {
+ [0] = "vxlan",
+ [1] = "geneve",
+ [2] = "vxlan-gpe",
+};
+
+const char *ethtool_udp_tunnel_type_str(int value)
+{
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(ethtool_udp_tunnel_type_strmap))
+ return NULL;
+ return ethtool_udp_tunnel_type_strmap[value];
+}
+
+static const char * const ethtool_stringset_strmap[] = {
+};
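+/* No names are defined for this enum in the spec, so the strmap above is
+ * empty and the lookup below currently always returns NULL.
+ */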
+
+const char *ethtool_stringset_str(enum ethtool_stringset value)
+{
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(ethtool_stringset_strmap))
+ return NULL;
+ return ethtool_stringset_strmap[value];
+}
+
+/* Policies */
+struct ynl_policy_attr ethtool_header_policy[ETHTOOL_A_HEADER_MAX + 1] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .name = "dev-index", .type = YNL_PT_U32, },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, },
+ [ETHTOOL_A_HEADER_FLAGS] = { .name = "flags", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_header_nest = {
+ .max_attr = ETHTOOL_A_HEADER_MAX,
+ .table = ethtool_header_policy,
+};
+
+struct ynl_policy_attr ethtool_pause_stat_policy[ETHTOOL_A_PAUSE_STAT_MAX + 1] = {
+ [ETHTOOL_A_PAUSE_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
+ [ETHTOOL_A_PAUSE_STAT_TX_FRAMES] = { .name = "tx-frames", .type = YNL_PT_U64, },
+ [ETHTOOL_A_PAUSE_STAT_RX_FRAMES] = { .name = "rx-frames", .type = YNL_PT_U64, },
+};
+
+struct ynl_policy_nest ethtool_pause_stat_nest = {
+ .max_attr = ETHTOOL_A_PAUSE_STAT_MAX,
+ .table = ethtool_pause_stat_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_test_tdr_cfg_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .name = "first", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .name = "last", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP] = { .name = "step", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR] = { .name = "pair", .type = YNL_PT_U8, },
+};
+
+struct ynl_policy_nest ethtool_cable_test_tdr_cfg_nest = {
+ .max_attr = ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX,
+ .table = ethtool_cable_test_tdr_cfg_policy,
+};
+
+struct ynl_policy_attr ethtool_fec_stat_policy[ETHTOOL_A_FEC_STAT_MAX + 1] = {
+ [ETHTOOL_A_FEC_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
+ [ETHTOOL_A_FEC_STAT_CORRECTED] = { .name = "corrected", .type = YNL_PT_BINARY,},
+ [ETHTOOL_A_FEC_STAT_UNCORR] = { .name = "uncorr", .type = YNL_PT_BINARY,},
+ [ETHTOOL_A_FEC_STAT_CORR_BITS] = { .name = "corr-bits", .type = YNL_PT_BINARY,},
+};
+
+struct ynl_policy_nest ethtool_fec_stat_nest = {
+ .max_attr = ETHTOOL_A_FEC_STAT_MAX,
+ .table = ethtool_fec_stat_policy,
+};
+
+struct ynl_policy_attr ethtool_mm_stat_policy[ETHTOOL_A_MM_STAT_MAX + 1] = {
+ [ETHTOOL_A_MM_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
+ [ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS] = { .name = "reassembly-errors", .type = YNL_PT_U64, },
+ [ETHTOOL_A_MM_STAT_SMD_ERRORS] = { .name = "smd-errors", .type = YNL_PT_U64, },
+ [ETHTOOL_A_MM_STAT_REASSEMBLY_OK] = { .name = "reassembly-ok", .type = YNL_PT_U64, },
+ [ETHTOOL_A_MM_STAT_RX_FRAG_COUNT] = { .name = "rx-frag-count", .type = YNL_PT_U64, },
+ [ETHTOOL_A_MM_STAT_TX_FRAG_COUNT] = { .name = "tx-frag-count", .type = YNL_PT_U64, },
+ [ETHTOOL_A_MM_STAT_HOLD_COUNT] = { .name = "hold-count", .type = YNL_PT_U64, },
+};
+
+struct ynl_policy_nest ethtool_mm_stat_nest = {
+ .max_attr = ETHTOOL_A_MM_STAT_MAX,
+ .table = ethtool_mm_stat_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_result_policy[ETHTOOL_A_CABLE_RESULT_MAX + 1] = {
+ [ETHTOOL_A_CABLE_RESULT_PAIR] = { .name = "pair", .type = YNL_PT_U8, },
+ [ETHTOOL_A_CABLE_RESULT_CODE] = { .name = "code", .type = YNL_PT_U8, },
+};
+
+struct ynl_policy_nest ethtool_cable_result_nest = {
+ .max_attr = ETHTOOL_A_CABLE_RESULT_MAX,
+ .table = ethtool_cable_result_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_fault_length_policy[ETHTOOL_A_CABLE_FAULT_LENGTH_MAX + 1] = {
+ [ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR] = { .name = "pair", .type = YNL_PT_U8, },
+ [ETHTOOL_A_CABLE_FAULT_LENGTH_CM] = { .name = "cm", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_cable_fault_length_nest = {
+ .max_attr = ETHTOOL_A_CABLE_FAULT_LENGTH_MAX,
+ .table = ethtool_cable_fault_length_policy,
+};
+
+struct ynl_policy_attr ethtool_bitset_bit_policy[ETHTOOL_A_BITSET_BIT_MAX + 1] = {
+ [ETHTOOL_A_BITSET_BIT_INDEX] = { .name = "index", .type = YNL_PT_U32, },
+ [ETHTOOL_A_BITSET_BIT_NAME] = { .name = "name", .type = YNL_PT_NUL_STR, },
+ [ETHTOOL_A_BITSET_BIT_VALUE] = { .name = "value", .type = YNL_PT_FLAG, },
+};
+
+struct ynl_policy_nest ethtool_bitset_bit_nest = {
+ .max_attr = ETHTOOL_A_BITSET_BIT_MAX,
+ .table = ethtool_bitset_bit_policy,
+};
+
+struct ynl_policy_attr ethtool_tunnel_udp_entry_policy[ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX + 1] = {
+ [ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT] = { .name = "port", .type = YNL_PT_U16, },
+ [ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE] = { .name = "type", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_tunnel_udp_entry_nest = {
+ .max_attr = ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX,
+ .table = ethtool_tunnel_udp_entry_policy,
+};
+
+struct ynl_policy_attr ethtool_string_policy[ETHTOOL_A_STRING_MAX + 1] = {
+ [ETHTOOL_A_STRING_INDEX] = { .name = "index", .type = YNL_PT_U32, },
+ [ETHTOOL_A_STRING_VALUE] = { .name = "value", .type = YNL_PT_NUL_STR, },
+};
+
+struct ynl_policy_nest ethtool_string_nest = {
+ .max_attr = ETHTOOL_A_STRING_MAX,
+ .table = ethtool_string_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_nest_policy[ETHTOOL_A_CABLE_NEST_MAX + 1] = {
+ [ETHTOOL_A_CABLE_NEST_RESULT] = { .name = "result", .type = YNL_PT_NEST, .nest = &ethtool_cable_result_nest, },
+ [ETHTOOL_A_CABLE_NEST_FAULT_LENGTH] = { .name = "fault-length", .type = YNL_PT_NEST, .nest = &ethtool_cable_fault_length_nest, },
+};
+
+struct ynl_policy_nest ethtool_cable_nest_nest = {
+ .max_attr = ETHTOOL_A_CABLE_NEST_MAX,
+ .table = ethtool_cable_nest_policy,
+};
+
+struct ynl_policy_attr ethtool_bitset_bits_policy[ETHTOOL_A_BITSET_BITS_MAX + 1] = {
+ [ETHTOOL_A_BITSET_BITS_BIT] = { .name = "bit", .type = YNL_PT_NEST, .nest = &ethtool_bitset_bit_nest, },
+};
+
+struct ynl_policy_nest ethtool_bitset_bits_nest = {
+ .max_attr = ETHTOOL_A_BITSET_BITS_MAX,
+ .table = ethtool_bitset_bits_policy,
+};
+
+struct ynl_policy_attr ethtool_strings_policy[ETHTOOL_A_STRINGS_MAX + 1] = {
+ [ETHTOOL_A_STRINGS_STRING] = { .name = "string", .type = YNL_PT_NEST, .nest = &ethtool_string_nest, },
+};
+
+struct ynl_policy_nest ethtool_strings_nest = {
+ .max_attr = ETHTOOL_A_STRINGS_MAX,
+ .table = ethtool_strings_policy,
+};
+
+struct ynl_policy_attr ethtool_bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = {
+ [ETHTOOL_A_BITSET_NOMASK] = { .name = "nomask", .type = YNL_PT_FLAG, },
+ [ETHTOOL_A_BITSET_SIZE] = { .name = "size", .type = YNL_PT_U32, },
+ [ETHTOOL_A_BITSET_BITS] = { .name = "bits", .type = YNL_PT_NEST, .nest = &ethtool_bitset_bits_nest, },
+};
+
+struct ynl_policy_nest ethtool_bitset_nest = {
+ .max_attr = ETHTOOL_A_BITSET_MAX,
+ .table = ethtool_bitset_policy,
+};
+
+struct ynl_policy_attr ethtool_stringset_policy[ETHTOOL_A_STRINGSET_MAX + 1] = {
+ [ETHTOOL_A_STRINGSET_ID] = { .name = "id", .type = YNL_PT_U32, },
+ [ETHTOOL_A_STRINGSET_COUNT] = { .name = "count", .type = YNL_PT_U32, },
+ [ETHTOOL_A_STRINGSET_STRINGS] = { .name = "strings", .type = YNL_PT_NEST, .nest = &ethtool_strings_nest, },
+};
+
+struct ynl_policy_nest ethtool_stringset_nest = {
+ .max_attr = ETHTOOL_A_STRINGSET_MAX,
+ .table = ethtool_stringset_policy,
+};
+
+struct ynl_policy_attr ethtool_tunnel_udp_table_policy[ETHTOOL_A_TUNNEL_UDP_TABLE_MAX + 1] = {
+ [ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE] = { .name = "size", .type = YNL_PT_U32, },
+ [ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES] = { .name = "types", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY] = { .name = "entry", .type = YNL_PT_NEST, .nest = &ethtool_tunnel_udp_entry_nest, },
+};
+
+struct ynl_policy_nest ethtool_tunnel_udp_table_nest = {
+ .max_attr = ETHTOOL_A_TUNNEL_UDP_TABLE_MAX,
+ .table = ethtool_tunnel_udp_table_policy,
+};
+
+struct ynl_policy_attr ethtool_stringsets_policy[ETHTOOL_A_STRINGSETS_MAX + 1] = {
+ [ETHTOOL_A_STRINGSETS_STRINGSET] = { .name = "stringset", .type = YNL_PT_NEST, .nest = &ethtool_stringset_nest, },
+};
+
+struct ynl_policy_nest ethtool_stringsets_nest = {
+ .max_attr = ETHTOOL_A_STRINGSETS_MAX,
+ .table = ethtool_stringsets_policy,
+};
+
+struct ynl_policy_attr ethtool_tunnel_udp_policy[ETHTOOL_A_TUNNEL_UDP_MAX + 1] = {
+ [ETHTOOL_A_TUNNEL_UDP_TABLE] = { .name = "table", .type = YNL_PT_NEST, .nest = &ethtool_tunnel_udp_table_nest, },
+};
+
+struct ynl_policy_nest ethtool_tunnel_udp_nest = {
+ .max_attr = ETHTOOL_A_TUNNEL_UDP_MAX,
+ .table = ethtool_tunnel_udp_policy,
+};
+
+struct ynl_policy_attr ethtool_strset_policy[ETHTOOL_A_STRSET_MAX + 1] = {
+ [ETHTOOL_A_STRSET_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_STRSET_STRINGSETS] = { .name = "stringsets", .type = YNL_PT_NEST, .nest = &ethtool_stringsets_nest, },
+ [ETHTOOL_A_STRSET_COUNTS_ONLY] = { .name = "counts-only", .type = YNL_PT_FLAG, },
+};
+
+struct ynl_policy_nest ethtool_strset_nest = {
+ .max_attr = ETHTOOL_A_STRSET_MAX,
+ .table = ethtool_strset_policy,
+};
+
+struct ynl_policy_attr ethtool_linkinfo_policy[ETHTOOL_A_LINKINFO_MAX + 1] = {
+ [ETHTOOL_A_LINKINFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_LINKINFO_PORT] = { .name = "port", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKINFO_PHYADDR] = { .name = "phyaddr", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKINFO_TP_MDIX] = { .name = "tp-mdix", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .name = "tp-mdix-ctrl", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .name = "transceiver", .type = YNL_PT_U8, },
+};
+
+struct ynl_policy_nest ethtool_linkinfo_nest = {
+ .max_attr = ETHTOOL_A_LINKINFO_MAX,
+ .table = ethtool_linkinfo_policy,
+};
+
+struct ynl_policy_attr ethtool_linkmodes_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
+ [ETHTOOL_A_LINKMODES_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_LINKMODES_AUTONEG] = { .name = "autoneg", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKMODES_OURS] = { .name = "ours", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_LINKMODES_PEER] = { .name = "peer", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_LINKMODES_SPEED] = { .name = "speed", .type = YNL_PT_U32, },
+ [ETHTOOL_A_LINKMODES_DUPLEX] = { .name = "duplex", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .name = "master-slave-cfg", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE] = { .name = "master-slave-state", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKMODES_LANES] = { .name = "lanes", .type = YNL_PT_U32, },
+ [ETHTOOL_A_LINKMODES_RATE_MATCHING] = { .name = "rate-matching", .type = YNL_PT_U8, },
+};
+
+struct ynl_policy_nest ethtool_linkmodes_nest = {
+ .max_attr = ETHTOOL_A_LINKMODES_MAX,
+ .table = ethtool_linkmodes_policy,
+};
+
+struct ynl_policy_attr ethtool_linkstate_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
+ [ETHTOOL_A_LINKSTATE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_LINKSTATE_LINK] = { .name = "link", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKSTATE_SQI] = { .name = "sqi", .type = YNL_PT_U32, },
+ [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .name = "sqi-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_LINKSTATE_EXT_STATE] = { .name = "ext-state", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKSTATE_EXT_SUBSTATE] = { .name = "ext-substate", .type = YNL_PT_U8, },
+ [ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT] = { .name = "ext-down-cnt", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_linkstate_nest = {
+ .max_attr = ETHTOOL_A_LINKSTATE_MAX,
+ .table = ethtool_linkstate_policy,
+};
+
+struct ynl_policy_attr ethtool_debug_policy[ETHTOOL_A_DEBUG_MAX + 1] = {
+ [ETHTOOL_A_DEBUG_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_DEBUG_MSGMASK] = { .name = "msgmask", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+};
+
+struct ynl_policy_nest ethtool_debug_nest = {
+ .max_attr = ETHTOOL_A_DEBUG_MAX,
+ .table = ethtool_debug_policy,
+};
+
+struct ynl_policy_attr ethtool_wol_policy[ETHTOOL_A_WOL_MAX + 1] = {
+ [ETHTOOL_A_WOL_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_WOL_MODES] = { .name = "modes", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_WOL_SOPASS] = { .name = "sopass", .type = YNL_PT_BINARY,},
+};
+
+struct ynl_policy_nest ethtool_wol_nest = {
+ .max_attr = ETHTOOL_A_WOL_MAX,
+ .table = ethtool_wol_policy,
+};
+
+struct ynl_policy_attr ethtool_features_policy[ETHTOOL_A_FEATURES_MAX + 1] = {
+ [ETHTOOL_A_FEATURES_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_FEATURES_HW] = { .name = "hw", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_FEATURES_WANTED] = { .name = "wanted", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_FEATURES_ACTIVE] = { .name = "active", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_FEATURES_NOCHANGE] = { .name = "nochange", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+};
+
+struct ynl_policy_nest ethtool_features_nest = {
+ .max_attr = ETHTOOL_A_FEATURES_MAX,
+ .table = ethtool_features_policy,
+};
+
+struct ynl_policy_attr ethtool_privflags_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = {
+ [ETHTOOL_A_PRIVFLAGS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .name = "flags", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+};
+
+struct ynl_policy_nest ethtool_privflags_nest = {
+ .max_attr = ETHTOOL_A_PRIVFLAGS_MAX,
+ .table = ethtool_privflags_policy,
+};
+
+struct ynl_policy_attr ethtool_rings_policy[ETHTOOL_A_RINGS_MAX + 1] = {
+ [ETHTOOL_A_RINGS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_RINGS_RX_MAX] = { .name = "rx-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .name = "rx-mini-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .name = "rx-jumbo-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_TX_MAX] = { .name = "tx-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_RX] = { .name = "rx", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_RX_MINI] = { .name = "rx-mini", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_RX_JUMBO] = { .name = "rx-jumbo", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_TX] = { .name = "tx", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_RX_BUF_LEN] = { .name = "rx-buf-len", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] = { .name = "tcp-data-split", .type = YNL_PT_U8, },
+ [ETHTOOL_A_RINGS_CQE_SIZE] = { .name = "cqe-size", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_TX_PUSH] = { .name = "tx-push", .type = YNL_PT_U8, },
+ [ETHTOOL_A_RINGS_RX_PUSH] = { .name = "rx-push", .type = YNL_PT_U8, },
+ [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .name = "tx-push-buf-len", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX] = { .name = "tx-push-buf-len-max", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_rings_nest = {
+ .max_attr = ETHTOOL_A_RINGS_MAX,
+ .table = ethtool_rings_policy,
+};
+
+struct ynl_policy_attr ethtool_channels_policy[ETHTOOL_A_CHANNELS_MAX + 1] = {
+ [ETHTOOL_A_CHANNELS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_CHANNELS_RX_MAX] = { .name = "rx-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_TX_MAX] = { .name = "tx-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .name = "other-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .name = "combined-max", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_RX_COUNT] = { .name = "rx-count", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_TX_COUNT] = { .name = "tx-count", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .name = "other-count", .type = YNL_PT_U32, },
+ [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .name = "combined-count", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_channels_nest = {
+ .max_attr = ETHTOOL_A_CHANNELS_MAX,
+ .table = ethtool_channels_policy,
+};
+
+struct ynl_policy_attr ethtool_coalesce_policy[ETHTOOL_A_COALESCE_MAX + 1] = {
+ [ETHTOOL_A_COALESCE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_COALESCE_RX_USECS] = { .name = "rx-usecs", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_MAX_FRAMES] = { .name = "rx-max-frames", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_USECS_IRQ] = { .name = "rx-usecs-irq", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ] = { .name = "rx-max-frames-irq", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_USECS] = { .name = "tx-usecs", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_MAX_FRAMES] = { .name = "tx-max-frames", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_USECS_IRQ] = { .name = "tx-usecs-irq", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ] = { .name = "tx-max-frames-irq", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_STATS_BLOCK_USECS] = { .name = "stats-block-usecs", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX] = { .name = "use-adaptive-rx", .type = YNL_PT_U8, },
+ [ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX] = { .name = "use-adaptive-tx", .type = YNL_PT_U8, },
+ [ETHTOOL_A_COALESCE_PKT_RATE_LOW] = { .name = "pkt-rate-low", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_USECS_LOW] = { .name = "rx-usecs-low", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW] = { .name = "rx-max-frames-low", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_USECS_LOW] = { .name = "tx-usecs-low", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW] = { .name = "tx-max-frames-low", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_PKT_RATE_HIGH] = { .name = "pkt-rate-high", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_USECS_HIGH] = { .name = "rx-usecs-high", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH] = { .name = "rx-max-frames-high", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .name = "tx-usecs-high", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .name = "tx-max-frames-high", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .name = "rate-sample-interval", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = { .name = "use-cqe-mode-tx", .type = YNL_PT_U8, },
+ [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = { .name = "use-cqe-mode-rx", .type = YNL_PT_U8, },
+ [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .name = "tx-aggr-max-bytes", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .name = "tx-aggr-max-frames", .type = YNL_PT_U32, },
+ [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .name = "tx-aggr-time-usecs", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_coalesce_nest = {
+ .max_attr = ETHTOOL_A_COALESCE_MAX,
+ .table = ethtool_coalesce_policy,
+};
+
+struct ynl_policy_attr ethtool_pause_policy[ETHTOOL_A_PAUSE_MAX + 1] = {
+ [ETHTOOL_A_PAUSE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_PAUSE_AUTONEG] = { .name = "autoneg", .type = YNL_PT_U8, },
+ [ETHTOOL_A_PAUSE_RX] = { .name = "rx", .type = YNL_PT_U8, },
+ [ETHTOOL_A_PAUSE_TX] = { .name = "tx", .type = YNL_PT_U8, },
+ [ETHTOOL_A_PAUSE_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = &ethtool_pause_stat_nest, },
+ [ETHTOOL_A_PAUSE_STATS_SRC] = { .name = "stats-src", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_pause_nest = {
+ .max_attr = ETHTOOL_A_PAUSE_MAX,
+ .table = ethtool_pause_policy,
+};
+
+struct ynl_policy_attr ethtool_eee_policy[ETHTOOL_A_EEE_MAX + 1] = {
+ [ETHTOOL_A_EEE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_EEE_MODES_OURS] = { .name = "modes-ours", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_EEE_MODES_PEER] = { .name = "modes-peer", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_EEE_ACTIVE] = { .name = "active", .type = YNL_PT_U8, },
+ [ETHTOOL_A_EEE_ENABLED] = { .name = "enabled", .type = YNL_PT_U8, },
+ [ETHTOOL_A_EEE_TX_LPI_ENABLED] = { .name = "tx-lpi-enabled", .type = YNL_PT_U8, },
+ [ETHTOOL_A_EEE_TX_LPI_TIMER] = { .name = "tx-lpi-timer", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_eee_nest = {
+ .max_attr = ETHTOOL_A_EEE_MAX,
+ .table = ethtool_eee_policy,
+};
+
+struct ynl_policy_attr ethtool_tsinfo_policy[ETHTOOL_A_TSINFO_MAX + 1] = {
+ [ETHTOOL_A_TSINFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_TSINFO_TIMESTAMPING] = { .name = "timestamping", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_TSINFO_TX_TYPES] = { .name = "tx-types", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_TSINFO_RX_FILTERS] = { .name = "rx-filters", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_TSINFO_PHC_INDEX] = { .name = "phc-index", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_tsinfo_nest = {
+ .max_attr = ETHTOOL_A_TSINFO_MAX,
+ .table = ethtool_tsinfo_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_test_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+};
+
+struct ynl_policy_nest ethtool_cable_test_nest = {
+ .max_attr = ETHTOOL_A_CABLE_TEST_MAX,
+ .table = ethtool_cable_test_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_test_ntf_policy[ETHTOOL_A_CABLE_TEST_NTF_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_NTF_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_CABLE_TEST_NTF_STATUS] = { .name = "status", .type = YNL_PT_U8, },
+ [ETHTOOL_A_CABLE_TEST_NTF_NEST] = { .name = "nest", .type = YNL_PT_NEST, .nest = &ethtool_cable_nest_nest, },
+};
+
+struct ynl_policy_nest ethtool_cable_test_ntf_nest = {
+ .max_attr = ETHTOOL_A_CABLE_TEST_NTF_MAX,
+ .table = ethtool_cable_test_ntf_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_test_tdr_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_TDR_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .name = "cfg", .type = YNL_PT_NEST, .nest = &ethtool_cable_test_tdr_cfg_nest, },
+};
+
+struct ynl_policy_nest ethtool_cable_test_tdr_nest = {
+ .max_attr = ETHTOOL_A_CABLE_TEST_TDR_MAX,
+ .table = ethtool_cable_test_tdr_policy,
+};
+
+struct ynl_policy_attr ethtool_cable_test_tdr_ntf_policy[ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS] = { .name = "status", .type = YNL_PT_U8, },
+ [ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST] = { .name = "nest", .type = YNL_PT_NEST, .nest = &ethtool_cable_nest_nest, },
+};
+
+struct ynl_policy_nest ethtool_cable_test_tdr_ntf_nest = {
+ .max_attr = ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX,
+ .table = ethtool_cable_test_tdr_ntf_policy,
+};
+
+struct ynl_policy_attr ethtool_tunnel_info_policy[ETHTOOL_A_TUNNEL_INFO_MAX + 1] = {
+ [ETHTOOL_A_TUNNEL_INFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_TUNNEL_INFO_UDP_PORTS] = { .name = "udp-ports", .type = YNL_PT_NEST, .nest = &ethtool_tunnel_udp_nest, },
+};
+
+struct ynl_policy_nest ethtool_tunnel_info_nest = {
+ .max_attr = ETHTOOL_A_TUNNEL_INFO_MAX,
+ .table = ethtool_tunnel_info_policy,
+};
+
+struct ynl_policy_attr ethtool_fec_policy[ETHTOOL_A_FEC_MAX + 1] = {
+ [ETHTOOL_A_FEC_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_FEC_MODES] = { .name = "modes", .type = YNL_PT_NEST, .nest = &ethtool_bitset_nest, },
+ [ETHTOOL_A_FEC_AUTO] = { .name = "auto", .type = YNL_PT_U8, },
+ [ETHTOOL_A_FEC_ACTIVE] = { .name = "active", .type = YNL_PT_U32, },
+ [ETHTOOL_A_FEC_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = &ethtool_fec_stat_nest, },
+};
+
+struct ynl_policy_nest ethtool_fec_nest = {
+ .max_attr = ETHTOOL_A_FEC_MAX,
+ .table = ethtool_fec_policy,
+};
+
+struct ynl_policy_attr ethtool_module_eeprom_policy[ETHTOOL_A_MODULE_EEPROM_MAX + 1] = {
+ [ETHTOOL_A_MODULE_EEPROM_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .name = "offset", .type = YNL_PT_U32, },
+ [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .name = "length", .type = YNL_PT_U32, },
+ [ETHTOOL_A_MODULE_EEPROM_PAGE] = { .name = "page", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MODULE_EEPROM_BANK] = { .name = "bank", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] = { .name = "i2c-address", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MODULE_EEPROM_DATA] = { .name = "data", .type = YNL_PT_BINARY,},
+};
+
+struct ynl_policy_nest ethtool_module_eeprom_nest = {
+ .max_attr = ETHTOOL_A_MODULE_EEPROM_MAX,
+ .table = ethtool_module_eeprom_policy,
+};
+
+struct ynl_policy_attr ethtool_phc_vclocks_policy[ETHTOOL_A_PHC_VCLOCKS_MAX + 1] = {
+ [ETHTOOL_A_PHC_VCLOCKS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_PHC_VCLOCKS_NUM] = { .name = "num", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PHC_VCLOCKS_INDEX] = { .name = "index", .type = YNL_PT_BINARY,},
+};
+
+struct ynl_policy_nest ethtool_phc_vclocks_nest = {
+ .max_attr = ETHTOOL_A_PHC_VCLOCKS_MAX,
+ .table = ethtool_phc_vclocks_policy,
+};
+
+struct ynl_policy_attr ethtool_module_policy[ETHTOOL_A_MODULE_MAX + 1] = {
+ [ETHTOOL_A_MODULE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_MODULE_POWER_MODE_POLICY] = { .name = "power-mode-policy", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MODULE_POWER_MODE] = { .name = "power-mode", .type = YNL_PT_U8, },
+};
+
+struct ynl_policy_nest ethtool_module_nest = {
+ .max_attr = ETHTOOL_A_MODULE_MAX,
+ .table = ethtool_module_policy,
+};
+
+struct ynl_policy_attr ethtool_pse_policy[ETHTOOL_A_PSE_MAX + 1] = {
+ [ETHTOOL_A_PSE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_PODL_PSE_ADMIN_STATE] = { .name = "admin-state", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] = { .name = "admin-control", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PODL_PSE_PW_D_STATUS] = { .name = "pw-d-status", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_pse_nest = {
+ .max_attr = ETHTOOL_A_PSE_MAX,
+ .table = ethtool_pse_policy,
+};
+
+struct ynl_policy_attr ethtool_rss_policy[ETHTOOL_A_RSS_MAX + 1] = {
+ [ETHTOOL_A_RSS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_RSS_CONTEXT] = { .name = "context", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RSS_HFUNC] = { .name = "hfunc", .type = YNL_PT_U32, },
+ [ETHTOOL_A_RSS_INDIR] = { .name = "indir", .type = YNL_PT_BINARY,},
+ [ETHTOOL_A_RSS_HKEY] = { .name = "hkey", .type = YNL_PT_BINARY,},
+};
+
+struct ynl_policy_nest ethtool_rss_nest = {
+ .max_attr = ETHTOOL_A_RSS_MAX,
+ .table = ethtool_rss_policy,
+};
+
+struct ynl_policy_attr ethtool_plca_policy[ETHTOOL_A_PLCA_MAX + 1] = {
+ [ETHTOOL_A_PLCA_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_PLCA_VERSION] = { .name = "version", .type = YNL_PT_U16, },
+ [ETHTOOL_A_PLCA_ENABLED] = { .name = "enabled", .type = YNL_PT_U8, },
+ [ETHTOOL_A_PLCA_STATUS] = { .name = "status", .type = YNL_PT_U8, },
+ [ETHTOOL_A_PLCA_NODE_CNT] = { .name = "node-cnt", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PLCA_NODE_ID] = { .name = "node-id", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PLCA_TO_TMR] = { .name = "to-tmr", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PLCA_BURST_CNT] = { .name = "burst-cnt", .type = YNL_PT_U32, },
+ [ETHTOOL_A_PLCA_BURST_TMR] = { .name = "burst-tmr", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest ethtool_plca_nest = {
+ .max_attr = ETHTOOL_A_PLCA_MAX,
+ .table = ethtool_plca_policy,
+};
+
+struct ynl_policy_attr ethtool_mm_policy[ETHTOOL_A_MM_MAX + 1] = {
+ [ETHTOOL_A_MM_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
+ [ETHTOOL_A_MM_PMAC_ENABLED] = { .name = "pmac-enabled", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MM_TX_ENABLED] = { .name = "tx-enabled", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MM_TX_ACTIVE] = { .name = "tx-active", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = { .name = "tx-min-frag-size", .type = YNL_PT_U32, },
+ [ETHTOOL_A_MM_RX_MIN_FRAG_SIZE] = { .name = "rx-min-frag-size", .type = YNL_PT_U32, },
+ [ETHTOOL_A_MM_VERIFY_ENABLED] = { .name = "verify-enabled", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MM_VERIFY_STATUS] = { .name = "verify-status", .type = YNL_PT_U8, },
+ [ETHTOOL_A_MM_VERIFY_TIME] = { .name = "verify-time", .type = YNL_PT_U32, },
+ [ETHTOOL_A_MM_MAX_VERIFY_TIME] = { .name = "max-verify-time", .type = YNL_PT_U32, },
+ [ETHTOOL_A_MM_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = &ethtool_mm_stat_nest, },
+};
+
+struct ynl_policy_nest ethtool_mm_nest = {
+ .max_attr = ETHTOOL_A_MM_MAX,
+ .table = ethtool_mm_policy,
+};
+
+/* Common nested types */
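+/* For each nested attribute set the generator emits up to three helpers:
+ * _free() for heap-allocated members, _put() for nests that appear in
+ * requests, and _parse() for nests that appear in replies.
+ */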
+void ethtool_header_free(struct ethtool_header *obj)
+{
+ free(obj->dev_name);
+}
+
+int ethtool_header_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_header *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.dev_index)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_HEADER_DEV_INDEX, obj->dev_index);
+ if (obj->_present.dev_name_len)
+ mnl_attr_put_strz(nlh, ETHTOOL_A_HEADER_DEV_NAME, obj->dev_name);
+ if (obj->_present.flags)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_HEADER_FLAGS, obj->flags);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_header_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_header *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_HEADER_DEV_INDEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.dev_index = 1;
+ dst->dev_index = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_HEADER_DEV_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.dev_name_len = len;
+ dst->dev_name = malloc(len + 1);
+ memcpy(dst->dev_name, mnl_attr_get_str(attr), len);
+ dst->dev_name[len] = 0;
+ } else if (type == ETHTOOL_A_HEADER_FLAGS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.flags = 1;
+ dst->flags = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return 0;
+}
+
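+/* Usage sketch (illustrative, not part of the generated output): every
+ * request below embeds this common header nest to select the target
+ * device. Going only by the fields visible above (dev_index, dev_name,
+ * the _present bits), a caller might fill it like this; "eth0" is a
+ * placeholder:
+ *
+ *	struct ethtool_linkinfo_get_req *req = calloc(1, sizeof(*req));
+ *
+ *	req->header.dev_index = if_nametoindex("eth0");
+ *	req->header._present.dev_index = 1;
+ *	req->_present.header = 1;
+ *
+ * dev_name may be set instead of dev_index, but it must point to heap
+ * memory because ethtool_header_free() hands it to free().
+ */
+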
+void ethtool_pause_stat_free(struct ethtool_pause_stat *obj)
+{
+}
+
+int ethtool_pause_stat_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_pause_stat *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.tx_frames)
+ mnl_attr_put_u64(nlh, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, obj->tx_frames);
+ if (obj->_present.rx_frames)
+ mnl_attr_put_u64(nlh, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, obj->rx_frames);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_pause_stat_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_pause_stat *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PAUSE_STAT_TX_FRAMES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_frames = 1;
+ dst->tx_frames = mnl_attr_get_u64(attr);
+ } else if (type == ETHTOOL_A_PAUSE_STAT_RX_FRAMES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_frames = 1;
+ dst->rx_frames = mnl_attr_get_u64(attr);
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_cable_test_tdr_cfg_free(struct ethtool_cable_test_tdr_cfg *obj)
+{
+}
+
+void ethtool_fec_stat_free(struct ethtool_fec_stat *obj)
+{
+ free(obj->corrected);
+ free(obj->uncorr);
+ free(obj->corr_bits);
+}
+
+int ethtool_fec_stat_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_fec_stat *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.corrected_len)
+ mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_CORRECTED, obj->_present.corrected_len, obj->corrected);
+ if (obj->_present.uncorr_len)
+ mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_UNCORR, obj->_present.uncorr_len, obj->uncorr);
+ if (obj->_present.corr_bits_len)
+ mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_CORR_BITS, obj->_present.corr_bits_len, obj->corr_bits);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_fec_stat_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_fec_stat *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_FEC_STAT_CORRECTED) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.corrected_len = len;
+ dst->corrected = malloc(len);
+ memcpy(dst->corrected, mnl_attr_get_payload(attr), len);
+ } else if (type == ETHTOOL_A_FEC_STAT_UNCORR) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.uncorr_len = len;
+ dst->uncorr = malloc(len);
+ memcpy(dst->uncorr, mnl_attr_get_payload(attr), len);
+ } else if (type == ETHTOOL_A_FEC_STAT_CORR_BITS) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.corr_bits_len = len;
+ dst->corr_bits = malloc(len);
+ memcpy(dst->corr_bits, mnl_attr_get_payload(attr), len);
+ }
+ }
+
+ return 0;
+}
+
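+/* Consumption note (illustrative): the three FEC counters above are
+ * opaque binary attributes here; per the ethtool netlink spec they are
+ * assumed to carry an array of __u64 (total first, then one entry per
+ * lane). Under that assumption a caller could decode them as:
+ *
+ *	__u64 *vals = (__u64 *)stat->corrected;
+ *	unsigned int n = stat->_present.corrected_len / sizeof(__u64);
+ */
+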
+void ethtool_mm_stat_free(struct ethtool_mm_stat *obj)
+{
+}
+
+int ethtool_mm_stat_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_mm_stat *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reassembly_errors = 1;
+ dst->reassembly_errors = mnl_attr_get_u64(attr);
+ } else if (type == ETHTOOL_A_MM_STAT_SMD_ERRORS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.smd_errors = 1;
+ dst->smd_errors = mnl_attr_get_u64(attr);
+ } else if (type == ETHTOOL_A_MM_STAT_REASSEMBLY_OK) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.reassembly_ok = 1;
+ dst->reassembly_ok = mnl_attr_get_u64(attr);
+ } else if (type == ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_frag_count = 1;
+ dst->rx_frag_count = mnl_attr_get_u64(attr);
+ } else if (type == ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_frag_count = 1;
+ dst->tx_frag_count = mnl_attr_get_u64(attr);
+ } else if (type == ETHTOOL_A_MM_STAT_HOLD_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.hold_count = 1;
+ dst->hold_count = mnl_attr_get_u64(attr);
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_cable_result_free(struct ethtool_cable_result *obj)
+{
+}
+
+int ethtool_cable_result_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_cable_result *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_CABLE_RESULT_PAIR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.pair = 1;
+ dst->pair = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_CABLE_RESULT_CODE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.code = 1;
+ dst->code = mnl_attr_get_u8(attr);
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_cable_fault_length_free(struct ethtool_cable_fault_length *obj)
+{
+}
+
+int ethtool_cable_fault_length_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_cable_fault_length *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.pair = 1;
+ dst->pair = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_CABLE_FAULT_LENGTH_CM) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.cm = 1;
+ dst->cm = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_bitset_bit_free(struct ethtool_bitset_bit *obj)
+{
+ free(obj->name);
+}
+
+int ethtool_bitset_bit_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_bitset_bit *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.index)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_BITSET_BIT_INDEX, obj->index);
+ if (obj->_present.name_len)
+ mnl_attr_put_strz(nlh, ETHTOOL_A_BITSET_BIT_NAME, obj->name);
+ if (obj->_present.value)
+ mnl_attr_put(nlh, ETHTOOL_A_BITSET_BIT_VALUE, 0, NULL);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_bitset_bit_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_bitset_bit *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_BITSET_BIT_INDEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.index = 1;
+ dst->index = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_BITSET_BIT_NAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.name_len = len;
+ dst->name = malloc(len + 1);
+ memcpy(dst->name, mnl_attr_get_str(attr), len);
+ dst->name[len] = 0;
+ } else if (type == ETHTOOL_A_BITSET_BIT_VALUE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.value = 1;
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_tunnel_udp_entry_free(struct ethtool_tunnel_udp_entry *obj)
+{
+}
+
+int ethtool_tunnel_udp_entry_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_tunnel_udp_entry *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.port = 1;
+ dst->port = mnl_attr_get_u16(attr);
+ } else if (type == ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.type = 1;
+ dst->type = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_string_free(struct ethtool_string *obj)
+{
+ free(obj->value);
+}
+
+int ethtool_string_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_string *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.index)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_STRING_INDEX, obj->index);
+ if (obj->_present.value_len)
+ mnl_attr_put_strz(nlh, ETHTOOL_A_STRING_VALUE, obj->value);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_string_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_string *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_STRING_INDEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.index = 1;
+ dst->index = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_STRING_VALUE) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.value_len = len;
+ dst->value = malloc(len + 1);
+ memcpy(dst->value, mnl_attr_get_str(attr), len);
+ dst->value[len] = 0;
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_cable_nest_free(struct ethtool_cable_nest *obj)
+{
+ ethtool_cable_result_free(&obj->result);
+ ethtool_cable_fault_length_free(&obj->fault_length);
+}
+
+int ethtool_cable_nest_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_cable_nest *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_CABLE_NEST_RESULT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.result = 1;
+
+ parg.rsp_policy = &ethtool_cable_result_nest;
+ parg.data = &dst->result;
+ if (ethtool_cable_result_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_CABLE_NEST_FAULT_LENGTH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.fault_length = 1;
+
+ parg.rsp_policy = &ethtool_cable_fault_length_nest;
+ parg.data = &dst->fault_length;
+ if (ethtool_cable_fault_length_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_bitset_bits_free(struct ethtool_bitset_bits *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_bit; i++)
+ ethtool_bitset_bit_free(&obj->bit[i]);
+ free(obj->bit);
+}
+
+int ethtool_bitset_bits_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_bitset_bits *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ for (unsigned int i = 0; i < obj->n_bit; i++)
+ ethtool_bitset_bit_put(nlh, ETHTOOL_A_BITSET_BITS_BIT, &obj->bit[i]);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_bitset_bits_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_bitset_bits *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ unsigned int n_bit = 0;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->bit)
+ return ynl_error_parse(yarg, "attribute already present (bitset-bits.bit)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_BITSET_BITS_BIT) {
+ n_bit++;
+ }
+ }
+
+ if (n_bit) {
+ dst->bit = calloc(n_bit, sizeof(*dst->bit));
+ dst->n_bit = n_bit;
+ i = 0;
+ parg.rsp_policy = &ethtool_bitset_bit_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == ETHTOOL_A_BITSET_BITS_BIT) {
+ parg.data = &dst->bit[i];
+ if (ethtool_bitset_bit_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_strings_free(struct ethtool_strings *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_string; i++)
+ ethtool_string_free(&obj->string[i]);
+ free(obj->string);
+}
+
+int ethtool_strings_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_strings *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ for (unsigned int i = 0; i < obj->n_string; i++)
+ ethtool_string_put(nlh, ETHTOOL_A_STRINGS_STRING, &obj->string[i]);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_strings_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_strings *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ unsigned int n_string = 0;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->string)
+ return ynl_error_parse(yarg, "attribute already present (strings.string)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_STRINGS_STRING) {
+ n_string++;
+ }
+ }
+
+ if (n_string) {
+ dst->string = calloc(n_string, sizeof(*dst->string));
+ dst->n_string = n_string;
+ i = 0;
+ parg.rsp_policy = &ethtool_string_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGS_STRING) {
+ parg.data = &dst->string[i];
+ if (ethtool_string_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_bitset_free(struct ethtool_bitset *obj)
+{
+ ethtool_bitset_bits_free(&obj->bits);
+}
+
+int ethtool_bitset_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_bitset *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.nomask)
+ mnl_attr_put(nlh, ETHTOOL_A_BITSET_NOMASK, 0, NULL);
+ if (obj->_present.size)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_BITSET_SIZE, obj->size);
+ if (obj->_present.bits)
+ ethtool_bitset_bits_put(nlh, ETHTOOL_A_BITSET_BITS, &obj->bits);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_bitset_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_bitset *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_BITSET_NOMASK) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.nomask = 1;
+ } else if (type == ETHTOOL_A_BITSET_SIZE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.size = 1;
+ dst->size = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_BITSET_BITS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.bits = 1;
+
+ parg.rsp_policy = &ethtool_bitset_bits_nest;
+ parg.data = &dst->bits;
+ if (ethtool_bitset_bits_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return 0;
+}
+
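+/* Usage sketch (illustrative): SET requests below such as
+ * ETHTOOL_MSG_DEBUG_SET and ETHTOOL_MSG_WOL_SET take one of these
+ * bitsets. Setting a single bit by name, allocated the way
+ * ethtool_bitset_free() expects ("hw-tc-offload" is just an example):
+ *
+ *	struct ethtool_bitset mask = {};
+ *
+ *	mask.bits.bit = calloc(1, sizeof(*mask.bits.bit));
+ *	mask.bits.n_bit = 1;
+ *	mask.bits.bit[0].name = strdup("hw-tc-offload");
+ *	mask.bits.bit[0]._present.name_len = strlen(mask.bits.bit[0].name);
+ *	mask.bits.bit[0]._present.value = 1;	// request the bit set
+ *	mask._present.bits = 1;
+ */
+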
+void ethtool_stringset_free(struct ethtool_stringset_ *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_strings; i++)
+ ethtool_strings_free(&obj->strings[i]);
+ free(obj->strings);
+}
+
+int ethtool_stringset_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_stringset_ *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ if (obj->_present.id)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_STRINGSET_ID, obj->id);
+ if (obj->_present.count)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_STRINGSET_COUNT, obj->count);
+ for (unsigned int i = 0; i < obj->n_strings; i++)
+ ethtool_strings_put(nlh, ETHTOOL_A_STRINGSET_STRINGS, &obj->strings[i]);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_stringset_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_stringset_ *dst = yarg->data;
+ unsigned int n_strings = 0;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->strings)
+ return ynl_error_parse(yarg, "attribute already present (stringset.strings)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_STRINGSET_ID) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.id = 1;
+ dst->id = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_STRINGSET_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.count = 1;
+ dst->count = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_STRINGSET_STRINGS) {
+ n_strings++;
+ }
+ }
+
+ if (n_strings) {
+ dst->strings = calloc(n_strings, sizeof(*dst->strings));
+ dst->n_strings = n_strings;
+ i = 0;
+ parg.rsp_policy = &ethtool_strings_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGSET_STRINGS) {
+ parg.data = &dst->strings[i];
+ if (ethtool_strings_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_tunnel_udp_table_free(struct ethtool_tunnel_udp_table *obj)
+{
+ unsigned int i;
+
+ ethtool_bitset_free(&obj->types);
+ for (i = 0; i < obj->n_entry; i++)
+ ethtool_tunnel_udp_entry_free(&obj->entry[i]);
+ free(obj->entry);
+}
+
+int ethtool_tunnel_udp_table_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_tunnel_udp_table *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ unsigned int n_entry = 0;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->entry)
+ return ynl_error_parse(yarg, "attribute already present (tunnel-udp-table.entry)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.size = 1;
+ dst->size = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.types = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->types;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY) {
+ n_entry++;
+ }
+ }
+
+ if (n_entry) {
+ dst->entry = calloc(n_entry, sizeof(*dst->entry));
+ dst->n_entry = n_entry;
+ i = 0;
+ parg.rsp_policy = &ethtool_tunnel_udp_entry_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY) {
+ parg.data = &dst->entry[i];
+ if (ethtool_tunnel_udp_entry_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_stringsets_free(struct ethtool_stringsets *obj)
+{
+ unsigned int i;
+
+ for (i = 0; i < obj->n_stringset; i++)
+ ethtool_stringset_free(&obj->stringset[i]);
+ free(obj->stringset);
+}
+
+int ethtool_stringsets_put(struct nlmsghdr *nlh, unsigned int attr_type,
+ struct ethtool_stringsets *obj)
+{
+ struct nlattr *nest;
+
+ nest = mnl_attr_nest_start(nlh, attr_type);
+ for (unsigned int i = 0; i < obj->n_stringset; i++)
+ ethtool_stringset_put(nlh, ETHTOOL_A_STRINGSETS_STRINGSET, &obj->stringset[i]);
+ mnl_attr_nest_end(nlh, nest);
+
+ return 0;
+}
+
+int ethtool_stringsets_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_stringsets *dst = yarg->data;
+ unsigned int n_stringset = 0;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ parg.ys = yarg->ys;
+
+ if (dst->stringset)
+ return ynl_error_parse(yarg, "attribute already present (stringsets.stringset)");
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_STRINGSETS_STRINGSET) {
+ n_stringset++;
+ }
+ }
+
+ if (n_stringset) {
+ dst->stringset = calloc(n_stringset, sizeof(*dst->stringset));
+ dst->n_stringset = n_stringset;
+ i = 0;
+ parg.rsp_policy = &ethtool_stringset_nest;
+ mnl_attr_for_each_nested(attr, nested) {
+ if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGSETS_STRINGSET) {
+ parg.data = &dst->stringset[i];
+ if (ethtool_stringset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void ethtool_tunnel_udp_free(struct ethtool_tunnel_udp *obj)
+{
+ ethtool_tunnel_udp_table_free(&obj->table);
+}
+
+int ethtool_tunnel_udp_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct ethtool_tunnel_udp *dst = yarg->data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_TUNNEL_UDP_TABLE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.table = 1;
+
+ parg.rsp_policy = &ethtool_tunnel_udp_table_nest;
+ parg.data = &dst->table;
+ if (ethtool_tunnel_udp_table_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_STRSET_GET ============== */
+/* ETHTOOL_MSG_STRSET_GET - do */
+void ethtool_strset_get_req_free(struct ethtool_strset_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_stringsets_free(&req->stringsets);
+ free(req);
+}
+
+void ethtool_strset_get_rsp_free(struct ethtool_strset_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_stringsets_free(&rsp->stringsets);
+ free(rsp);
+}
+
+int ethtool_strset_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_strset_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_STRSET_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_STRSET_STRINGSETS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.stringsets = 1;
+
+ parg.rsp_policy = &ethtool_stringsets_nest;
+ parg.data = &dst->stringsets;
+ if (ethtool_stringsets_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_strset_get_rsp *
+ethtool_strset_get(struct ynl_sock *ys, struct ethtool_strset_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_strset_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_STRSET_GET, 1);
+ ys->req_policy = &ethtool_strset_nest;
+ yrs.yarg.rsp_policy = &ethtool_strset_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_STRSET_HEADER, &req->header);
+ if (req->_present.stringsets)
+ ethtool_stringsets_put(nlh, ETHTOOL_A_STRSET_STRINGSETS, &req->stringsets);
+ if (req->_present.counts_only)
+ mnl_attr_put(nlh, ETHTOOL_A_STRSET_COUNTS_ONLY, 0, NULL);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_strset_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_STRSET_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_strset_get_rsp_free(rsp);
+ return NULL;
+}
+
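+/* Caller-side sketch for the "do" request above (illustrative). It
+ * assumes the socket helpers from lib/ynl.h and the ynl_ethtool_family
+ * descriptor declared in the generated header:
+ *
+ *	struct ethtool_strset_get_req *req;
+ *	struct ethtool_strset_get_rsp *rsp;
+ *	struct ynl_sock *ys;
+ *
+ *	ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+ *	if (!ys)
+ *		return;
+ *	req = calloc(1, sizeof(*req));
+ *	// fill req->header as sketched earlier
+ *	rsp = ethtool_strset_get(ys, req);
+ *	ethtool_strset_get_req_free(req);
+ *	if (rsp)
+ *		ethtool_strset_get_rsp_free(rsp);
+ *	ynl_sock_destroy(ys);
+ */
+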
+/* ETHTOOL_MSG_STRSET_GET - dump */
+void ethtool_strset_get_list_free(struct ethtool_strset_get_list *rsp)
+{
+ struct ethtool_strset_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_stringsets_free(&rsp->obj.stringsets);
+ free(rsp);
+ }
+}
+
+struct ethtool_strset_get_list *
+ethtool_strset_get_dump(struct ynl_sock *ys,
+ struct ethtool_strset_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_strset_get_list);
+ yds.cb = ethtool_strset_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_STRSET_GET;
+ yds.rsp_policy = &ethtool_strset_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_STRSET_GET, 1);
+ ys->req_policy = &ethtool_strset_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_STRSET_HEADER, &req->header);
+ if (req->_present.stringsets)
+ ethtool_stringsets_put(nlh, ETHTOOL_A_STRSET_STRINGSETS, &req->stringsets);
+ if (req->_present.counts_only)
+ mnl_attr_put(nlh, ETHTOOL_A_STRSET_COUNTS_ONLY, 0, NULL);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_strset_get_list_free(yds.first);
+ return NULL;
+}
+
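+/* Dump sketch (illustrative): the dump variant returns a linked list
+ * terminated by YNL_LIST_END; lib/ynl.h provides ynl_dump_foreach() so
+ * the terminator never has to be handled by the caller:
+ *
+ *	struct ethtool_strset_get_list *list;
+ *
+ *	list = ethtool_strset_get_dump(ys, req);
+ *	if (list) {
+ *		ynl_dump_foreach(list, obj)	// obj: one rsp per device
+ *			printf("dev %u\n", obj->header.dev_index);
+ *		ethtool_strset_get_list_free(list);
+ *	}
+ */
+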
+/* ============== ETHTOOL_MSG_LINKINFO_GET ============== */
+/* ETHTOOL_MSG_LINKINFO_GET - do */
+void ethtool_linkinfo_get_req_free(struct ethtool_linkinfo_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_linkinfo_get_rsp_free(struct ethtool_linkinfo_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_linkinfo_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_linkinfo_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_LINKINFO_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_LINKINFO_PORT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.port = 1;
+ dst->port = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKINFO_PHYADDR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.phyaddr = 1;
+ dst->phyaddr = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKINFO_TP_MDIX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tp_mdix = 1;
+ dst->tp_mdix = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKINFO_TP_MDIX_CTRL) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tp_mdix_ctrl = 1;
+ dst->tp_mdix_ctrl = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKINFO_TRANSCEIVER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.transceiver = 1;
+ dst->transceiver = mnl_attr_get_u8(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_linkinfo_get_rsp *
+ethtool_linkinfo_get(struct ynl_sock *ys, struct ethtool_linkinfo_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_linkinfo_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_GET, 1);
+ ys->req_policy = &ethtool_linkinfo_nest;
+ yrs.yarg.rsp_policy = &ethtool_linkinfo_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_linkinfo_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_LINKINFO_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_linkinfo_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_LINKINFO_GET - dump */
+void ethtool_linkinfo_get_list_free(struct ethtool_linkinfo_get_list *rsp)
+{
+ struct ethtool_linkinfo_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_linkinfo_get_list *
+ethtool_linkinfo_get_dump(struct ynl_sock *ys,
+ struct ethtool_linkinfo_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_linkinfo_get_list);
+ yds.cb = ethtool_linkinfo_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_LINKINFO_GET;
+ yds.rsp_policy = &ethtool_linkinfo_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_GET, 1);
+ ys->req_policy = &ethtool_linkinfo_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_linkinfo_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_LINKINFO_GET - notify */
+void ethtool_linkinfo_get_ntf_free(struct ethtool_linkinfo_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_LINKINFO_SET ============== */
+/* ETHTOOL_MSG_LINKINFO_SET - do */
+void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_linkinfo_set(struct ynl_sock *ys,
+ struct ethtool_linkinfo_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_SET, 1);
+ ys->req_policy = &ethtool_linkinfo_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header);
+ if (req->_present.port)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_PORT, req->port);
+ if (req->_present.phyaddr)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_PHYADDR, req->phyaddr);
+ if (req->_present.tp_mdix)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TP_MDIX, req->tp_mdix);
+ if (req->_present.tp_mdix_ctrl)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, req->tp_mdix_ctrl);
+ if (req->_present.transceiver)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
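+/* Set sketch (illustrative): SET handlers return 0 or -1 instead of a
+ * response object; on failure the detail is expected to be in the
+ * socket's ynl_error (ys->err.msg, per lib/ynl.h). Forcing MDI-X
+ * control, for example:
+ *
+ *	struct ethtool_linkinfo_set_req *req = calloc(1, sizeof(*req));
+ *
+ *	req->header.dev_index = ifindex;
+ *	req->header._present.dev_index = 1;
+ *	req->_present.header = 1;
+ *	req->tp_mdix_ctrl = ETH_TP_MDI_AUTO;	// from <linux/ethtool.h>
+ *	req->_present.tp_mdix_ctrl = 1;
+ *	if (ethtool_linkinfo_set(ys, req))
+ *		fprintf(stderr, "set failed: %s\n", ys->err.msg);
+ *	ethtool_linkinfo_set_req_free(req);
+ */
+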
+/* ============== ETHTOOL_MSG_LINKMODES_GET ============== */
+/* ETHTOOL_MSG_LINKMODES_GET - do */
+void ethtool_linkmodes_get_req_free(struct ethtool_linkmodes_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_linkmodes_get_rsp_free(struct ethtool_linkmodes_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->ours);
+ ethtool_bitset_free(&rsp->peer);
+ free(rsp);
+}
+
+int ethtool_linkmodes_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_linkmodes_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_LINKMODES_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_LINKMODES_AUTONEG) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.autoneg = 1;
+ dst->autoneg = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKMODES_OURS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ours = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->ours;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_LINKMODES_PEER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.peer = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->peer;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_LINKMODES_SPEED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.speed = 1;
+ dst->speed = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_LINKMODES_DUPLEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.duplex = 1;
+ dst->duplex = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.master_slave_cfg = 1;
+ dst->master_slave_cfg = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.master_slave_state = 1;
+ dst->master_slave_state = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKMODES_LANES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.lanes = 1;
+ dst->lanes = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_LINKMODES_RATE_MATCHING) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rate_matching = 1;
+ dst->rate_matching = mnl_attr_get_u8(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_linkmodes_get_rsp *
+ethtool_linkmodes_get(struct ynl_sock *ys,
+ struct ethtool_linkmodes_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_linkmodes_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_GET, 1);
+ ys->req_policy = &ethtool_linkmodes_nest;
+ yrs.yarg.rsp_policy = &ethtool_linkmodes_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_linkmodes_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_LINKMODES_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_linkmodes_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_LINKMODES_GET - dump */
+void ethtool_linkmodes_get_list_free(struct ethtool_linkmodes_get_list *rsp)
+{
+ struct ethtool_linkmodes_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.ours);
+ ethtool_bitset_free(&rsp->obj.peer);
+ free(rsp);
+ }
+}
+
+struct ethtool_linkmodes_get_list *
+ethtool_linkmodes_get_dump(struct ynl_sock *ys,
+ struct ethtool_linkmodes_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_linkmodes_get_list);
+ yds.cb = ethtool_linkmodes_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_LINKMODES_GET;
+ yds.rsp_policy = &ethtool_linkmodes_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_GET, 1);
+ ys->req_policy = &ethtool_linkmodes_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_linkmodes_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_LINKMODES_GET - notify */
+void ethtool_linkmodes_get_ntf_free(struct ethtool_linkmodes_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.ours);
+ ethtool_bitset_free(&rsp->obj.peer);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_LINKMODES_SET ============== */
+/* ETHTOOL_MSG_LINKMODES_SET - do */
+void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->ours);
+ ethtool_bitset_free(&req->peer);
+ free(req);
+}
+
+int ethtool_linkmodes_set(struct ynl_sock *ys,
+ struct ethtool_linkmodes_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_SET, 1);
+ ys->req_policy = &ethtool_linkmodes_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header);
+ if (req->_present.autoneg)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_AUTONEG, req->autoneg);
+ if (req->_present.ours)
+ ethtool_bitset_put(nlh, ETHTOOL_A_LINKMODES_OURS, &req->ours);
+ if (req->_present.peer)
+ ethtool_bitset_put(nlh, ETHTOOL_A_LINKMODES_PEER, &req->peer);
+ if (req->_present.speed)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_LINKMODES_SPEED, req->speed);
+ if (req->_present.duplex)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_DUPLEX, req->duplex);
+ if (req->_present.master_slave_cfg)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, req->master_slave_cfg);
+ if (req->_present.master_slave_state)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, req->master_slave_state);
+ if (req->_present.lanes)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_LINKMODES_LANES, req->lanes);
+ if (req->_present.rate_matching)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_LINKSTATE_GET ============== */
+/* ETHTOOL_MSG_LINKSTATE_GET - do */
+void ethtool_linkstate_get_req_free(struct ethtool_linkstate_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_linkstate_get_rsp_free(struct ethtool_linkstate_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_linkstate_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_linkstate_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_LINKSTATE_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_LINKSTATE_LINK) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.link = 1;
+ dst->link = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKSTATE_SQI) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.sqi = 1;
+ dst->sqi = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_LINKSTATE_SQI_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.sqi_max = 1;
+ dst->sqi_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_LINKSTATE_EXT_STATE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ext_state = 1;
+ dst->ext_state = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKSTATE_EXT_SUBSTATE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ext_substate = 1;
+ dst->ext_substate = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ext_down_cnt = 1;
+ dst->ext_down_cnt = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_linkstate_get_rsp *
+ethtool_linkstate_get(struct ynl_sock *ys,
+ struct ethtool_linkstate_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_linkstate_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKSTATE_GET, 1);
+ ys->req_policy = &ethtool_linkstate_nest;
+ yrs.yarg.rsp_policy = &ethtool_linkstate_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKSTATE_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_linkstate_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_LINKSTATE_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_linkstate_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_LINKSTATE_GET - dump */
+void ethtool_linkstate_get_list_free(struct ethtool_linkstate_get_list *rsp)
+{
+ struct ethtool_linkstate_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_linkstate_get_list *
+ethtool_linkstate_get_dump(struct ynl_sock *ys,
+ struct ethtool_linkstate_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_linkstate_get_list);
+ yds.cb = ethtool_linkstate_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_LINKSTATE_GET;
+ yds.rsp_policy = &ethtool_linkstate_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKSTATE_GET, 1);
+ ys->req_policy = &ethtool_linkstate_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_LINKSTATE_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_linkstate_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_DEBUG_GET ============== */
+/* ETHTOOL_MSG_DEBUG_GET - do */
+void ethtool_debug_get_req_free(struct ethtool_debug_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_debug_get_rsp_free(struct ethtool_debug_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->msgmask);
+ free(rsp);
+}
+
+int ethtool_debug_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_debug_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_DEBUG_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_DEBUG_MSGMASK) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.msgmask = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->msgmask;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_debug_get_rsp *
+ethtool_debug_get(struct ynl_sock *ys, struct ethtool_debug_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_debug_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_DEBUG_GET, 1);
+ ys->req_policy = &ethtool_debug_nest;
+ yrs.yarg.rsp_policy = &ethtool_debug_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_debug_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_DEBUG_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_debug_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_DEBUG_GET - dump */
+void ethtool_debug_get_list_free(struct ethtool_debug_get_list *rsp)
+{
+ struct ethtool_debug_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.msgmask);
+ free(rsp);
+ }
+}
+
+struct ethtool_debug_get_list *
+ethtool_debug_get_dump(struct ynl_sock *ys,
+ struct ethtool_debug_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_debug_get_list);
+ yds.cb = ethtool_debug_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_DEBUG_GET;
+ yds.rsp_policy = &ethtool_debug_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_DEBUG_GET, 1);
+ ys->req_policy = &ethtool_debug_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_debug_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_DEBUG_GET - notify */
+void ethtool_debug_get_ntf_free(struct ethtool_debug_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.msgmask);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_DEBUG_SET ============== */
+/* ETHTOOL_MSG_DEBUG_SET - do */
+void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->msgmask);
+ free(req);
+}
+
+int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_DEBUG_SET, 1);
+ ys->req_policy = &ethtool_debug_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header);
+ if (req->_present.msgmask)
+ ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_WOL_GET ============== */
+/* ETHTOOL_MSG_WOL_GET - do */
+void ethtool_wol_get_req_free(struct ethtool_wol_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_wol_get_rsp_free(struct ethtool_wol_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->modes);
+ free(rsp->sopass);
+ free(rsp);
+}
+
+int ethtool_wol_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ethtool_wol_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_WOL_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_WOL_MODES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.modes = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->modes;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_WOL_SOPASS) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.sopass_len = len;
+ dst->sopass = malloc(len);
+ memcpy(dst->sopass, mnl_attr_get_payload(attr), len);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_wol_get_rsp *
+ethtool_wol_get(struct ynl_sock *ys, struct ethtool_wol_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_wol_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_WOL_GET, 1);
+ ys->req_policy = &ethtool_wol_nest;
+ yrs.yarg.rsp_policy = &ethtool_wol_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_wol_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_WOL_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_wol_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_WOL_GET - dump */
+void ethtool_wol_get_list_free(struct ethtool_wol_get_list *rsp)
+{
+ struct ethtool_wol_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.modes);
+ free(rsp->obj.sopass);
+ free(rsp);
+ }
+}
+
+struct ethtool_wol_get_list *
+ethtool_wol_get_dump(struct ynl_sock *ys, struct ethtool_wol_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_wol_get_list);
+ yds.cb = ethtool_wol_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_WOL_GET;
+ yds.rsp_policy = &ethtool_wol_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_WOL_GET, 1);
+ ys->req_policy = &ethtool_wol_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_wol_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_WOL_GET - notify */
+void ethtool_wol_get_ntf_free(struct ethtool_wol_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.modes);
+ free(rsp->obj.sopass);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_WOL_SET ============== */
+/* ETHTOOL_MSG_WOL_SET - do */
+void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->modes);
+ free(req->sopass);
+ free(req);
+}
+
+int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_WOL_SET, 1);
+ ys->req_policy = &ethtool_wol_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header);
+ if (req->_present.modes)
+ ethtool_bitset_put(nlh, ETHTOOL_A_WOL_MODES, &req->modes);
+ if (req->_present.sopass_len)
+ mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
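+/* WoL sketch (illustrative): sopass is a raw binary attribute whose
+ * length rides in _present.sopass_len, and the buffer must be
+ * heap-allocated since ethtool_wol_set_req_free() calls free() on it:
+ *
+ *	static const unsigned char pw[6] = { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x01 };
+ *
+ *	req->sopass = malloc(sizeof(pw));
+ *	memcpy(req->sopass, pw, sizeof(pw));
+ *	req->_present.sopass_len = sizeof(pw);
+ */
+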
+/* ============== ETHTOOL_MSG_FEATURES_GET ============== */
+/* ETHTOOL_MSG_FEATURES_GET - do */
+void ethtool_features_get_req_free(struct ethtool_features_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_features_get_rsp_free(struct ethtool_features_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->hw);
+ ethtool_bitset_free(&rsp->wanted);
+ ethtool_bitset_free(&rsp->active);
+ ethtool_bitset_free(&rsp->nochange);
+ free(rsp);
+}
+
+int ethtool_features_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_features_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_FEATURES_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_HW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.hw = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->hw;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_WANTED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.wanted = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->wanted;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_ACTIVE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.active = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->active;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_NOCHANGE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.nochange = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->nochange;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_features_get_rsp *
+ethtool_features_get(struct ynl_sock *ys, struct ethtool_features_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_features_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEATURES_GET, 1);
+ ys->req_policy = &ethtool_features_nest;
+ yrs.yarg.rsp_policy = &ethtool_features_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_features_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_FEATURES_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_features_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_FEATURES_GET - dump */
+void ethtool_features_get_list_free(struct ethtool_features_get_list *rsp)
+{
+ struct ethtool_features_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.hw);
+ ethtool_bitset_free(&rsp->obj.wanted);
+ ethtool_bitset_free(&rsp->obj.active);
+ ethtool_bitset_free(&rsp->obj.nochange);
+ free(rsp);
+ }
+}
+
+struct ethtool_features_get_list *
+ethtool_features_get_dump(struct ynl_sock *ys,
+ struct ethtool_features_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_features_get_list);
+ yds.cb = ethtool_features_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_FEATURES_GET;
+ yds.rsp_policy = &ethtool_features_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_FEATURES_GET, 1);
+ ys->req_policy = &ethtool_features_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_features_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_FEATURES_GET - notify */
+void ethtool_features_get_ntf_free(struct ethtool_features_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.hw);
+ ethtool_bitset_free(&rsp->obj.wanted);
+ ethtool_bitset_free(&rsp->obj.active);
+ ethtool_bitset_free(&rsp->obj.nochange);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_FEATURES_SET ============== */
+/* ETHTOOL_MSG_FEATURES_SET - do */
+void ethtool_features_set_req_free(struct ethtool_features_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->hw);
+ ethtool_bitset_free(&req->wanted);
+ ethtool_bitset_free(&req->active);
+ ethtool_bitset_free(&req->nochange);
+ free(req);
+}
+
+void ethtool_features_set_rsp_free(struct ethtool_features_set_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->hw);
+ ethtool_bitset_free(&rsp->wanted);
+ ethtool_bitset_free(&rsp->active);
+ ethtool_bitset_free(&rsp->nochange);
+ free(rsp);
+}
+
+int ethtool_features_set_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_features_set_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_FEATURES_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_HW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.hw = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->hw;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_WANTED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.wanted = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->wanted;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_ACTIVE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.active = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->active;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEATURES_NOCHANGE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.nochange = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->nochange;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_features_set_rsp *
+ethtool_features_set(struct ynl_sock *ys, struct ethtool_features_set_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_features_set_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEATURES_SET, 1);
+ ys->req_policy = &ethtool_features_nest;
+ yrs.yarg.rsp_policy = &ethtool_features_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header);
+ if (req->_present.hw)
+ ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_HW, &req->hw);
+ if (req->_present.wanted)
+ ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_WANTED, &req->wanted);
+ if (req->_present.active)
+ ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_ACTIVE, &req->active);
+ if (req->_present.nochange)
+ ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_NOCHANGE, &req->nochange);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_features_set_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_FEATURES_SET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_features_set_rsp_free(rsp);
+ return NULL;
+}
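+
+/* Note: unlike the other *_SET requests in this family, which only get
+ * a netlink ack, ETHTOOL_MSG_FEATURES_SET has a reply of its own: the
+ * kernel echoes back wanted/active bitsets describing which features
+ * actually changed, hence the full request/parse/free cycle above.
+ */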
+
+/* ============== ETHTOOL_MSG_PRIVFLAGS_GET ============== */
+/* ETHTOOL_MSG_PRIVFLAGS_GET - do */
+void ethtool_privflags_get_req_free(struct ethtool_privflags_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_privflags_get_rsp_free(struct ethtool_privflags_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->flags);
+ free(rsp);
+}
+
+int ethtool_privflags_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_privflags_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PRIVFLAGS_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PRIVFLAGS_FLAGS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.flags = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->flags;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_privflags_get_rsp *
+ethtool_privflags_get(struct ynl_sock *ys,
+ struct ethtool_privflags_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_privflags_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_GET, 1);
+ ys->req_policy = &ethtool_privflags_nest;
+ yrs.yarg.rsp_policy = &ethtool_privflags_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_privflags_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_privflags_get_rsp_free(rsp);
+ return NULL;
+}
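+
+/* Illustrative caller sketch, not generator output: a one-shot
+ * PRIVFLAGS_GET round trip. ynl_sock_create()/ynl_sock_destroy() and
+ * ynl_ethtool_family come from the ynl library; the req_alloc()/
+ * req_set_*() helpers are assumed to be the inline ones from the
+ * matching generated ethtool-user.h.
+ *
+ * struct ethtool_privflags_get_req *req;
+ * struct ethtool_privflags_get_rsp *rsp;
+ * struct ynl_error yerr;
+ * struct ynl_sock *ys;
+ *
+ * ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ * if (!ys)
+ * return -1;
+ * req = ethtool_privflags_get_req_alloc();
+ * ethtool_privflags_get_req_set_header_dev_name(req, "eth0");
+ * rsp = ethtool_privflags_get(ys, req);
+ * ethtool_privflags_get_req_free(req);
+ * if (rsp) {
+ * ... consume rsp->flags (valid when rsp->_present.flags) ...
+ * ethtool_privflags_get_rsp_free(rsp);
+ * }
+ * ynl_sock_destroy(ys);
+ */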
+
+/* ETHTOOL_MSG_PRIVFLAGS_GET - dump */
+void ethtool_privflags_get_list_free(struct ethtool_privflags_get_list *rsp)
+{
+ struct ethtool_privflags_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.flags);
+ free(rsp);
+ }
+}
+
+struct ethtool_privflags_get_list *
+ethtool_privflags_get_dump(struct ynl_sock *ys,
+ struct ethtool_privflags_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_privflags_get_list);
+ yds.cb = ethtool_privflags_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY;
+ yds.rsp_policy = &ethtool_privflags_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_GET, 1);
+ ys->req_policy = &ethtool_privflags_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_privflags_get_list_free(yds.first);
+ return NULL;
+}
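+
+/* Illustrative sketch, not generator output: consuming a dump, given a
+ * configured ys and req. The returned list is terminated with
+ * YNL_LIST_END rather than NULL, mirroring the walk in
+ * ethtool_privflags_get_list_free() above; NULL is returned on error
+ * (assuming ys->err then holds the details, as in lib/ynl.h).
+ *
+ * struct ethtool_privflags_get_list *pos, *list;
+ *
+ * list = ethtool_privflags_get_dump(ys, req);
+ * if (!list)
+ * return -1;
+ * for (pos = list; (void *)pos != YNL_LIST_END; pos = pos->next)
+ * ... consume pos->obj, one device's PRIVFLAGS_GET reply ...
+ * ethtool_privflags_get_list_free(list);
+ */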
+
+/* ETHTOOL_MSG_PRIVFLAGS_GET - notify */
+void ethtool_privflags_get_ntf_free(struct ethtool_privflags_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.flags);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_PRIVFLAGS_SET ============== */
+/* ETHTOOL_MSG_PRIVFLAGS_SET - do */
+void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->flags);
+ free(req);
+}
+
+int ethtool_privflags_set(struct ynl_sock *ys,
+ struct ethtool_privflags_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_SET, 1);
+ ys->req_policy = &ethtool_privflags_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header);
+ if (req->_present.flags)
+ ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_RINGS_GET ============== */
+/* ETHTOOL_MSG_RINGS_GET - do */
+void ethtool_rings_get_req_free(struct ethtool_rings_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_rings_get_rsp_free(struct ethtool_rings_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_rings_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_rings_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_RINGS_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_RINGS_RX_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_max = 1;
+ dst->rx_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX_MINI_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_mini_max = 1;
+ dst->rx_mini_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX_JUMBO_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_jumbo_max = 1;
+ dst->rx_jumbo_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_TX_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_max = 1;
+ dst->tx_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx = 1;
+ dst->rx = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX_MINI) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_mini = 1;
+ dst->rx_mini = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX_JUMBO) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_jumbo = 1;
+ dst->rx_jumbo = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_TX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx = 1;
+ dst->tx = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX_BUF_LEN) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_buf_len = 1;
+ dst->rx_buf_len = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_TCP_DATA_SPLIT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tcp_data_split = 1;
+ dst->tcp_data_split = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_RINGS_CQE_SIZE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.cqe_size = 1;
+ dst->cqe_size = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_TX_PUSH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_push = 1;
+ dst->tx_push = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_RINGS_RX_PUSH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_push = 1;
+ dst->rx_push = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_push_buf_len = 1;
+ dst->tx_push_buf_len = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_push_buf_len_max = 1;
+ dst->tx_push_buf_len_max = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_rings_get_rsp *
+ethtool_rings_get(struct ynl_sock *ys, struct ethtool_rings_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_rings_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RINGS_GET, 1);
+ ys->req_policy = &ethtool_rings_nest;
+ yrs.yarg.rsp_policy = &ethtool_rings_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_rings_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_RINGS_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_rings_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_RINGS_GET - dump */
+void ethtool_rings_get_list_free(struct ethtool_rings_get_list *rsp)
+{
+ struct ethtool_rings_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_rings_get_list *
+ethtool_rings_get_dump(struct ynl_sock *ys,
+ struct ethtool_rings_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_rings_get_list);
+ yds.cb = ethtool_rings_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_RINGS_GET_REPLY;
+ yds.rsp_policy = &ethtool_rings_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_RINGS_GET, 1);
+ ys->req_policy = &ethtool_rings_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_rings_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_RINGS_GET - notify */
+void ethtool_rings_get_ntf_free(struct ethtool_rings_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_RINGS_SET ============== */
+/* ETHTOOL_MSG_RINGS_SET - do */
+void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RINGS_SET, 1);
+ ys->req_policy = &ethtool_rings_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header);
+ if (req->_present.rx_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MAX, req->rx_max);
+ if (req->_present.rx_mini_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MINI_MAX, req->rx_mini_max);
+ if (req->_present.rx_jumbo_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_JUMBO_MAX, req->rx_jumbo_max);
+ if (req->_present.tx_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_MAX, req->tx_max);
+ if (req->_present.rx)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX, req->rx);
+ if (req->_present.rx_mini)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MINI, req->rx_mini);
+ if (req->_present.rx_jumbo)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_JUMBO, req->rx_jumbo);
+ if (req->_present.tx)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX, req->tx);
+ if (req->_present.rx_buf_len)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_BUF_LEN, req->rx_buf_len);
+ if (req->_present.tcp_data_split)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, req->tcp_data_split);
+ if (req->_present.cqe_size)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_CQE_SIZE, req->cqe_size);
+ if (req->_present.tx_push)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_TX_PUSH, req->tx_push);
+ if (req->_present.rx_push)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_RX_PUSH, req->rx_push);
+ if (req->_present.tx_push_buf_len)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, req->tx_push_buf_len);
+ if (req->_present.tx_push_buf_len_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
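+
+/* Illustrative sketch, not generator output: only attributes whose
+ * _present bit is set are emitted, so a caller changes exactly the
+ * fields it fills in. The req_alloc()/req_set_*() helpers are again
+ * assumed from the generated ethtool-user.h, and ys->err.msg from
+ * lib/ynl.h.
+ *
+ * struct ethtool_rings_set_req *req;
+ *
+ * req = ethtool_rings_set_req_alloc();
+ * ethtool_rings_set_req_set_header_dev_name(req, "eth0");
+ * ethtool_rings_set_req_set_rx(req, 512);
+ * if (ethtool_rings_set(ys, req) < 0)
+ * fprintf(stderr, "rings set failed: %s\n", ys->err.msg);
+ * ethtool_rings_set_req_free(req);
+ */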
+
+/* ============== ETHTOOL_MSG_CHANNELS_GET ============== */
+/* ETHTOOL_MSG_CHANNELS_GET - do */
+void ethtool_channels_get_req_free(struct ethtool_channels_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_channels_get_rsp_free(struct ethtool_channels_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_channels_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_channels_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_CHANNELS_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_CHANNELS_RX_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_max = 1;
+ dst->rx_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_TX_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_max = 1;
+ dst->tx_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_OTHER_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.other_max = 1;
+ dst->other_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_COMBINED_MAX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.combined_max = 1;
+ dst->combined_max = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_RX_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_count = 1;
+ dst->rx_count = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_TX_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_count = 1;
+ dst->tx_count = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_OTHER_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.other_count = 1;
+ dst->other_count = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_CHANNELS_COMBINED_COUNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.combined_count = 1;
+ dst->combined_count = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_channels_get_rsp *
+ethtool_channels_get(struct ynl_sock *ys, struct ethtool_channels_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_channels_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_GET, 1);
+ ys->req_policy = &ethtool_channels_nest;
+ yrs.yarg.rsp_policy = &ethtool_channels_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_channels_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_channels_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_CHANNELS_GET - dump */
+void ethtool_channels_get_list_free(struct ethtool_channels_get_list *rsp)
+{
+ struct ethtool_channels_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_channels_get_list *
+ethtool_channels_get_dump(struct ynl_sock *ys,
+ struct ethtool_channels_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_channels_get_list);
+ yds.cb = ethtool_channels_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY;
+ yds.rsp_policy = &ethtool_channels_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_GET, 1);
+ ys->req_policy = &ethtool_channels_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_channels_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_CHANNELS_GET - notify */
+void ethtool_channels_get_ntf_free(struct ethtool_channels_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
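+
+/* Illustrative sketch, not generator output: the _ntf types above are
+ * delivered through the generic notification queue. ynl_subscribe(),
+ * ynl_ntf_check(), ynl_has_ntf(), ynl_ntf_dequeue() and ynl_ntf_free()
+ * are assumed from lib/ynl.h; "monitor" is the ethtool multicast
+ * group.
+ *
+ * struct ynl_ntf_base_type *ntf;
+ *
+ * if (ynl_subscribe(ys, "monitor"))
+ * return -1;
+ * while (!ynl_ntf_check(ys)) {
+ * while (ynl_has_ntf(ys)) {
+ * ntf = ynl_ntf_dequeue(ys);
+ * if (ntf->cmd == ETHTOOL_MSG_CHANNELS_NTF)
+ * ... cast to struct ethtool_channels_get_ntf, read ->obj ...
+ * ynl_ntf_free(ntf);
+ * }
+ * }
+ */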
+
+/* ============== ETHTOOL_MSG_CHANNELS_SET ============== */
+/* ETHTOOL_MSG_CHANNELS_SET - do */
+void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_channels_set(struct ynl_sock *ys,
+ struct ethtool_channels_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_SET, 1);
+ ys->req_policy = &ethtool_channels_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header);
+ if (req->_present.rx_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_RX_MAX, req->rx_max);
+ if (req->_present.tx_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_TX_MAX, req->tx_max);
+ if (req->_present.other_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_OTHER_MAX, req->other_max);
+ if (req->_present.combined_max)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_MAX, req->combined_max);
+ if (req->_present.rx_count)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_RX_COUNT, req->rx_count);
+ if (req->_present.tx_count)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_TX_COUNT, req->tx_count);
+ if (req->_present.other_count)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_OTHER_COUNT, req->other_count);
+ if (req->_present.combined_count)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_COALESCE_GET ============== */
+/* ETHTOOL_MSG_COALESCE_GET - do */
+void ethtool_coalesce_get_req_free(struct ethtool_coalesce_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_coalesce_get_rsp_free(struct ethtool_coalesce_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_coalesce_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_coalesce_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_COALESCE_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_COALESCE_RX_USECS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_usecs = 1;
+ dst->rx_usecs = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_max_frames = 1;
+ dst->rx_max_frames = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_USECS_IRQ) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_usecs_irq = 1;
+ dst->rx_usecs_irq = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_max_frames_irq = 1;
+ dst->rx_max_frames_irq = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_USECS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_usecs = 1;
+ dst->tx_usecs = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_max_frames = 1;
+ dst->tx_max_frames = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_USECS_IRQ) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_usecs_irq = 1;
+ dst->tx_usecs_irq = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_max_frames_irq = 1;
+ dst->tx_max_frames_irq = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_STATS_BLOCK_USECS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.stats_block_usecs = 1;
+ dst->stats_block_usecs = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.use_adaptive_rx = 1;
+ dst->use_adaptive_rx = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.use_adaptive_tx = 1;
+ dst->use_adaptive_tx = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_COALESCE_PKT_RATE_LOW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.pkt_rate_low = 1;
+ dst->pkt_rate_low = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_USECS_LOW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_usecs_low = 1;
+ dst->rx_usecs_low = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_max_frames_low = 1;
+ dst->rx_max_frames_low = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_USECS_LOW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_usecs_low = 1;
+ dst->tx_usecs_low = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_max_frames_low = 1;
+ dst->tx_max_frames_low = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_PKT_RATE_HIGH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.pkt_rate_high = 1;
+ dst->pkt_rate_high = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_USECS_HIGH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_usecs_high = 1;
+ dst->rx_usecs_high = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_max_frames_high = 1;
+ dst->rx_max_frames_high = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_USECS_HIGH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_usecs_high = 1;
+ dst->tx_usecs_high = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_max_frames_high = 1;
+ dst->tx_max_frames_high = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rate_sample_interval = 1;
+ dst->rate_sample_interval = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_USE_CQE_MODE_TX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.use_cqe_mode_tx = 1;
+ dst->use_cqe_mode_tx = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_COALESCE_USE_CQE_MODE_RX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.use_cqe_mode_rx = 1;
+ dst->use_cqe_mode_rx = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_aggr_max_bytes = 1;
+ dst->tx_aggr_max_bytes = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_aggr_max_frames = 1;
+ dst->tx_aggr_max_frames = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_aggr_time_usecs = 1;
+ dst->tx_aggr_time_usecs = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_coalesce_get_rsp *
+ethtool_coalesce_get(struct ynl_sock *ys, struct ethtool_coalesce_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_coalesce_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_COALESCE_GET, 1);
+ ys->req_policy = &ethtool_coalesce_nest;
+ yrs.yarg.rsp_policy = &ethtool_coalesce_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_coalesce_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_coalesce_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_COALESCE_GET - dump */
+void ethtool_coalesce_get_list_free(struct ethtool_coalesce_get_list *rsp)
+{
+ struct ethtool_coalesce_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_coalesce_get_list *
+ethtool_coalesce_get_dump(struct ynl_sock *ys,
+ struct ethtool_coalesce_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_coalesce_get_list);
+ yds.cb = ethtool_coalesce_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY;
+ yds.rsp_policy = &ethtool_coalesce_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_COALESCE_GET, 1);
+ ys->req_policy = &ethtool_coalesce_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_coalesce_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_COALESCE_GET - notify */
+void ethtool_coalesce_get_ntf_free(struct ethtool_coalesce_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_COALESCE_SET ============== */
+/* ETHTOOL_MSG_COALESCE_SET - do */
+void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_coalesce_set(struct ynl_sock *ys,
+ struct ethtool_coalesce_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_COALESCE_SET, 1);
+ ys->req_policy = &ethtool_coalesce_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header);
+ if (req->_present.rx_usecs)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS, req->rx_usecs);
+ if (req->_present.rx_max_frames)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES, req->rx_max_frames);
+ if (req->_present.rx_usecs_irq)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_IRQ, req->rx_usecs_irq);
+ if (req->_present.rx_max_frames_irq)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, req->rx_max_frames_irq);
+ if (req->_present.tx_usecs)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS, req->tx_usecs);
+ if (req->_present.tx_max_frames)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES, req->tx_max_frames);
+ if (req->_present.tx_usecs_irq)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_IRQ, req->tx_usecs_irq);
+ if (req->_present.tx_max_frames_irq)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, req->tx_max_frames_irq);
+ if (req->_present.stats_block_usecs)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, req->stats_block_usecs);
+ if (req->_present.use_adaptive_rx)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, req->use_adaptive_rx);
+ if (req->_present.use_adaptive_tx)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, req->use_adaptive_tx);
+ if (req->_present.pkt_rate_low)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_PKT_RATE_LOW, req->pkt_rate_low);
+ if (req->_present.rx_usecs_low)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_LOW, req->rx_usecs_low);
+ if (req->_present.rx_max_frames_low)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, req->rx_max_frames_low);
+ if (req->_present.tx_usecs_low)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_LOW, req->tx_usecs_low);
+ if (req->_present.tx_max_frames_low)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, req->tx_max_frames_low);
+ if (req->_present.pkt_rate_high)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_PKT_RATE_HIGH, req->pkt_rate_high);
+ if (req->_present.rx_usecs_high)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_HIGH, req->rx_usecs_high);
+ if (req->_present.rx_max_frames_high)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, req->rx_max_frames_high);
+ if (req->_present.tx_usecs_high)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_HIGH, req->tx_usecs_high);
+ if (req->_present.tx_max_frames_high)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, req->tx_max_frames_high);
+ if (req->_present.rate_sample_interval)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, req->rate_sample_interval);
+ if (req->_present.use_cqe_mode_tx)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, req->use_cqe_mode_tx);
+ if (req->_present.use_cqe_mode_rx)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, req->use_cqe_mode_rx);
+ if (req->_present.tx_aggr_max_bytes)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, req->tx_aggr_max_bytes);
+ if (req->_present.tx_aggr_max_frames)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, req->tx_aggr_max_frames);
+ if (req->_present.tx_aggr_time_usecs)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_PAUSE_GET ============== */
+/* ETHTOOL_MSG_PAUSE_GET - do */
+void ethtool_pause_get_req_free(struct ethtool_pause_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_pause_get_rsp_free(struct ethtool_pause_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_pause_stat_free(&rsp->stats);
+ free(rsp);
+}
+
+int ethtool_pause_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_pause_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PAUSE_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PAUSE_AUTONEG) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.autoneg = 1;
+ dst->autoneg = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PAUSE_RX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx = 1;
+ dst->rx = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PAUSE_TX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx = 1;
+ dst->tx = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PAUSE_STATS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.stats = 1;
+
+ parg.rsp_policy = &ethtool_pause_stat_nest;
+ parg.data = &dst->stats;
+ if (ethtool_pause_stat_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PAUSE_STATS_SRC) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.stats_src = 1;
+ dst->stats_src = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_pause_get_rsp *
+ethtool_pause_get(struct ynl_sock *ys, struct ethtool_pause_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_pause_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PAUSE_GET, 1);
+ ys->req_policy = &ethtool_pause_nest;
+ yrs.yarg.rsp_policy = &ethtool_pause_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_pause_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_pause_get_rsp_free(rsp);
+ return NULL;
+}
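+
+/* Illustrative sketch, not generator output: every optional reply
+ * field is paired with a _present bit set by the parser above, and
+ * consumers must test it before reading the field:
+ *
+ * rsp = ethtool_pause_get(ys, req);
+ * if (rsp && rsp->_present.autoneg)
+ * printf("pause autoneg: %u\n", rsp->autoneg);
+ */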
+
+/* ETHTOOL_MSG_PAUSE_GET - dump */
+void ethtool_pause_get_list_free(struct ethtool_pause_get_list *rsp)
+{
+ struct ethtool_pause_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_pause_stat_free(&rsp->obj.stats);
+ free(rsp);
+ }
+}
+
+struct ethtool_pause_get_list *
+ethtool_pause_get_dump(struct ynl_sock *ys,
+ struct ethtool_pause_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_pause_get_list);
+ yds.cb = ethtool_pause_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY;
+ yds.rsp_policy = &ethtool_pause_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PAUSE_GET, 1);
+ ys->req_policy = &ethtool_pause_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_pause_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_PAUSE_GET - notify */
+void ethtool_pause_get_ntf_free(struct ethtool_pause_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_pause_stat_free(&rsp->obj.stats);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_PAUSE_SET ============== */
+/* ETHTOOL_MSG_PAUSE_SET - do */
+void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_pause_stat_free(&req->stats);
+ free(req);
+}
+
+int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PAUSE_SET, 1);
+ ys->req_policy = &ethtool_pause_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header);
+ if (req->_present.autoneg)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_AUTONEG, req->autoneg);
+ if (req->_present.rx)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_RX, req->rx);
+ if (req->_present.tx)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_TX, req->tx);
+ if (req->_present.stats)
+ ethtool_pause_stat_put(nlh, ETHTOOL_A_PAUSE_STATS, &req->stats);
+ if (req->_present.stats_src)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_EEE_GET ============== */
+/* ETHTOOL_MSG_EEE_GET - do */
+void ethtool_eee_get_req_free(struct ethtool_eee_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_eee_get_rsp_free(struct ethtool_eee_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->modes_ours);
+ ethtool_bitset_free(&rsp->modes_peer);
+ free(rsp);
+}
+
+int ethtool_eee_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ethtool_eee_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_EEE_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_EEE_MODES_OURS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.modes_ours = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->modes_ours;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_EEE_MODES_PEER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.modes_peer = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->modes_peer;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_EEE_ACTIVE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.active = 1;
+ dst->active = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_EEE_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.enabled = 1;
+ dst->enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_EEE_TX_LPI_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_lpi_enabled = 1;
+ dst->tx_lpi_enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_EEE_TX_LPI_TIMER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_lpi_timer = 1;
+ dst->tx_lpi_timer = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_eee_get_rsp *
+ethtool_eee_get(struct ynl_sock *ys, struct ethtool_eee_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_eee_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_EEE_GET, 1);
+ ys->req_policy = &ethtool_eee_nest;
+ yrs.yarg.rsp_policy = &ethtool_eee_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_eee_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_EEE_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_eee_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_EEE_GET - dump */
+void ethtool_eee_get_list_free(struct ethtool_eee_get_list *rsp)
+{
+ struct ethtool_eee_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.modes_ours);
+ ethtool_bitset_free(&rsp->obj.modes_peer);
+ free(rsp);
+ }
+}
+
+struct ethtool_eee_get_list *
+ethtool_eee_get_dump(struct ynl_sock *ys, struct ethtool_eee_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_eee_get_list);
+ yds.cb = ethtool_eee_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_EEE_GET_REPLY;
+ yds.rsp_policy = &ethtool_eee_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_EEE_GET, 1);
+ ys->req_policy = &ethtool_eee_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_eee_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_EEE_GET - notify */
+void ethtool_eee_get_ntf_free(struct ethtool_eee_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.modes_ours);
+ ethtool_bitset_free(&rsp->obj.modes_peer);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_EEE_SET ============== */
+/* ETHTOOL_MSG_EEE_SET - do */
+void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->modes_ours);
+ ethtool_bitset_free(&req->modes_peer);
+ free(req);
+}
+
+int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_EEE_SET, 1);
+ ys->req_policy = &ethtool_eee_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header);
+ if (req->_present.modes_ours)
+ ethtool_bitset_put(nlh, ETHTOOL_A_EEE_MODES_OURS, &req->modes_ours);
+ if (req->_present.modes_peer)
+ ethtool_bitset_put(nlh, ETHTOOL_A_EEE_MODES_PEER, &req->modes_peer);
+ if (req->_present.active)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_ACTIVE, req->active);
+ if (req->_present.enabled)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_ENABLED, req->enabled);
+ if (req->_present.tx_lpi_enabled)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_TX_LPI_ENABLED, req->tx_lpi_enabled);
+ if (req->_present.tx_lpi_timer)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_TSINFO_GET ============== */
+/* ETHTOOL_MSG_TSINFO_GET - do */
+void ethtool_tsinfo_get_req_free(struct ethtool_tsinfo_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_tsinfo_get_rsp_free(struct ethtool_tsinfo_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->timestamping);
+ ethtool_bitset_free(&rsp->tx_types);
+ ethtool_bitset_free(&rsp->rx_filters);
+ free(rsp);
+}
+
+int ethtool_tsinfo_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_tsinfo_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_TSINFO_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_TSINFO_TIMESTAMPING) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.timestamping = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->timestamping;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_TSINFO_TX_TYPES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_types = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->tx_types;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_TSINFO_RX_FILTERS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_filters = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->rx_filters;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_TSINFO_PHC_INDEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.phc_index = 1;
+ dst->phc_index = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_tsinfo_get_rsp *
+ethtool_tsinfo_get(struct ynl_sock *ys, struct ethtool_tsinfo_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_tsinfo_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_TSINFO_GET, 1);
+ ys->req_policy = &ethtool_tsinfo_nest;
+ yrs.yarg.rsp_policy = &ethtool_tsinfo_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_TSINFO_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_tsinfo_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_tsinfo_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_TSINFO_GET - dump */
+void ethtool_tsinfo_get_list_free(struct ethtool_tsinfo_get_list *rsp)
+{
+ struct ethtool_tsinfo_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.timestamping);
+ ethtool_bitset_free(&rsp->obj.tx_types);
+ ethtool_bitset_free(&rsp->obj.rx_filters);
+ free(rsp);
+ }
+}
+
+struct ethtool_tsinfo_get_list *
+ethtool_tsinfo_get_dump(struct ynl_sock *ys,
+ struct ethtool_tsinfo_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_tsinfo_get_list);
+ yds.cb = ethtool_tsinfo_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY;
+ yds.rsp_policy = &ethtool_tsinfo_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_TSINFO_GET, 1);
+ ys->req_policy = &ethtool_tsinfo_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_TSINFO_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_tsinfo_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_CABLE_TEST_ACT ============== */
+/* ETHTOOL_MSG_CABLE_TEST_ACT - do */
+void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_cable_test_act(struct ynl_sock *ys,
+ struct ethtool_cable_test_act_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CABLE_TEST_ACT, 1);
+ ys->req_policy = &ethtool_cable_test_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
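+
+/* Note: a successful ETHTOOL_MSG_CABLE_TEST_ACT only acknowledges that
+ * the test was started; the results arrive asynchronously as
+ * ETHTOOL_MSG_CABLE_TEST_NTF notifications, so callers need the
+ * notification path sketched earlier rather than a response parser.
+ */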
+
+/* ============== ETHTOOL_MSG_CABLE_TEST_TDR_ACT ============== */
+/* ETHTOOL_MSG_CABLE_TEST_TDR_ACT - do */
+void
+ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_cable_test_tdr_act(struct ynl_sock *ys,
+ struct ethtool_cable_test_tdr_act_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, 1);
+ ys->req_policy = &ethtool_cable_test_tdr_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_TUNNEL_INFO_GET ============== */
+/* ETHTOOL_MSG_TUNNEL_INFO_GET - do */
+void ethtool_tunnel_info_get_req_free(struct ethtool_tunnel_info_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_tunnel_info_get_rsp_free(struct ethtool_tunnel_info_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_tunnel_udp_free(&rsp->udp_ports);
+ free(rsp);
+}
+
+int ethtool_tunnel_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_tunnel_info_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_TUNNEL_INFO_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_TUNNEL_INFO_UDP_PORTS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.udp_ports = 1;
+
+ parg.rsp_policy = &ethtool_tunnel_udp_nest;
+ parg.data = &dst->udp_ports;
+ if (ethtool_tunnel_udp_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_tunnel_info_get_rsp *
+ethtool_tunnel_info_get(struct ynl_sock *ys,
+ struct ethtool_tunnel_info_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_tunnel_info_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_TUNNEL_INFO_GET, 1);
+ ys->req_policy = &ethtool_tunnel_info_nest;
+ yrs.yarg.rsp_policy = &ethtool_tunnel_info_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_TUNNEL_INFO_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_tunnel_info_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_tunnel_info_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_TUNNEL_INFO_GET - dump */
+void
+ethtool_tunnel_info_get_list_free(struct ethtool_tunnel_info_get_list *rsp)
+{
+ struct ethtool_tunnel_info_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_tunnel_udp_free(&rsp->obj.udp_ports);
+ free(rsp);
+ }
+}
+
+struct ethtool_tunnel_info_get_list *
+ethtool_tunnel_info_get_dump(struct ynl_sock *ys,
+ struct ethtool_tunnel_info_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_tunnel_info_get_list);
+ yds.cb = ethtool_tunnel_info_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY;
+ yds.rsp_policy = &ethtool_tunnel_info_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_TUNNEL_INFO_GET, 1);
+ ys->req_policy = &ethtool_tunnel_info_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_TUNNEL_INFO_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_tunnel_info_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_FEC_GET ============== */
+/* ETHTOOL_MSG_FEC_GET - do */
+void ethtool_fec_get_req_free(struct ethtool_fec_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_fec_get_rsp_free(struct ethtool_fec_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_bitset_free(&rsp->modes);
+ ethtool_fec_stat_free(&rsp->stats);
+ free(rsp);
+}
+
+int ethtool_fec_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ethtool_fec_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_FEC_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEC_MODES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.modes = 1;
+
+ parg.rsp_policy = &ethtool_bitset_nest;
+ parg.data = &dst->modes;
+ if (ethtool_bitset_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_FEC_AUTO) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.auto_ = 1;
+ dst->auto_ = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_FEC_ACTIVE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.active = 1;
+ dst->active = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_FEC_STATS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.stats = 1;
+
+ parg.rsp_policy = &ethtool_fec_stat_nest;
+ parg.data = &dst->stats;
+ if (ethtool_fec_stat_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_fec_get_rsp *
+ethtool_fec_get(struct ynl_sock *ys, struct ethtool_fec_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_fec_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEC_GET, 1);
+ ys->req_policy = &ethtool_fec_nest;
+ yrs.yarg.rsp_policy = &ethtool_fec_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_fec_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_FEC_GET_REPLY;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_fec_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_FEC_GET - dump */
+void ethtool_fec_get_list_free(struct ethtool_fec_get_list *rsp)
+{
+ struct ethtool_fec_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.modes);
+ ethtool_fec_stat_free(&rsp->obj.stats);
+ free(rsp);
+ }
+}
+
+struct ethtool_fec_get_list *
+ethtool_fec_get_dump(struct ynl_sock *ys, struct ethtool_fec_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_fec_get_list);
+ yds.cb = ethtool_fec_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_FEC_GET_REPLY;
+ yds.rsp_policy = &ethtool_fec_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_FEC_GET, 1);
+ ys->req_policy = &ethtool_fec_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_fec_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_FEC_GET - notify */
+void ethtool_fec_get_ntf_free(struct ethtool_fec_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_bitset_free(&rsp->obj.modes);
+ ethtool_fec_stat_free(&rsp->obj.stats);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_FEC_SET ============== */
+/* ETHTOOL_MSG_FEC_SET - do */
+void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ ethtool_bitset_free(&req->modes);
+ ethtool_fec_stat_free(&req->stats);
+ free(req);
+}
+
+int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEC_SET, 1);
+ ys->req_policy = &ethtool_fec_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header);
+ if (req->_present.modes)
+ ethtool_bitset_put(nlh, ETHTOOL_A_FEC_MODES, &req->modes);
+ if (req->_present.auto_)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_FEC_AUTO, req->auto_);
+ if (req->_present.active)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_FEC_ACTIVE, req->active);
+ if (req->_present.stats)
+ ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_MODULE_EEPROM_GET ============== */
+/* ETHTOOL_MSG_MODULE_EEPROM_GET - do */
+void
+ethtool_module_eeprom_get_req_free(struct ethtool_module_eeprom_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void
+ethtool_module_eeprom_get_rsp_free(struct ethtool_module_eeprom_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp->data);
+ free(rsp);
+}
+
+int ethtool_module_eeprom_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_module_eeprom_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_MODULE_EEPROM_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_MODULE_EEPROM_OFFSET) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.offset = 1;
+ dst->offset = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_MODULE_EEPROM_LENGTH) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.length = 1;
+ dst->length = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_MODULE_EEPROM_PAGE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.page = 1;
+ dst->page = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MODULE_EEPROM_BANK) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.bank = 1;
+ dst->bank = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.i2c_address = 1;
+ dst->i2c_address = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MODULE_EEPROM_DATA) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.data_len = len;
+ dst->data = malloc(len);
+ memcpy(dst->data, mnl_attr_get_payload(attr), len);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_module_eeprom_get_rsp *
+ethtool_module_eeprom_get(struct ynl_sock *ys,
+ struct ethtool_module_eeprom_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_module_eeprom_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_EEPROM_GET, 1);
+ ys->req_policy = &ethtool_module_eeprom_nest;
+ yrs.yarg.rsp_policy = &ethtool_module_eeprom_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MODULE_EEPROM_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_module_eeprom_get_rsp_parse;
+ yrs.rsp_cmd = 32;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_module_eeprom_get_rsp_free(rsp);
+ return NULL;
+}
+
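ETHTOOL_A_MODULE_EEPROM_DATA is a binary attribute, so the parser above hands it back as a malloc()ed buffer with the byte count in _present.data_len rather than a fixed-width field. A sketch of reading and hex-dumping the first page of an SFP module; the _set_offset()/_set_length()/_set_page()/_set_i2c_address() request setters are assumed to exist in the generated header, mirroring the attributes parsed above:

	struct ethtool_module_eeprom_get_req *req;
	struct ethtool_module_eeprom_get_rsp *rsp;
	unsigned int i;

	req = ethtool_module_eeprom_get_req_alloc();
	ethtool_module_eeprom_get_req_set_header_dev_index(req, ifindex);
	ethtool_module_eeprom_get_req_set_offset(req, 0);
	ethtool_module_eeprom_get_req_set_length(req, 128);
	ethtool_module_eeprom_get_req_set_page(req, 0);
	ethtool_module_eeprom_get_req_set_i2c_address(req, 0x50);

	rsp = ethtool_module_eeprom_get(ys, req);
	ethtool_module_eeprom_get_req_free(req);
	if (rsp) {
		for (i = 0; i < rsp->_present.data_len; i++)
			printf("%02x%c", ((unsigned char *)rsp->data)[i],
			       i % 16 == 15 ? '\n' : ' ');
		ethtool_module_eeprom_get_rsp_free(rsp);
	}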
+/* ETHTOOL_MSG_MODULE_EEPROM_GET - dump */
+void
+ethtool_module_eeprom_get_list_free(struct ethtool_module_eeprom_get_list *rsp)
+{
+ struct ethtool_module_eeprom_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp->obj.data);
+ free(rsp);
+ }
+}
+
+struct ethtool_module_eeprom_get_list *
+ethtool_module_eeprom_get_dump(struct ynl_sock *ys,
+ struct ethtool_module_eeprom_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_module_eeprom_get_list);
+ yds.cb = ethtool_module_eeprom_get_rsp_parse;
+ yds.rsp_cmd = 32;
+ yds.rsp_policy = &ethtool_module_eeprom_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MODULE_EEPROM_GET, 1);
+ ys->req_policy = &ethtool_module_eeprom_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MODULE_EEPROM_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_module_eeprom_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_PHC_VCLOCKS_GET ============== */
+/* ETHTOOL_MSG_PHC_VCLOCKS_GET - do */
+void ethtool_phc_vclocks_get_req_free(struct ethtool_phc_vclocks_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_phc_vclocks_get_rsp_free(struct ethtool_phc_vclocks_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_phc_vclocks_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_phc_vclocks_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PHC_VCLOCKS_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PHC_VCLOCKS_NUM) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.num = 1;
+ dst->num = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_phc_vclocks_get_rsp *
+ethtool_phc_vclocks_get(struct ynl_sock *ys,
+ struct ethtool_phc_vclocks_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_phc_vclocks_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PHC_VCLOCKS_GET, 1);
+ ys->req_policy = &ethtool_phc_vclocks_nest;
+ yrs.yarg.rsp_policy = &ethtool_phc_vclocks_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PHC_VCLOCKS_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_phc_vclocks_get_rsp_parse;
+ yrs.rsp_cmd = 34;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_phc_vclocks_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_PHC_VCLOCKS_GET - dump */
+void
+ethtool_phc_vclocks_get_list_free(struct ethtool_phc_vclocks_get_list *rsp)
+{
+ struct ethtool_phc_vclocks_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_phc_vclocks_get_list *
+ethtool_phc_vclocks_get_dump(struct ynl_sock *ys,
+ struct ethtool_phc_vclocks_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_phc_vclocks_get_list);
+ yds.cb = ethtool_phc_vclocks_get_rsp_parse;
+ yds.rsp_cmd = 34;
+ yds.rsp_policy = &ethtool_phc_vclocks_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PHC_VCLOCKS_GET, 1);
+ ys->req_policy = &ethtool_phc_vclocks_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PHC_VCLOCKS_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_phc_vclocks_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_MODULE_GET ============== */
+/* ETHTOOL_MSG_MODULE_GET - do */
+void ethtool_module_get_req_free(struct ethtool_module_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_module_get_rsp_free(struct ethtool_module_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_module_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_module_get_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_MODULE_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_MODULE_POWER_MODE_POLICY) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.power_mode_policy = 1;
+ dst->power_mode_policy = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MODULE_POWER_MODE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.power_mode = 1;
+ dst->power_mode = mnl_attr_get_u8(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_module_get_rsp *
+ethtool_module_get(struct ynl_sock *ys, struct ethtool_module_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_module_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_GET, 1);
+ ys->req_policy = &ethtool_module_nest;
+ yrs.yarg.rsp_policy = &ethtool_module_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_module_get_rsp_parse;
+ yrs.rsp_cmd = 35;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_module_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_MODULE_GET - dump */
+void ethtool_module_get_list_free(struct ethtool_module_get_list *rsp)
+{
+ struct ethtool_module_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_module_get_list *
+ethtool_module_get_dump(struct ynl_sock *ys,
+ struct ethtool_module_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_module_get_list);
+ yds.cb = ethtool_module_get_rsp_parse;
+ yds.rsp_cmd = 35;
+ yds.rsp_policy = &ethtool_module_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MODULE_GET, 1);
+ ys->req_policy = &ethtool_module_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_module_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_MODULE_GET - notify */
+void ethtool_module_get_ntf_free(struct ethtool_module_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_MODULE_SET ============== */
+/* ETHTOOL_MSG_MODULE_SET - do */
+void ethtool_module_set_req_free(struct ethtool_module_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_SET, 1);
+ ys->req_policy = &ethtool_module_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header);
+ if (req->_present.power_mode_policy)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE_POLICY, req->power_mode_policy);
+ if (req->_present.power_mode)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
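Set requests return 0 or -1 rather than a response struct, as above. A sketch of switching a transceiver module to the automatic power-mode policy; the request setters are assumed from the generated header, and the policy constant is enum ethtool_module_power_mode_policy from <linux/ethtool.h>:

	struct ethtool_module_set_req *req;

	req = ethtool_module_set_req_alloc();
	ethtool_module_set_req_set_header_dev_index(req, ifindex);
	ethtool_module_set_req_set_power_mode_policy(req,
			ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO);

	if (ethtool_module_set(ys, req))
		fprintf(stderr, "module set failed: %s\n", ys->err.msg);
	ethtool_module_set_req_free(req);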
+/* ============== ETHTOOL_MSG_PSE_GET ============== */
+/* ETHTOOL_MSG_PSE_GET - do */
+void ethtool_pse_get_req_free(struct ethtool_pse_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_pse_get_rsp_free(struct ethtool_pse_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_pse_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ethtool_pse_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PSE_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PODL_PSE_ADMIN_STATE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.admin_state = 1;
+ dst->admin_state = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PODL_PSE_ADMIN_CONTROL) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.admin_control = 1;
+ dst->admin_control = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PODL_PSE_PW_D_STATUS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.pw_d_status = 1;
+ dst->pw_d_status = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_pse_get_rsp *
+ethtool_pse_get(struct ynl_sock *ys, struct ethtool_pse_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_pse_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PSE_GET, 1);
+ ys->req_policy = &ethtool_pse_nest;
+ yrs.yarg.rsp_policy = &ethtool_pse_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_pse_get_rsp_parse;
+ yrs.rsp_cmd = 37;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_pse_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_PSE_GET - dump */
+void ethtool_pse_get_list_free(struct ethtool_pse_get_list *rsp)
+{
+ struct ethtool_pse_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_pse_get_list *
+ethtool_pse_get_dump(struct ynl_sock *ys, struct ethtool_pse_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_pse_get_list);
+ yds.cb = ethtool_pse_get_rsp_parse;
+ yds.rsp_cmd = 37;
+ yds.rsp_policy = &ethtool_pse_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PSE_GET, 1);
+ ys->req_policy = &ethtool_pse_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_pse_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_PSE_SET ============== */
+/* ETHTOOL_MSG_PSE_SET - do */
+void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PSE_SET, 1);
+ ys->req_policy = &ethtool_pse_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header);
+ if (req->_present.admin_state)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_ADMIN_STATE, req->admin_state);
+ if (req->_present.admin_control)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, req->admin_control);
+ if (req->_present.pw_d_status)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
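A sketch of enabling PoDL power sourcing through the setter above; the admin control value comes from enum ethtool_podl_pse_admin_state in <linux/ethtool.h>, and the request setters are assumed from the generated header:

	struct ethtool_pse_set_req *req;

	req = ethtool_pse_set_req_alloc();
	ethtool_pse_set_req_set_header_dev_index(req, ifindex);
	ethtool_pse_set_req_set_admin_control(req,
			ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED);

	if (ethtool_pse_set(ys, req))
		fprintf(stderr, "PSE set failed: %s\n", ys->err.msg);
	ethtool_pse_set_req_free(req);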
+/* ============== ETHTOOL_MSG_RSS_GET ============== */
+/* ETHTOOL_MSG_RSS_GET - do */
+void ethtool_rss_get_req_free(struct ethtool_rss_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_rss_get_rsp_free(struct ethtool_rss_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp->indir);
+ free(rsp->hkey);
+ free(rsp);
+}
+
+int ethtool_rss_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ethtool_rss_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_RSS_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_RSS_CONTEXT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.context = 1;
+ dst->context = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RSS_HFUNC) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.hfunc = 1;
+ dst->hfunc = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_RSS_INDIR) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.indir_len = len;
+ dst->indir = malloc(len);
+ memcpy(dst->indir, mnl_attr_get_payload(attr), len);
+ } else if (type == ETHTOOL_A_RSS_HKEY) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.hkey_len = len;
+ dst->hkey = malloc(len);
+ memcpy(dst->hkey, mnl_attr_get_payload(attr), len);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_rss_get_rsp *
+ethtool_rss_get(struct ynl_sock *ys, struct ethtool_rss_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_rss_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RSS_GET, 1);
+ ys->req_policy = &ethtool_rss_nest;
+ yrs.yarg.rsp_policy = &ethtool_rss_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_RSS_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_rss_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_RSS_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_rss_get_rsp_free(rsp);
+ return NULL;
+}
+
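ETHTOOL_A_RSS_INDIR and ETHTOOL_A_RSS_HKEY are likewise raw binary attributes; the indirection table payload is conventionally an array of __u32 ring indices, so its entry count is the byte length divided by sizeof(__u32). A sketch under that assumption:

	struct ethtool_rss_get_rsp *rsp;

	rsp = ethtool_rss_get(ys, req);
	if (rsp) {
		__u32 *indir = rsp->indir;
		unsigned int i, n = rsp->_present.indir_len / sizeof(__u32);

		for (i = 0; i < n; i++)
			printf("indir[%u] = %u\n", i, indir[i]);
		ethtool_rss_get_rsp_free(rsp);
	}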
+/* ETHTOOL_MSG_RSS_GET - dump */
+void ethtool_rss_get_list_free(struct ethtool_rss_get_list *rsp)
+{
+ struct ethtool_rss_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp->obj.indir);
+ free(rsp->obj.hkey);
+ free(rsp);
+ }
+}
+
+struct ethtool_rss_get_list *
+ethtool_rss_get_dump(struct ynl_sock *ys, struct ethtool_rss_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_rss_get_list);
+ yds.cb = ethtool_rss_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_RSS_GET;
+ yds.rsp_policy = &ethtool_rss_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_RSS_GET, 1);
+ ys->req_policy = &ethtool_rss_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_RSS_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_rss_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_PLCA_GET_CFG ============== */
+/* ETHTOOL_MSG_PLCA_GET_CFG - do */
+void ethtool_plca_get_cfg_req_free(struct ethtool_plca_get_cfg_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_plca_get_cfg_rsp_free(struct ethtool_plca_get_cfg_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_plca_get_cfg_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_plca_get_cfg_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PLCA_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PLCA_VERSION) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.version = 1;
+ dst->version = mnl_attr_get_u16(attr);
+ } else if (type == ETHTOOL_A_PLCA_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.enabled = 1;
+ dst->enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PLCA_STATUS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.status = 1;
+ dst->status = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PLCA_NODE_CNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.node_cnt = 1;
+ dst->node_cnt = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_NODE_ID) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.node_id = 1;
+ dst->node_id = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_TO_TMR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.to_tmr = 1;
+ dst->to_tmr = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_BURST_CNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.burst_cnt = 1;
+ dst->burst_cnt = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_BURST_TMR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.burst_tmr = 1;
+ dst->burst_tmr = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_plca_get_cfg_rsp *
+ethtool_plca_get_cfg(struct ynl_sock *ys, struct ethtool_plca_get_cfg_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_plca_get_cfg_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_CFG, 1);
+ ys->req_policy = &ethtool_plca_nest;
+ yrs.yarg.rsp_policy = &ethtool_plca_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_plca_get_cfg_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_PLCA_GET_CFG;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_plca_get_cfg_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_PLCA_GET_CFG - dump */
+void ethtool_plca_get_cfg_list_free(struct ethtool_plca_get_cfg_list *rsp)
+{
+ struct ethtool_plca_get_cfg_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_plca_get_cfg_list *
+ethtool_plca_get_cfg_dump(struct ynl_sock *ys,
+ struct ethtool_plca_get_cfg_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_plca_get_cfg_list);
+ yds.cb = ethtool_plca_get_cfg_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_PLCA_GET_CFG;
+ yds.rsp_policy = &ethtool_plca_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_CFG, 1);
+ ys->req_policy = &ethtool_plca_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_plca_get_cfg_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_PLCA_GET_CFG - notify */
+void ethtool_plca_get_cfg_ntf_free(struct ethtool_plca_get_cfg_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_PLCA_SET_CFG ============== */
+/* ETHTOOL_MSG_PLCA_SET_CFG - do */
+void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_plca_set_cfg(struct ynl_sock *ys,
+ struct ethtool_plca_set_cfg_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_SET_CFG, 1);
+ ys->req_policy = &ethtool_plca_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header);
+ if (req->_present.version)
+ mnl_attr_put_u16(nlh, ETHTOOL_A_PLCA_VERSION, req->version);
+ if (req->_present.enabled)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_PLCA_ENABLED, req->enabled);
+ if (req->_present.status)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_PLCA_STATUS, req->status);
+ if (req->_present.node_cnt)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_NODE_CNT, req->node_cnt);
+ if (req->_present.node_id)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_NODE_ID, req->node_id);
+ if (req->_present.to_tmr)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_TO_TMR, req->to_tmr);
+ if (req->_present.burst_cnt)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_CNT, req->burst_cnt);
+ if (req->_present.burst_tmr)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== ETHTOOL_MSG_PLCA_GET_STATUS ============== */
+/* ETHTOOL_MSG_PLCA_GET_STATUS - do */
+void ethtool_plca_get_status_req_free(struct ethtool_plca_get_status_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_plca_get_status_rsp_free(struct ethtool_plca_get_status_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ free(rsp);
+}
+
+int ethtool_plca_get_status_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_plca_get_status_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_PLCA_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_PLCA_VERSION) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.version = 1;
+ dst->version = mnl_attr_get_u16(attr);
+ } else if (type == ETHTOOL_A_PLCA_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.enabled = 1;
+ dst->enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PLCA_STATUS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.status = 1;
+ dst->status = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_PLCA_NODE_CNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.node_cnt = 1;
+ dst->node_cnt = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_NODE_ID) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.node_id = 1;
+ dst->node_id = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_TO_TMR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.to_tmr = 1;
+ dst->to_tmr = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_BURST_CNT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.burst_cnt = 1;
+ dst->burst_cnt = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_PLCA_BURST_TMR) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.burst_tmr = 1;
+ dst->burst_tmr = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_plca_get_status_rsp *
+ethtool_plca_get_status(struct ynl_sock *ys,
+ struct ethtool_plca_get_status_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_plca_get_status_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_STATUS, 1);
+ ys->req_policy = &ethtool_plca_nest;
+ yrs.yarg.rsp_policy = &ethtool_plca_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_plca_get_status_rsp_parse;
+ yrs.rsp_cmd = 40;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_plca_get_status_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_PLCA_GET_STATUS - dump */
+void
+ethtool_plca_get_status_list_free(struct ethtool_plca_get_status_list *rsp)
+{
+ struct ethtool_plca_get_status_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+ }
+}
+
+struct ethtool_plca_get_status_list *
+ethtool_plca_get_status_dump(struct ynl_sock *ys,
+ struct ethtool_plca_get_status_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_plca_get_status_list);
+ yds.cb = ethtool_plca_get_status_rsp_parse;
+ yds.rsp_cmd = 40;
+ yds.rsp_policy = &ethtool_plca_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_STATUS, 1);
+ ys->req_policy = &ethtool_plca_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_plca_get_status_list_free(yds.first);
+ return NULL;
+}
+
+/* ============== ETHTOOL_MSG_MM_GET ============== */
+/* ETHTOOL_MSG_MM_GET - do */
+void ethtool_mm_get_req_free(struct ethtool_mm_get_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+void ethtool_mm_get_rsp_free(struct ethtool_mm_get_rsp *rsp)
+{
+ ethtool_header_free(&rsp->header);
+ ethtool_mm_stat_free(&rsp->stats);
+ free(rsp);
+}
+
+int ethtool_mm_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ethtool_mm_get_rsp *dst;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_MM_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_MM_PMAC_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.pmac_enabled = 1;
+ dst->pmac_enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MM_TX_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_enabled = 1;
+ dst->tx_enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MM_TX_ACTIVE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_active = 1;
+ dst->tx_active = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MM_TX_MIN_FRAG_SIZE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.tx_min_frag_size = 1;
+ dst->tx_min_frag_size = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_MM_RX_MIN_FRAG_SIZE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.rx_min_frag_size = 1;
+ dst->rx_min_frag_size = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_MM_VERIFY_ENABLED) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.verify_enabled = 1;
+ dst->verify_enabled = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_MM_VERIFY_TIME) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.verify_time = 1;
+ dst->verify_time = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_MM_MAX_VERIFY_TIME) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.max_verify_time = 1;
+ dst->max_verify_time = mnl_attr_get_u32(attr);
+ } else if (type == ETHTOOL_A_MM_STATS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.stats = 1;
+
+ parg.rsp_policy = &ethtool_mm_stat_nest;
+ parg.data = &dst->stats;
+ if (ethtool_mm_stat_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct ethtool_mm_get_rsp *
+ethtool_mm_get(struct ynl_sock *ys, struct ethtool_mm_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct ethtool_mm_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MM_GET, 1);
+ ys->req_policy = &ethtool_mm_nest;
+ yrs.yarg.rsp_policy = &ethtool_mm_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = ethtool_mm_get_rsp_parse;
+ yrs.rsp_cmd = ETHTOOL_MSG_MM_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ ethtool_mm_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_MM_GET - dump */
+void ethtool_mm_get_list_free(struct ethtool_mm_get_list *rsp)
+{
+ struct ethtool_mm_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_mm_stat_free(&rsp->obj.stats);
+ free(rsp);
+ }
+}
+
+struct ethtool_mm_get_list *
+ethtool_mm_get_dump(struct ynl_sock *ys, struct ethtool_mm_get_req_dump *req)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct ethtool_mm_get_list);
+ yds.cb = ethtool_mm_get_rsp_parse;
+ yds.rsp_cmd = ETHTOOL_MSG_MM_GET;
+ yds.rsp_policy = &ethtool_mm_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MM_GET, 1);
+ ys->req_policy = &ethtool_mm_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ ethtool_mm_get_list_free(yds.first);
+ return NULL;
+}
+
+/* ETHTOOL_MSG_MM_GET - notify */
+void ethtool_mm_get_ntf_free(struct ethtool_mm_get_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_mm_stat_free(&rsp->obj.stats);
+ free(rsp);
+}
+
+/* ============== ETHTOOL_MSG_MM_SET ============== */
+/* ETHTOOL_MSG_MM_SET - do */
+void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req)
+{
+ ethtool_header_free(&req->header);
+ free(req);
+}
+
+int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MM_SET, 1);
+ ys->req_policy = &ethtool_mm_nest;
+
+ if (req->_present.header)
+ ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header);
+ if (req->_present.verify_enabled)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_MM_VERIFY_ENABLED, req->verify_enabled);
+ if (req->_present.verify_time)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_MM_VERIFY_TIME, req->verify_time);
+ if (req->_present.tx_enabled)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_MM_TX_ENABLED, req->tx_enabled);
+ if (req->_present.pmac_enabled)
+ mnl_attr_put_u8(nlh, ETHTOOL_A_MM_PMAC_ENABLED, req->pmac_enabled);
+ if (req->_present.tx_min_frag_size)
+ mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ETHTOOL_MSG_CABLE_TEST_NTF - event */
+int ethtool_cable_test_ntf_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ethtool_cable_test_ntf_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_CABLE_TEST_NTF_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_CABLE_TEST_NTF_STATUS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.status = 1;
+ dst->status = mnl_attr_get_u8(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+void ethtool_cable_test_ntf_free(struct ethtool_cable_test_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ free(rsp);
+}
+
+/* ETHTOOL_MSG_CABLE_TEST_TDR_NTF - event */
+int ethtool_cable_test_tdr_ntf_rsp_parse(const struct nlmsghdr *nlh,
+ void *data)
+{
+ struct ethtool_cable_test_tdr_ntf_rsp *dst;
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.header = 1;
+
+ parg.rsp_policy = &ethtool_header_nest;
+ parg.data = &dst->header;
+ if (ethtool_header_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ } else if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.status = 1;
+ dst->status = mnl_attr_get_u8(attr);
+ } else if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.nest = 1;
+
+ parg.rsp_policy = &ethtool_cable_nest_nest;
+ parg.data = &dst->nest;
+ if (ethtool_cable_nest_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+void ethtool_cable_test_tdr_ntf_free(struct ethtool_cable_test_tdr_ntf *rsp)
+{
+ ethtool_header_free(&rsp->obj.header);
+ ethtool_cable_nest_free(&rsp->obj.nest);
+ free(rsp);
+}
+
+static const struct ynl_ntf_info ethtool_ntf_info[] = {
+ [ETHTOOL_MSG_LINKINFO_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_linkinfo_get_ntf),
+ .cb = ethtool_linkinfo_get_rsp_parse,
+ .policy = &ethtool_linkinfo_nest,
+ .free = (void *)ethtool_linkinfo_get_ntf_free,
+ },
+ [ETHTOOL_MSG_LINKMODES_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_linkmodes_get_ntf),
+ .cb = ethtool_linkmodes_get_rsp_parse,
+ .policy = &ethtool_linkmodes_nest,
+ .free = (void *)ethtool_linkmodes_get_ntf_free,
+ },
+ [ETHTOOL_MSG_DEBUG_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_debug_get_ntf),
+ .cb = ethtool_debug_get_rsp_parse,
+ .policy = &ethtool_debug_nest,
+ .free = (void *)ethtool_debug_get_ntf_free,
+ },
+ [ETHTOOL_MSG_WOL_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_wol_get_ntf),
+ .cb = ethtool_wol_get_rsp_parse,
+ .policy = &ethtool_wol_nest,
+ .free = (void *)ethtool_wol_get_ntf_free,
+ },
+ [ETHTOOL_MSG_FEATURES_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_features_get_ntf),
+ .cb = ethtool_features_get_rsp_parse,
+ .policy = &ethtool_features_nest,
+ .free = (void *)ethtool_features_get_ntf_free,
+ },
+ [ETHTOOL_MSG_PRIVFLAGS_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_privflags_get_ntf),
+ .cb = ethtool_privflags_get_rsp_parse,
+ .policy = &ethtool_privflags_nest,
+ .free = (void *)ethtool_privflags_get_ntf_free,
+ },
+ [ETHTOOL_MSG_RINGS_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_rings_get_ntf),
+ .cb = ethtool_rings_get_rsp_parse,
+ .policy = &ethtool_rings_nest,
+ .free = (void *)ethtool_rings_get_ntf_free,
+ },
+ [ETHTOOL_MSG_CHANNELS_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_channels_get_ntf),
+ .cb = ethtool_channels_get_rsp_parse,
+ .policy = &ethtool_channels_nest,
+ .free = (void *)ethtool_channels_get_ntf_free,
+ },
+ [ETHTOOL_MSG_COALESCE_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_coalesce_get_ntf),
+ .cb = ethtool_coalesce_get_rsp_parse,
+ .policy = &ethtool_coalesce_nest,
+ .free = (void *)ethtool_coalesce_get_ntf_free,
+ },
+ [ETHTOOL_MSG_PAUSE_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_pause_get_ntf),
+ .cb = ethtool_pause_get_rsp_parse,
+ .policy = &ethtool_pause_nest,
+ .free = (void *)ethtool_pause_get_ntf_free,
+ },
+ [ETHTOOL_MSG_EEE_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_eee_get_ntf),
+ .cb = ethtool_eee_get_rsp_parse,
+ .policy = &ethtool_eee_nest,
+ .free = (void *)ethtool_eee_get_ntf_free,
+ },
+ [ETHTOOL_MSG_CABLE_TEST_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_cable_test_ntf),
+ .cb = ethtool_cable_test_ntf_rsp_parse,
+ .policy = &ethtool_cable_test_ntf_nest,
+ .free = (void *)ethtool_cable_test_ntf_free,
+ },
+ [ETHTOOL_MSG_CABLE_TEST_TDR_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_cable_test_tdr_ntf),
+ .cb = ethtool_cable_test_tdr_ntf_rsp_parse,
+ .policy = &ethtool_cable_test_tdr_ntf_nest,
+ .free = (void *)ethtool_cable_test_tdr_ntf_free,
+ },
+ [ETHTOOL_MSG_FEC_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_fec_get_ntf),
+ .cb = ethtool_fec_get_rsp_parse,
+ .policy = &ethtool_fec_nest,
+ .free = (void *)ethtool_fec_get_ntf_free,
+ },
+ [ETHTOOL_MSG_MODULE_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_module_get_ntf),
+ .cb = ethtool_module_get_rsp_parse,
+ .policy = &ethtool_module_nest,
+ .free = (void *)ethtool_module_get_ntf_free,
+ },
+ [ETHTOOL_MSG_PLCA_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_plca_get_cfg_ntf),
+ .cb = ethtool_plca_get_cfg_rsp_parse,
+ .policy = &ethtool_plca_nest,
+ .free = (void *)ethtool_plca_get_cfg_ntf_free,
+ },
+ [ETHTOOL_MSG_MM_NTF] = {
+ .alloc_sz = sizeof(struct ethtool_mm_get_ntf),
+ .cb = ethtool_mm_get_rsp_parse,
+ .policy = &ethtool_mm_nest,
+ .free = (void *)ethtool_mm_get_ntf_free,
+ },
+};
+
+const struct ynl_family ynl_ethtool_family = {
+ .name = "ethtool",
+ .ntf_info = ethtool_ntf_info,
+ .ntf_info_size = MNL_ARRAY_SIZE(ethtool_ntf_info),
+};
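The ntf_info table is what lets the ynl core demultiplex ethtool multicast notifications: the incoming cmd indexes into it to pick alloc_sz, the parse callback and the free routine, while ynl_ethtool_family ties the table to the "ethtool" generic netlink family. A monitoring sketch, assuming the kernel's "monitor" multicast group name and the ynl_subscribe()/ynl_ntf_check()/ynl_ntf_dequeue() helpers from the ynl library:

	struct ynl_ntf_base_type *ntf;
	struct ynl_error yerr;
	struct ynl_sock *ys;

	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
	if (!ys)
		return 1;
	if (ynl_subscribe(ys, "monitor"))
		goto out;

	if (ynl_ntf_check(ys) < 0)	/* receive pending notifications */
		goto out;
	while ((ntf = ynl_ntf_dequeue(ys))) {
		if (ntf->cmd == ETHTOOL_MSG_FEC_NTF) {
			struct ethtool_fec_get_ntf *fec = (void *)ntf;

			printf("FEC change on ifindex %u\n",
			       fec->obj.header.dev_index);
		}
		ntf->free(ntf);
	}
out:
	ynl_sock_destroy(ys);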
diff --git a/tools/net/ynl/generated/ethtool-user.h b/tools/net/ynl/generated/ethtool-user.h
new file mode 100644
index 000000000000..d7d4ba855f43
--- /dev/null
+++ b/tools/net/ynl/generated/ethtool-user.h
@@ -0,0 +1,5531 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/ethtool.yaml */
+/* YNL-GEN user header */
+/* YNL-ARG --user-header linux/ethtool_netlink.h --exclude-op stats-get */
+
+#ifndef _LINUX_ETHTOOL_GEN_H
+#define _LINUX_ETHTOOL_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/ethtool.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_ethtool_family;
+
+/* Enums */
+const char *ethtool_op_str(int op);
+const char *ethtool_udp_tunnel_type_str(int value);
+const char *ethtool_stringset_str(enum ethtool_stringset value);
+
+/* Common nested types */
+struct ethtool_header {
+ struct {
+ __u32 dev_index:1;
+ __u32 dev_name_len;
+ __u32 flags:1;
+ } _present;
+
+ __u32 dev_index;
+ char *dev_name;
+ __u32 flags;
+};
+
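Every generated struct pairs its payload fields with a _present shadow: one bit per optional fixed-size member, and a _len counter (dev_name_len above) for variable-length ones. Readers should test the bit before trusting the field, and writers must set it or the attribute is silently skipped at message-build time, e.g.:

	struct ethtool_header hdr = {};

	/* reading: the attribute may have been absent from the message */
	if (hdr._present.dev_index)
		printf("ifindex %u\n", hdr.dev_index);

	/* writing by hand (the generated setters below do this for you) */
	hdr._present.flags = 1;
	hdr.flags = ETHTOOL_FLAG_COMPACT_BITSETS;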
+struct ethtool_pause_stat {
+ struct {
+ __u32 tx_frames:1;
+ __u32 rx_frames:1;
+ } _present;
+
+ __u64 tx_frames;
+ __u64 rx_frames;
+};
+
+struct ethtool_cable_test_tdr_cfg {
+ struct {
+ __u32 first:1;
+ __u32 last:1;
+ __u32 step:1;
+ __u32 pair:1;
+ } _present;
+
+ __u32 first;
+ __u32 last;
+ __u32 step;
+ __u8 pair;
+};
+
+struct ethtool_fec_stat {
+ struct {
+ __u32 corrected_len;
+ __u32 uncorr_len;
+ __u32 corr_bits_len;
+ } _present;
+
+ void *corrected;
+ void *uncorr;
+ void *corr_bits;
+};
+
+struct ethtool_mm_stat {
+ struct {
+ __u32 reassembly_errors:1;
+ __u32 smd_errors:1;
+ __u32 reassembly_ok:1;
+ __u32 rx_frag_count:1;
+ __u32 tx_frag_count:1;
+ __u32 hold_count:1;
+ } _present;
+
+ __u64 reassembly_errors;
+ __u64 smd_errors;
+ __u64 reassembly_ok;
+ __u64 rx_frag_count;
+ __u64 tx_frag_count;
+ __u64 hold_count;
+};
+
+struct ethtool_cable_result {
+ struct {
+ __u32 pair:1;
+ __u32 code:1;
+ } _present;
+
+ __u8 pair;
+ __u8 code;
+};
+
+struct ethtool_cable_fault_length {
+ struct {
+ __u32 pair:1;
+ __u32 cm:1;
+ } _present;
+
+ __u8 pair;
+ __u32 cm;
+};
+
+struct ethtool_bitset_bit {
+ struct {
+ __u32 index:1;
+ __u32 name_len;
+ __u32 value:1;
+ } _present;
+
+ __u32 index;
+ char *name;
+};
+
+struct ethtool_tunnel_udp_entry {
+ struct {
+ __u32 port:1;
+ __u32 type:1;
+ } _present;
+
+ __u16 port /* big-endian */;
+ __u32 type;
+};
+
+struct ethtool_string {
+ struct {
+ __u32 index:1;
+ __u32 value_len;
+ } _present;
+
+ __u32 index;
+ char *value;
+};
+
+struct ethtool_cable_nest {
+ struct {
+ __u32 result:1;
+ __u32 fault_length:1;
+ } _present;
+
+ struct ethtool_cable_result result;
+ struct ethtool_cable_fault_length fault_length;
+};
+
+struct ethtool_bitset_bits {
+ unsigned int n_bit;
+ struct ethtool_bitset_bit *bit;
+};
+
+struct ethtool_strings {
+ unsigned int n_string;
+ struct ethtool_string *string;
+};
+
+struct ethtool_bitset {
+ struct {
+ __u32 nomask:1;
+ __u32 size:1;
+ __u32 bits:1;
+ } _present;
+
+ __u32 size;
+ struct ethtool_bitset_bits bits;
+};
+
+struct ethtool_stringset_ {
+ struct {
+ __u32 id:1;
+ __u32 count:1;
+ } _present;
+
+ __u32 id;
+ __u32 count;
+ unsigned int n_strings;
+ struct ethtool_strings *strings;
+};
+
+struct ethtool_tunnel_udp_table {
+ struct {
+ __u32 size:1;
+ __u32 types:1;
+ } _present;
+
+ __u32 size;
+ struct ethtool_bitset types;
+ unsigned int n_entry;
+ struct ethtool_tunnel_udp_entry *entry;
+};
+
+struct ethtool_stringsets {
+ unsigned int n_stringset;
+ struct ethtool_stringset_ *stringset;
+};
+
+struct ethtool_tunnel_udp {
+ struct {
+ __u32 table:1;
+ } _present;
+
+ struct ethtool_tunnel_udp_table table;
+};
+
+/* ============== ETHTOOL_MSG_STRSET_GET ============== */
+/* ETHTOOL_MSG_STRSET_GET - do */
+struct ethtool_strset_get_req {
+ struct {
+ __u32 header:1;
+ __u32 stringsets:1;
+ __u32 counts_only:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_stringsets stringsets;
+};
+
+static inline struct ethtool_strset_get_req *ethtool_strset_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_strset_get_req));
+}
+void ethtool_strset_get_req_free(struct ethtool_strset_get_req *req);
+
+static inline void
+ethtool_strset_get_req_set_header_dev_index(struct ethtool_strset_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_strset_get_req_set_header_dev_name(struct ethtool_strset_get_req *req,
+ const char *dev_name)
+{
+ req->_present.header = 1;
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_strset_get_req_set_header_flags(struct ethtool_strset_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+__ethtool_strset_get_req_set_stringsets_stringset(struct ethtool_strset_get_req *req,
+ struct ethtool_stringset_ *stringset,
+ unsigned int n_stringset)
+{
+ free(req->stringsets.stringset);
+ req->stringsets.stringset = stringset;
+ req->stringsets.n_stringset = n_stringset;
+}
+static inline void
+ethtool_strset_get_req_set_counts_only(struct ethtool_strset_get_req *req)
+{
+ req->_present.counts_only = 1;
+}
+
+struct ethtool_strset_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 stringsets:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_stringsets stringsets;
+};
+
+void ethtool_strset_get_rsp_free(struct ethtool_strset_get_rsp *rsp);
+
+/*
+ * Get string set from the kernel.
+ */
+struct ethtool_strset_get_rsp *
+ethtool_strset_get(struct ynl_sock *ys, struct ethtool_strset_get_req *req);
+
+/* ETHTOOL_MSG_STRSET_GET - dump */
+struct ethtool_strset_get_req_dump {
+ struct {
+ __u32 header:1;
+ __u32 stringsets:1;
+ __u32 counts_only:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_stringsets stringsets;
+};
+
+static inline struct ethtool_strset_get_req_dump *
+ethtool_strset_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_strset_get_req_dump));
+}
+void ethtool_strset_get_req_dump_free(struct ethtool_strset_get_req_dump *req);
+
+static inline void
+ethtool_strset_get_req_dump_set_header_dev_index(struct ethtool_strset_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_strset_get_req_dump_set_header_dev_name(struct ethtool_strset_get_req_dump *req,
+ const char *dev_name)
+{
+ req->_present.header = 1;
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_strset_get_req_dump_set_header_flags(struct ethtool_strset_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+__ethtool_strset_get_req_dump_set_stringsets_stringset(struct ethtool_strset_get_req_dump *req,
+ struct ethtool_stringset_ *stringset,
+ unsigned int n_stringset)
+{
+ free(req->stringsets.stringset);
+ req->stringsets.stringset = stringset;
+ req->stringsets.n_stringset = n_stringset;
+}
+static inline void
+ethtool_strset_get_req_dump_set_counts_only(struct ethtool_strset_get_req_dump *req)
+{
+ req->_present.counts_only = 1;
+}
+
+struct ethtool_strset_get_list {
+ struct ethtool_strset_get_list *next;
+ struct ethtool_strset_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_strset_get_list_free(struct ethtool_strset_get_list *rsp);
+
+struct ethtool_strset_get_list *
+ethtool_strset_get_dump(struct ynl_sock *ys,
+ struct ethtool_strset_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_LINKINFO_GET ============== */
+/* ETHTOOL_MSG_LINKINFO_GET - do */
+struct ethtool_linkinfo_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_linkinfo_get_req *
+ethtool_linkinfo_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkinfo_get_req));
+}
+void ethtool_linkinfo_get_req_free(struct ethtool_linkinfo_get_req *req);
+
+static inline void
+ethtool_linkinfo_get_req_set_header_dev_index(struct ethtool_linkinfo_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkinfo_get_req_set_header_dev_name(struct ethtool_linkinfo_get_req *req,
+ const char *dev_name)
+{
+ req->_present.header = 1;
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkinfo_get_req_set_header_flags(struct ethtool_linkinfo_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_linkinfo_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 port:1;
+ __u32 phyaddr:1;
+ __u32 tp_mdix:1;
+ __u32 tp_mdix_ctrl:1;
+ __u32 transceiver:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 port;
+ __u8 phyaddr;
+ __u8 tp_mdix;
+ __u8 tp_mdix_ctrl;
+ __u8 transceiver;
+};
+
+void ethtool_linkinfo_get_rsp_free(struct ethtool_linkinfo_get_rsp *rsp);
+
+/*
+ * Get link info.
+ */
+struct ethtool_linkinfo_get_rsp *
+ethtool_linkinfo_get(struct ynl_sock *ys, struct ethtool_linkinfo_get_req *req);
+
+/* ETHTOOL_MSG_LINKINFO_GET - dump */
+struct ethtool_linkinfo_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_linkinfo_get_req_dump *
+ethtool_linkinfo_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkinfo_get_req_dump));
+}
+void
+ethtool_linkinfo_get_req_dump_free(struct ethtool_linkinfo_get_req_dump *req);
+
+static inline void
+ethtool_linkinfo_get_req_dump_set_header_dev_index(struct ethtool_linkinfo_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkinfo_get_req_dump_set_header_dev_name(struct ethtool_linkinfo_get_req_dump *req,
+ const char *dev_name)
+{
+ req->_present.header = 1;
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkinfo_get_req_dump_set_header_flags(struct ethtool_linkinfo_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_linkinfo_get_list {
+ struct ethtool_linkinfo_get_list *next;
+ struct ethtool_linkinfo_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_linkinfo_get_list_free(struct ethtool_linkinfo_get_list *rsp);
+
+struct ethtool_linkinfo_get_list *
+ethtool_linkinfo_get_dump(struct ynl_sock *ys,
+ struct ethtool_linkinfo_get_req_dump *req);
+
+/* ETHTOOL_MSG_LINKINFO_GET - notify */
+struct ethtool_linkinfo_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_linkinfo_get_ntf *ntf);
+ struct ethtool_linkinfo_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_linkinfo_get_ntf_free(struct ethtool_linkinfo_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_LINKINFO_SET ============== */
+/* ETHTOOL_MSG_LINKINFO_SET - do */
+struct ethtool_linkinfo_set_req {
+ struct {
+ __u32 header:1;
+ __u32 port:1;
+ __u32 phyaddr:1;
+ __u32 tp_mdix:1;
+ __u32 tp_mdix_ctrl:1;
+ __u32 transceiver:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 port;
+ __u8 phyaddr;
+ __u8 tp_mdix;
+ __u8 tp_mdix_ctrl;
+ __u8 transceiver;
+};
+
+static inline struct ethtool_linkinfo_set_req *
+ethtool_linkinfo_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkinfo_set_req));
+}
+void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req);
+
+static inline void
+ethtool_linkinfo_set_req_set_header_dev_index(struct ethtool_linkinfo_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkinfo_set_req_set_header_dev_name(struct ethtool_linkinfo_set_req *req,
+ const char *dev_name)
+{
+ req->_present.header = 1;
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkinfo_set_req_set_header_flags(struct ethtool_linkinfo_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_linkinfo_set_req_set_port(struct ethtool_linkinfo_set_req *req,
+ __u8 port)
+{
+ req->_present.port = 1;
+ req->port = port;
+}
+static inline void
+ethtool_linkinfo_set_req_set_phyaddr(struct ethtool_linkinfo_set_req *req,
+ __u8 phyaddr)
+{
+ req->_present.phyaddr = 1;
+ req->phyaddr = phyaddr;
+}
+static inline void
+ethtool_linkinfo_set_req_set_tp_mdix(struct ethtool_linkinfo_set_req *req,
+ __u8 tp_mdix)
+{
+ req->_present.tp_mdix = 1;
+ req->tp_mdix = tp_mdix;
+}
+static inline void
+ethtool_linkinfo_set_req_set_tp_mdix_ctrl(struct ethtool_linkinfo_set_req *req,
+ __u8 tp_mdix_ctrl)
+{
+ req->_present.tp_mdix_ctrl = 1;
+ req->tp_mdix_ctrl = tp_mdix_ctrl;
+}
+static inline void
+ethtool_linkinfo_set_req_set_transceiver(struct ethtool_linkinfo_set_req *req,
+ __u8 transceiver)
+{
+ req->_present.transceiver = 1;
+ req->transceiver = transceiver;
+}
+
+/*
+ * Set link info.
+ */
+int ethtool_linkinfo_set(struct ynl_sock *ys,
+ struct ethtool_linkinfo_set_req *req);
+
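+/*
+ * Usage sketch (illustrative only): the setters flip the matching _present
+ * bits, so only attributes that were explicitly set are put on the wire.
+ * "eth0" and PORT_TP (from <linux/ethtool.h>) are placeholder values.
+ *
+ *     struct ethtool_linkinfo_set_req *req;
+ *
+ *     req = ethtool_linkinfo_set_req_alloc();
+ *     ethtool_linkinfo_set_req_set_header_dev_name(req, "eth0");
+ *     ethtool_linkinfo_set_req_set_port(req, PORT_TP);
+ *     if (ethtool_linkinfo_set(ys, req) < 0)
+ *         fprintf(stderr, "linkinfo set failed\n");
+ *     ethtool_linkinfo_set_req_free(req);
+ */
+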
+/* ============== ETHTOOL_MSG_LINKMODES_GET ============== */
+/* ETHTOOL_MSG_LINKMODES_GET - do */
+struct ethtool_linkmodes_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_linkmodes_get_req *
+ethtool_linkmodes_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkmodes_get_req));
+}
+void ethtool_linkmodes_get_req_free(struct ethtool_linkmodes_get_req *req);
+
+static inline void
+ethtool_linkmodes_get_req_set_header_dev_index(struct ethtool_linkmodes_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkmodes_get_req_set_header_dev_name(struct ethtool_linkmodes_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkmodes_get_req_set_header_flags(struct ethtool_linkmodes_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_linkmodes_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 autoneg:1;
+ __u32 ours:1;
+ __u32 peer:1;
+ __u32 speed:1;
+ __u32 duplex:1;
+ __u32 master_slave_cfg:1;
+ __u32 master_slave_state:1;
+ __u32 lanes:1;
+ __u32 rate_matching:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 autoneg;
+ struct ethtool_bitset ours;
+ struct ethtool_bitset peer;
+ __u32 speed;
+ __u8 duplex;
+ __u8 master_slave_cfg;
+ __u8 master_slave_state;
+ __u32 lanes;
+ __u8 rate_matching;
+};
+
+void ethtool_linkmodes_get_rsp_free(struct ethtool_linkmodes_get_rsp *rsp);
+
+/*
+ * Get link modes.
+ */
+struct ethtool_linkmodes_get_rsp *
+ethtool_linkmodes_get(struct ynl_sock *ys,
+ struct ethtool_linkmodes_get_req *req);
+
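+/*
+ * Usage sketch (illustrative only): response members are only meaningful
+ * when the matching _present bit is set, so test it before reading.
+ *
+ *     struct ethtool_linkmodes_get_rsp *rsp;
+ *     struct ethtool_linkmodes_get_req *req;
+ *
+ *     req = ethtool_linkmodes_get_req_alloc();
+ *     ethtool_linkmodes_get_req_set_header_dev_index(req, ifindex);
+ *     rsp = ethtool_linkmodes_get(ys, req);
+ *     ethtool_linkmodes_get_req_free(req);
+ *     if (rsp) {
+ *         if (rsp->_present.speed)
+ *             printf("speed: %u Mb/s\n", rsp->speed);
+ *         ethtool_linkmodes_get_rsp_free(rsp);
+ *     }
+ */
+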
+/* ETHTOOL_MSG_LINKMODES_GET - dump */
+struct ethtool_linkmodes_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_linkmodes_get_req_dump *
+ethtool_linkmodes_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkmodes_get_req_dump));
+}
+void
+ethtool_linkmodes_get_req_dump_free(struct ethtool_linkmodes_get_req_dump *req);
+
+static inline void
+ethtool_linkmodes_get_req_dump_set_header_dev_index(struct ethtool_linkmodes_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkmodes_get_req_dump_set_header_dev_name(struct ethtool_linkmodes_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkmodes_get_req_dump_set_header_flags(struct ethtool_linkmodes_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_linkmodes_get_list {
+ struct ethtool_linkmodes_get_list *next;
+ struct ethtool_linkmodes_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_linkmodes_get_list_free(struct ethtool_linkmodes_get_list *rsp);
+
+struct ethtool_linkmodes_get_list *
+ethtool_linkmodes_get_dump(struct ynl_sock *ys,
+ struct ethtool_linkmodes_get_req_dump *req);
+
+/* ETHTOOL_MSG_LINKMODES_GET - notify */
+struct ethtool_linkmodes_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_linkmodes_get_ntf *ntf);
+ struct ethtool_linkmodes_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_linkmodes_get_ntf_free(struct ethtool_linkmodes_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_LINKMODES_SET ============== */
+/* ETHTOOL_MSG_LINKMODES_SET - do */
+struct ethtool_linkmodes_set_req {
+ struct {
+ __u32 header:1;
+ __u32 autoneg:1;
+ __u32 ours:1;
+ __u32 peer:1;
+ __u32 speed:1;
+ __u32 duplex:1;
+ __u32 master_slave_cfg:1;
+ __u32 master_slave_state:1;
+ __u32 lanes:1;
+ __u32 rate_matching:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 autoneg;
+ struct ethtool_bitset ours;
+ struct ethtool_bitset peer;
+ __u32 speed;
+ __u8 duplex;
+ __u8 master_slave_cfg;
+ __u8 master_slave_state;
+ __u32 lanes;
+ __u8 rate_matching;
+};
+
+static inline struct ethtool_linkmodes_set_req *
+ethtool_linkmodes_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkmodes_set_req));
+}
+void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req);
+
+static inline void
+ethtool_linkmodes_set_req_set_header_dev_index(struct ethtool_linkmodes_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkmodes_set_req_set_header_dev_name(struct ethtool_linkmodes_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkmodes_set_req_set_header_flags(struct ethtool_linkmodes_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_linkmodes_set_req_set_autoneg(struct ethtool_linkmodes_set_req *req,
+ __u8 autoneg)
+{
+ req->_present.autoneg = 1;
+ req->autoneg = autoneg;
+}
+static inline void
+ethtool_linkmodes_set_req_set_ours_nomask(struct ethtool_linkmodes_set_req *req)
+{
+ req->_present.ours = 1;
+ req->ours._present.nomask = 1;
+}
+static inline void
+ethtool_linkmodes_set_req_set_ours_size(struct ethtool_linkmodes_set_req *req,
+ __u32 size)
+{
+ req->_present.ours = 1;
+ req->ours._present.size = 1;
+ req->ours.size = size;
+}
+static inline void
+__ethtool_linkmodes_set_req_set_ours_bits_bit(struct ethtool_linkmodes_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->ours.bits.bit);
+ req->ours.bits.bit = bit;
+ req->ours.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_linkmodes_set_req_set_peer_nomask(struct ethtool_linkmodes_set_req *req)
+{
+ req->_present.peer = 1;
+ req->peer._present.nomask = 1;
+}
+static inline void
+ethtool_linkmodes_set_req_set_peer_size(struct ethtool_linkmodes_set_req *req,
+ __u32 size)
+{
+ req->_present.peer = 1;
+ req->peer._present.size = 1;
+ req->peer.size = size;
+}
+static inline void
+__ethtool_linkmodes_set_req_set_peer_bits_bit(struct ethtool_linkmodes_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->peer.bits.bit);
+ req->peer.bits.bit = bit;
+ req->peer.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_linkmodes_set_req_set_speed(struct ethtool_linkmodes_set_req *req,
+ __u32 speed)
+{
+ req->_present.speed = 1;
+ req->speed = speed;
+}
+static inline void
+ethtool_linkmodes_set_req_set_duplex(struct ethtool_linkmodes_set_req *req,
+ __u8 duplex)
+{
+ req->_present.duplex = 1;
+ req->duplex = duplex;
+}
+static inline void
+ethtool_linkmodes_set_req_set_master_slave_cfg(struct ethtool_linkmodes_set_req *req,
+ __u8 master_slave_cfg)
+{
+ req->_present.master_slave_cfg = 1;
+ req->master_slave_cfg = master_slave_cfg;
+}
+static inline void
+ethtool_linkmodes_set_req_set_master_slave_state(struct ethtool_linkmodes_set_req *req,
+ __u8 master_slave_state)
+{
+ req->_present.master_slave_state = 1;
+ req->master_slave_state = master_slave_state;
+}
+static inline void
+ethtool_linkmodes_set_req_set_lanes(struct ethtool_linkmodes_set_req *req,
+ __u32 lanes)
+{
+ req->_present.lanes = 1;
+ req->lanes = lanes;
+}
+static inline void
+ethtool_linkmodes_set_req_set_rate_matching(struct ethtool_linkmodes_set_req *req,
+ __u8 rate_matching)
+{
+ req->_present.rate_matching = 1;
+ req->rate_matching = rate_matching;
+}
+
+/*
+ * Set link modes.
+ */
+int ethtool_linkmodes_set(struct ynl_sock *ys,
+ struct ethtool_linkmodes_set_req *req);
+
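+/*
+ * Note on the bitset helpers above: the double-underscore setters such as
+ * __ethtool_linkmodes_set_req_set_ours_bits_bit() take ownership of a
+ * heap-allocated array of struct ethtool_bitset_bit (any previously
+ * installed array is freed first), so the caller must not free or reuse
+ * it; it is released together with the request.  A sketch, with the
+ * per-bit field population elided:
+ *
+ *     struct ethtool_bitset_bit *bits = calloc(n, sizeof(*bits));
+ *
+ *     // ... fill bits[0..n-1] ...
+ *     __ethtool_linkmodes_set_req_set_ours_bits_bit(req, bits, n);
+ *     // bits now belongs to req
+ */
+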
+/* ============== ETHTOOL_MSG_LINKSTATE_GET ============== */
+/* ETHTOOL_MSG_LINKSTATE_GET - do */
+struct ethtool_linkstate_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_linkstate_get_req *
+ethtool_linkstate_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkstate_get_req));
+}
+void ethtool_linkstate_get_req_free(struct ethtool_linkstate_get_req *req);
+
+static inline void
+ethtool_linkstate_get_req_set_header_dev_index(struct ethtool_linkstate_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkstate_get_req_set_header_dev_name(struct ethtool_linkstate_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkstate_get_req_set_header_flags(struct ethtool_linkstate_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_linkstate_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 link:1;
+ __u32 sqi:1;
+ __u32 sqi_max:1;
+ __u32 ext_state:1;
+ __u32 ext_substate:1;
+ __u32 ext_down_cnt:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 link;
+ __u32 sqi;
+ __u32 sqi_max;
+ __u8 ext_state;
+ __u8 ext_substate;
+ __u32 ext_down_cnt;
+};
+
+void ethtool_linkstate_get_rsp_free(struct ethtool_linkstate_get_rsp *rsp);
+
+/*
+ * Get link state.
+ */
+struct ethtool_linkstate_get_rsp *
+ethtool_linkstate_get(struct ynl_sock *ys,
+ struct ethtool_linkstate_get_req *req);
+
+/* ETHTOOL_MSG_LINKSTATE_GET - dump */
+struct ethtool_linkstate_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_linkstate_get_req_dump *
+ethtool_linkstate_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_linkstate_get_req_dump));
+}
+void
+ethtool_linkstate_get_req_dump_free(struct ethtool_linkstate_get_req_dump *req);
+
+static inline void
+ethtool_linkstate_get_req_dump_set_header_dev_index(struct ethtool_linkstate_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_linkstate_get_req_dump_set_header_dev_name(struct ethtool_linkstate_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_linkstate_get_req_dump_set_header_flags(struct ethtool_linkstate_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_linkstate_get_list {
+ struct ethtool_linkstate_get_list *next;
+ struct ethtool_linkstate_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_linkstate_get_list_free(struct ethtool_linkstate_get_list *rsp);
+
+struct ethtool_linkstate_get_list *
+ethtool_linkstate_get_dump(struct ynl_sock *ys,
+ struct ethtool_linkstate_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_DEBUG_GET ============== */
+/* ETHTOOL_MSG_DEBUG_GET - do */
+struct ethtool_debug_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_debug_get_req *ethtool_debug_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_debug_get_req));
+}
+void ethtool_debug_get_req_free(struct ethtool_debug_get_req *req);
+
+static inline void
+ethtool_debug_get_req_set_header_dev_index(struct ethtool_debug_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_debug_get_req_set_header_dev_name(struct ethtool_debug_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_debug_get_req_set_header_flags(struct ethtool_debug_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_debug_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 msgmask:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset msgmask;
+};
+
+void ethtool_debug_get_rsp_free(struct ethtool_debug_get_rsp *rsp);
+
+/*
+ * Get debug message mask.
+ */
+struct ethtool_debug_get_rsp *
+ethtool_debug_get(struct ynl_sock *ys, struct ethtool_debug_get_req *req);
+
+/* ETHTOOL_MSG_DEBUG_GET - dump */
+struct ethtool_debug_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_debug_get_req_dump *
+ethtool_debug_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_debug_get_req_dump));
+}
+void ethtool_debug_get_req_dump_free(struct ethtool_debug_get_req_dump *req);
+
+static inline void
+ethtool_debug_get_req_dump_set_header_dev_index(struct ethtool_debug_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_debug_get_req_dump_set_header_dev_name(struct ethtool_debug_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_debug_get_req_dump_set_header_flags(struct ethtool_debug_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_debug_get_list {
+ struct ethtool_debug_get_list *next;
+ struct ethtool_debug_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_debug_get_list_free(struct ethtool_debug_get_list *rsp);
+
+struct ethtool_debug_get_list *
+ethtool_debug_get_dump(struct ynl_sock *ys,
+ struct ethtool_debug_get_req_dump *req);
+
+/* ETHTOOL_MSG_DEBUG_GET - notify */
+struct ethtool_debug_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_debug_get_ntf *ntf);
+ struct ethtool_debug_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_debug_get_ntf_free(struct ethtool_debug_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_DEBUG_SET ============== */
+/* ETHTOOL_MSG_DEBUG_SET - do */
+struct ethtool_debug_set_req {
+ struct {
+ __u32 header:1;
+ __u32 msgmask:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset msgmask;
+};
+
+static inline struct ethtool_debug_set_req *ethtool_debug_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_debug_set_req));
+}
+void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req);
+
+static inline void
+ethtool_debug_set_req_set_header_dev_index(struct ethtool_debug_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_debug_set_req_set_header_dev_name(struct ethtool_debug_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_debug_set_req_set_header_flags(struct ethtool_debug_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_debug_set_req_set_msgmask_nomask(struct ethtool_debug_set_req *req)
+{
+ req->_present.msgmask = 1;
+ req->msgmask._present.nomask = 1;
+}
+static inline void
+ethtool_debug_set_req_set_msgmask_size(struct ethtool_debug_set_req *req,
+ __u32 size)
+{
+ req->_present.msgmask = 1;
+ req->msgmask._present.size = 1;
+ req->msgmask.size = size;
+}
+static inline void
+__ethtool_debug_set_req_set_msgmask_bits_bit(struct ethtool_debug_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->msgmask.bits.bit);
+ req->msgmask.bits.bit = bit;
+ req->msgmask.bits.n_bit = n_bit;
+}
+
+/*
+ * Set debug message mask.
+ */
+int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req);
+
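+/*
+ * Usage sketch (illustrative only): marking the msgmask bitset nomask sends
+ * it as a plain list of bits rather than a value/mask pair, i.e. the
+ * supplied bits replace the whole mask.
+ *
+ *     struct ethtool_debug_set_req *req;
+ *
+ *     req = ethtool_debug_set_req_alloc();
+ *     ethtool_debug_set_req_set_header_dev_name(req, "eth0");
+ *     ethtool_debug_set_req_set_msgmask_nomask(req);
+ *     // attach bits via __ethtool_debug_set_req_set_msgmask_bits_bit()
+ *     ethtool_debug_set(ys, req);
+ *     ethtool_debug_set_req_free(req);
+ */
+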
+/* ============== ETHTOOL_MSG_WOL_GET ============== */
+/* ETHTOOL_MSG_WOL_GET - do */
+struct ethtool_wol_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_wol_get_req *ethtool_wol_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_wol_get_req));
+}
+void ethtool_wol_get_req_free(struct ethtool_wol_get_req *req);
+
+static inline void
+ethtool_wol_get_req_set_header_dev_index(struct ethtool_wol_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_wol_get_req_set_header_dev_name(struct ethtool_wol_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_wol_get_req_set_header_flags(struct ethtool_wol_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_wol_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 modes:1;
+ __u32 sopass_len;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset modes;
+ void *sopass;
+};
+
+void ethtool_wol_get_rsp_free(struct ethtool_wol_get_rsp *rsp);
+
+/*
+ * Get WOL params.
+ */
+struct ethtool_wol_get_rsp *
+ethtool_wol_get(struct ynl_sock *ys, struct ethtool_wol_get_req *req);
+
+/* ETHTOOL_MSG_WOL_GET - dump */
+struct ethtool_wol_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_wol_get_req_dump *
+ethtool_wol_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_wol_get_req_dump));
+}
+void ethtool_wol_get_req_dump_free(struct ethtool_wol_get_req_dump *req);
+
+static inline void
+ethtool_wol_get_req_dump_set_header_dev_index(struct ethtool_wol_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_wol_get_req_dump_set_header_dev_name(struct ethtool_wol_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_wol_get_req_dump_set_header_flags(struct ethtool_wol_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_wol_get_list {
+ struct ethtool_wol_get_list *next;
+ struct ethtool_wol_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_wol_get_list_free(struct ethtool_wol_get_list *rsp);
+
+struct ethtool_wol_get_list *
+ethtool_wol_get_dump(struct ynl_sock *ys, struct ethtool_wol_get_req_dump *req);
+
+/* ETHTOOL_MSG_WOL_GET - notify */
+struct ethtool_wol_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_wol_get_ntf *ntf);
+ struct ethtool_wol_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_wol_get_ntf_free(struct ethtool_wol_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_WOL_SET ============== */
+/* ETHTOOL_MSG_WOL_SET - do */
+struct ethtool_wol_set_req {
+ struct {
+ __u32 header:1;
+ __u32 modes:1;
+ __u32 sopass_len;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset modes;
+ void *sopass;
+};
+
+static inline struct ethtool_wol_set_req *ethtool_wol_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_wol_set_req));
+}
+void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req);
+
+static inline void
+ethtool_wol_set_req_set_header_dev_index(struct ethtool_wol_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_wol_set_req_set_header_dev_name(struct ethtool_wol_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_wol_set_req_set_header_flags(struct ethtool_wol_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_wol_set_req_set_modes_nomask(struct ethtool_wol_set_req *req)
+{
+ req->_present.modes = 1;
+ req->modes._present.nomask = 1;
+}
+static inline void
+ethtool_wol_set_req_set_modes_size(struct ethtool_wol_set_req *req, __u32 size)
+{
+ req->_present.modes = 1;
+ req->modes._present.size = 1;
+ req->modes.size = size;
+}
+static inline void
+__ethtool_wol_set_req_set_modes_bits_bit(struct ethtool_wol_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->modes.bits.bit);
+ req->modes.bits.bit = bit;
+ req->modes.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_wol_set_req_set_sopass(struct ethtool_wol_set_req *req,
+ const void *sopass, size_t len)
+{
+ free(req->sopass);
+ req->_present.sopass_len = len;
+ req->sopass = malloc(req->_present.sopass_len);
+ memcpy(req->sopass, sopass, req->_present.sopass_len);
+}
+
+/*
+ * Set WOL params.
+ */
+int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req);
+
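+/*
+ * Usage sketch (illustrative only): sopass is a raw binary attribute (the
+ * six-byte SecureOn password); the setter copies the buffer and records its
+ * length in _present.sopass_len.
+ *
+ *     static const unsigned char pw[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
+ *     struct ethtool_wol_set_req *req;
+ *
+ *     req = ethtool_wol_set_req_alloc();
+ *     ethtool_wol_set_req_set_header_dev_index(req, ifindex);
+ *     ethtool_wol_set_req_set_sopass(req, pw, sizeof(pw));
+ *     ethtool_wol_set(ys, req);
+ *     ethtool_wol_set_req_free(req);
+ */
+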
+/* ============== ETHTOOL_MSG_FEATURES_GET ============== */
+/* ETHTOOL_MSG_FEATURES_GET - do */
+struct ethtool_features_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_features_get_req *
+ethtool_features_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_features_get_req));
+}
+void ethtool_features_get_req_free(struct ethtool_features_get_req *req);
+
+static inline void
+ethtool_features_get_req_set_header_dev_index(struct ethtool_features_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_features_get_req_set_header_dev_name(struct ethtool_features_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_features_get_req_set_header_flags(struct ethtool_features_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_features_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 hw:1;
+ __u32 wanted:1;
+ __u32 active:1;
+ __u32 nochange:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset hw;
+ struct ethtool_bitset wanted;
+ struct ethtool_bitset active;
+ struct ethtool_bitset nochange;
+};
+
+void ethtool_features_get_rsp_free(struct ethtool_features_get_rsp *rsp);
+
+/*
+ * Get features.
+ */
+struct ethtool_features_get_rsp *
+ethtool_features_get(struct ynl_sock *ys, struct ethtool_features_get_req *req);
+
+/* ETHTOOL_MSG_FEATURES_GET - dump */
+struct ethtool_features_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_features_get_req_dump *
+ethtool_features_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_features_get_req_dump));
+}
+void
+ethtool_features_get_req_dump_free(struct ethtool_features_get_req_dump *req);
+
+static inline void
+ethtool_features_get_req_dump_set_header_dev_index(struct ethtool_features_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_features_get_req_dump_set_header_dev_name(struct ethtool_features_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_features_get_req_dump_set_header_flags(struct ethtool_features_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_features_get_list {
+ struct ethtool_features_get_list *next;
+ struct ethtool_features_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_features_get_list_free(struct ethtool_features_get_list *rsp);
+
+struct ethtool_features_get_list *
+ethtool_features_get_dump(struct ynl_sock *ys,
+ struct ethtool_features_get_req_dump *req);
+
+/* ETHTOOL_MSG_FEATURES_GET - notify */
+struct ethtool_features_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_features_get_ntf *ntf);
+ struct ethtool_features_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_features_get_ntf_free(struct ethtool_features_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_FEATURES_SET ============== */
+/* ETHTOOL_MSG_FEATURES_SET - do */
+struct ethtool_features_set_req {
+ struct {
+ __u32 header:1;
+ __u32 hw:1;
+ __u32 wanted:1;
+ __u32 active:1;
+ __u32 nochange:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset hw;
+ struct ethtool_bitset wanted;
+ struct ethtool_bitset active;
+ struct ethtool_bitset nochange;
+};
+
+static inline struct ethtool_features_set_req *
+ethtool_features_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_features_set_req));
+}
+void ethtool_features_set_req_free(struct ethtool_features_set_req *req);
+
+static inline void
+ethtool_features_set_req_set_header_dev_index(struct ethtool_features_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_features_set_req_set_header_dev_name(struct ethtool_features_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_features_set_req_set_header_flags(struct ethtool_features_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_features_set_req_set_hw_nomask(struct ethtool_features_set_req *req)
+{
+ req->_present.hw = 1;
+ req->hw._present.nomask = 1;
+}
+static inline void
+ethtool_features_set_req_set_hw_size(struct ethtool_features_set_req *req,
+ __u32 size)
+{
+ req->_present.hw = 1;
+ req->hw._present.size = 1;
+ req->hw.size = size;
+}
+static inline void
+__ethtool_features_set_req_set_hw_bits_bit(struct ethtool_features_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->hw.bits.bit);
+ req->hw.bits.bit = bit;
+ req->hw.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_features_set_req_set_wanted_nomask(struct ethtool_features_set_req *req)
+{
+ req->_present.wanted = 1;
+ req->wanted._present.nomask = 1;
+}
+static inline void
+ethtool_features_set_req_set_wanted_size(struct ethtool_features_set_req *req,
+ __u32 size)
+{
+ req->_present.wanted = 1;
+ req->wanted._present.size = 1;
+ req->wanted.size = size;
+}
+static inline void
+__ethtool_features_set_req_set_wanted_bits_bit(struct ethtool_features_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->wanted.bits.bit);
+ req->wanted.bits.bit = bit;
+ req->wanted.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_features_set_req_set_active_nomask(struct ethtool_features_set_req *req)
+{
+ req->_present.active = 1;
+ req->active._present.nomask = 1;
+}
+static inline void
+ethtool_features_set_req_set_active_size(struct ethtool_features_set_req *req,
+ __u32 size)
+{
+ req->_present.active = 1;
+ req->active._present.size = 1;
+ req->active.size = size;
+}
+static inline void
+__ethtool_features_set_req_set_active_bits_bit(struct ethtool_features_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->active.bits.bit);
+ req->active.bits.bit = bit;
+ req->active.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_features_set_req_set_nochange_nomask(struct ethtool_features_set_req *req)
+{
+ req->_present.nochange = 1;
+ req->nochange._present.nomask = 1;
+}
+static inline void
+ethtool_features_set_req_set_nochange_size(struct ethtool_features_set_req *req,
+ __u32 size)
+{
+ req->_present.nochange = 1;
+ req->nochange._present.size = 1;
+ req->nochange.size = size;
+}
+static inline void
+__ethtool_features_set_req_set_nochange_bits_bit(struct ethtool_features_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->nochange.bits.bit);
+ req->nochange.bits.bit = bit;
+ req->nochange.bits.n_bit = n_bit;
+}
+
+struct ethtool_features_set_rsp {
+ struct {
+ __u32 header:1;
+ __u32 hw:1;
+ __u32 wanted:1;
+ __u32 active:1;
+ __u32 nochange:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset hw;
+ struct ethtool_bitset wanted;
+ struct ethtool_bitset active;
+ struct ethtool_bitset nochange;
+};
+
+void ethtool_features_set_rsp_free(struct ethtool_features_set_rsp *rsp);
+
+/*
+ * Set features.
+ */
+struct ethtool_features_set_rsp *
+ethtool_features_set(struct ynl_sock *ys, struct ethtool_features_set_req *req);
+
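+/*
+ * Note: unlike most *_set requests, which return an int, FEATURES_SET gets
+ * a reply of its own, so ethtool_features_set() hands back a response
+ * object describing the resulting feature state; the caller frees it with
+ * ethtool_features_set_rsp_free().  Sketch:
+ *
+ *     rsp = ethtool_features_set(ys, req);
+ *     if (rsp) {
+ *         // inspect rsp->wanted / rsp->active bitsets here
+ *         ethtool_features_set_rsp_free(rsp);
+ *     }
+ */
+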
+/* ============== ETHTOOL_MSG_PRIVFLAGS_GET ============== */
+/* ETHTOOL_MSG_PRIVFLAGS_GET - do */
+struct ethtool_privflags_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_privflags_get_req *
+ethtool_privflags_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_privflags_get_req));
+}
+void ethtool_privflags_get_req_free(struct ethtool_privflags_get_req *req);
+
+static inline void
+ethtool_privflags_get_req_set_header_dev_index(struct ethtool_privflags_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_privflags_get_req_set_header_dev_name(struct ethtool_privflags_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_privflags_get_req_set_header_flags(struct ethtool_privflags_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_privflags_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 flags:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset flags;
+};
+
+void ethtool_privflags_get_rsp_free(struct ethtool_privflags_get_rsp *rsp);
+
+/*
+ * Get device private flags.
+ */
+struct ethtool_privflags_get_rsp *
+ethtool_privflags_get(struct ynl_sock *ys,
+ struct ethtool_privflags_get_req *req);
+
+/* ETHTOOL_MSG_PRIVFLAGS_GET - dump */
+struct ethtool_privflags_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_privflags_get_req_dump *
+ethtool_privflags_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_privflags_get_req_dump));
+}
+void
+ethtool_privflags_get_req_dump_free(struct ethtool_privflags_get_req_dump *req);
+
+static inline void
+ethtool_privflags_get_req_dump_set_header_dev_index(struct ethtool_privflags_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_privflags_get_req_dump_set_header_dev_name(struct ethtool_privflags_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_privflags_get_req_dump_set_header_flags(struct ethtool_privflags_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_privflags_get_list {
+ struct ethtool_privflags_get_list *next;
+ struct ethtool_privflags_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_privflags_get_list_free(struct ethtool_privflags_get_list *rsp);
+
+struct ethtool_privflags_get_list *
+ethtool_privflags_get_dump(struct ynl_sock *ys,
+ struct ethtool_privflags_get_req_dump *req);
+
+/* ETHTOOL_MSG_PRIVFLAGS_GET - notify */
+struct ethtool_privflags_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_privflags_get_ntf *ntf);
+ struct ethtool_privflags_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_privflags_get_ntf_free(struct ethtool_privflags_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_PRIVFLAGS_SET ============== */
+/* ETHTOOL_MSG_PRIVFLAGS_SET - do */
+struct ethtool_privflags_set_req {
+ struct {
+ __u32 header:1;
+ __u32 flags:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset flags;
+};
+
+static inline struct ethtool_privflags_set_req *
+ethtool_privflags_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_privflags_set_req));
+}
+void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req);
+
+static inline void
+ethtool_privflags_set_req_set_header_dev_index(struct ethtool_privflags_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_privflags_set_req_set_header_dev_name(struct ethtool_privflags_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_privflags_set_req_set_header_flags(struct ethtool_privflags_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_privflags_set_req_set_flags_nomask(struct ethtool_privflags_set_req *req)
+{
+ req->_present.flags = 1;
+ req->flags._present.nomask = 1;
+}
+static inline void
+ethtool_privflags_set_req_set_flags_size(struct ethtool_privflags_set_req *req,
+ __u32 size)
+{
+ req->_present.flags = 1;
+ req->flags._present.size = 1;
+ req->flags.size = size;
+}
+static inline void
+__ethtool_privflags_set_req_set_flags_bits_bit(struct ethtool_privflags_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->flags.bits.bit);
+ req->flags.bits.bit = bit;
+ req->flags.bits.n_bit = n_bit;
+}
+
+/*
+ * Set device private flags.
+ */
+int ethtool_privflags_set(struct ynl_sock *ys,
+ struct ethtool_privflags_set_req *req);
+
+/* ============== ETHTOOL_MSG_RINGS_GET ============== */
+/* ETHTOOL_MSG_RINGS_GET - do */
+struct ethtool_rings_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_rings_get_req *ethtool_rings_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_rings_get_req));
+}
+void ethtool_rings_get_req_free(struct ethtool_rings_get_req *req);
+
+static inline void
+ethtool_rings_get_req_set_header_dev_index(struct ethtool_rings_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_rings_get_req_set_header_dev_name(struct ethtool_rings_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_rings_get_req_set_header_flags(struct ethtool_rings_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_rings_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 rx_max:1;
+ __u32 rx_mini_max:1;
+ __u32 rx_jumbo_max:1;
+ __u32 tx_max:1;
+ __u32 rx:1;
+ __u32 rx_mini:1;
+ __u32 rx_jumbo:1;
+ __u32 tx:1;
+ __u32 rx_buf_len:1;
+ __u32 tcp_data_split:1;
+ __u32 cqe_size:1;
+ __u32 tx_push:1;
+ __u32 rx_push:1;
+ __u32 tx_push_buf_len:1;
+ __u32 tx_push_buf_len_max:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 rx_max;
+ __u32 rx_mini_max;
+ __u32 rx_jumbo_max;
+ __u32 tx_max;
+ __u32 rx;
+ __u32 rx_mini;
+ __u32 rx_jumbo;
+ __u32 tx;
+ __u32 rx_buf_len;
+ __u8 tcp_data_split;
+ __u32 cqe_size;
+ __u8 tx_push;
+ __u8 rx_push;
+ __u32 tx_push_buf_len;
+ __u32 tx_push_buf_len_max;
+};
+
+void ethtool_rings_get_rsp_free(struct ethtool_rings_get_rsp *rsp);
+
+/*
+ * Get ring params.
+ */
+struct ethtool_rings_get_rsp *
+ethtool_rings_get(struct ynl_sock *ys, struct ethtool_rings_get_req *req);
+
+/* ETHTOOL_MSG_RINGS_GET - dump */
+struct ethtool_rings_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_rings_get_req_dump *
+ethtool_rings_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_rings_get_req_dump));
+}
+void ethtool_rings_get_req_dump_free(struct ethtool_rings_get_req_dump *req);
+
+static inline void
+ethtool_rings_get_req_dump_set_header_dev_index(struct ethtool_rings_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_rings_get_req_dump_set_header_dev_name(struct ethtool_rings_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_rings_get_req_dump_set_header_flags(struct ethtool_rings_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_rings_get_list {
+ struct ethtool_rings_get_list *next;
+ struct ethtool_rings_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_rings_get_list_free(struct ethtool_rings_get_list *rsp);
+
+struct ethtool_rings_get_list *
+ethtool_rings_get_dump(struct ynl_sock *ys,
+ struct ethtool_rings_get_req_dump *req);
+
+/* ETHTOOL_MSG_RINGS_GET - notify */
+struct ethtool_rings_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_rings_get_ntf *ntf);
+ struct ethtool_rings_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_rings_get_ntf_free(struct ethtool_rings_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_RINGS_SET ============== */
+/* ETHTOOL_MSG_RINGS_SET - do */
+struct ethtool_rings_set_req {
+ struct {
+ __u32 header:1;
+ __u32 rx_max:1;
+ __u32 rx_mini_max:1;
+ __u32 rx_jumbo_max:1;
+ __u32 tx_max:1;
+ __u32 rx:1;
+ __u32 rx_mini:1;
+ __u32 rx_jumbo:1;
+ __u32 tx:1;
+ __u32 rx_buf_len:1;
+ __u32 tcp_data_split:1;
+ __u32 cqe_size:1;
+ __u32 tx_push:1;
+ __u32 rx_push:1;
+ __u32 tx_push_buf_len:1;
+ __u32 tx_push_buf_len_max:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 rx_max;
+ __u32 rx_mini_max;
+ __u32 rx_jumbo_max;
+ __u32 tx_max;
+ __u32 rx;
+ __u32 rx_mini;
+ __u32 rx_jumbo;
+ __u32 tx;
+ __u32 rx_buf_len;
+ __u8 tcp_data_split;
+ __u32 cqe_size;
+ __u8 tx_push;
+ __u8 rx_push;
+ __u32 tx_push_buf_len;
+ __u32 tx_push_buf_len_max;
+};
+
+static inline struct ethtool_rings_set_req *ethtool_rings_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_rings_set_req));
+}
+void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req);
+
+static inline void
+ethtool_rings_set_req_set_header_dev_index(struct ethtool_rings_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_rings_set_req_set_header_dev_name(struct ethtool_rings_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_rings_set_req_set_header_flags(struct ethtool_rings_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_rings_set_req_set_rx_max(struct ethtool_rings_set_req *req,
+ __u32 rx_max)
+{
+ req->_present.rx_max = 1;
+ req->rx_max = rx_max;
+}
+static inline void
+ethtool_rings_set_req_set_rx_mini_max(struct ethtool_rings_set_req *req,
+ __u32 rx_mini_max)
+{
+ req->_present.rx_mini_max = 1;
+ req->rx_mini_max = rx_mini_max;
+}
+static inline void
+ethtool_rings_set_req_set_rx_jumbo_max(struct ethtool_rings_set_req *req,
+ __u32 rx_jumbo_max)
+{
+ req->_present.rx_jumbo_max = 1;
+ req->rx_jumbo_max = rx_jumbo_max;
+}
+static inline void
+ethtool_rings_set_req_set_tx_max(struct ethtool_rings_set_req *req,
+ __u32 tx_max)
+{
+ req->_present.tx_max = 1;
+ req->tx_max = tx_max;
+}
+static inline void
+ethtool_rings_set_req_set_rx(struct ethtool_rings_set_req *req, __u32 rx)
+{
+ req->_present.rx = 1;
+ req->rx = rx;
+}
+static inline void
+ethtool_rings_set_req_set_rx_mini(struct ethtool_rings_set_req *req,
+ __u32 rx_mini)
+{
+ req->_present.rx_mini = 1;
+ req->rx_mini = rx_mini;
+}
+static inline void
+ethtool_rings_set_req_set_rx_jumbo(struct ethtool_rings_set_req *req,
+ __u32 rx_jumbo)
+{
+ req->_present.rx_jumbo = 1;
+ req->rx_jumbo = rx_jumbo;
+}
+static inline void
+ethtool_rings_set_req_set_tx(struct ethtool_rings_set_req *req, __u32 tx)
+{
+ req->_present.tx = 1;
+ req->tx = tx;
+}
+static inline void
+ethtool_rings_set_req_set_rx_buf_len(struct ethtool_rings_set_req *req,
+ __u32 rx_buf_len)
+{
+ req->_present.rx_buf_len = 1;
+ req->rx_buf_len = rx_buf_len;
+}
+static inline void
+ethtool_rings_set_req_set_tcp_data_split(struct ethtool_rings_set_req *req,
+ __u8 tcp_data_split)
+{
+ req->_present.tcp_data_split = 1;
+ req->tcp_data_split = tcp_data_split;
+}
+static inline void
+ethtool_rings_set_req_set_cqe_size(struct ethtool_rings_set_req *req,
+ __u32 cqe_size)
+{
+ req->_present.cqe_size = 1;
+ req->cqe_size = cqe_size;
+}
+static inline void
+ethtool_rings_set_req_set_tx_push(struct ethtool_rings_set_req *req,
+ __u8 tx_push)
+{
+ req->_present.tx_push = 1;
+ req->tx_push = tx_push;
+}
+static inline void
+ethtool_rings_set_req_set_rx_push(struct ethtool_rings_set_req *req,
+ __u8 rx_push)
+{
+ req->_present.rx_push = 1;
+ req->rx_push = rx_push;
+}
+static inline void
+ethtool_rings_set_req_set_tx_push_buf_len(struct ethtool_rings_set_req *req,
+ __u32 tx_push_buf_len)
+{
+ req->_present.tx_push_buf_len = 1;
+ req->tx_push_buf_len = tx_push_buf_len;
+}
+static inline void
+ethtool_rings_set_req_set_tx_push_buf_len_max(struct ethtool_rings_set_req *req,
+ __u32 tx_push_buf_len_max)
+{
+ req->_present.tx_push_buf_len_max = 1;
+ req->tx_push_buf_len_max = tx_push_buf_len_max;
+}
+
+/*
+ * Set ring params.
+ */
+int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req);
+
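+/*
+ * Usage sketch (illustrative only): ring sizes are usually tuned
+ * read-modify-write style; fetch the current limits, then submit only the
+ * fields being changed (the kernel validates them against the *_max values
+ * reported by RINGS_GET).
+ *
+ *     get_rsp = ethtool_rings_get(ys, get_req);
+ *     ...
+ *     set_req = ethtool_rings_set_req_alloc();
+ *     ethtool_rings_set_req_set_header_dev_index(set_req, ifindex);
+ *     if (get_rsp->_present.rx_max)
+ *         ethtool_rings_set_req_set_rx(set_req, get_rsp->rx_max);
+ *     ethtool_rings_set(ys, set_req);
+ *     ethtool_rings_set_req_free(set_req);
+ */
+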
+/* ============== ETHTOOL_MSG_CHANNELS_GET ============== */
+/* ETHTOOL_MSG_CHANNELS_GET - do */
+struct ethtool_channels_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_channels_get_req *
+ethtool_channels_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_channels_get_req));
+}
+void ethtool_channels_get_req_free(struct ethtool_channels_get_req *req);
+
+static inline void
+ethtool_channels_get_req_set_header_dev_index(struct ethtool_channels_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_channels_get_req_set_header_dev_name(struct ethtool_channels_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_channels_get_req_set_header_flags(struct ethtool_channels_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_channels_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 rx_max:1;
+ __u32 tx_max:1;
+ __u32 other_max:1;
+ __u32 combined_max:1;
+ __u32 rx_count:1;
+ __u32 tx_count:1;
+ __u32 other_count:1;
+ __u32 combined_count:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 rx_max;
+ __u32 tx_max;
+ __u32 other_max;
+ __u32 combined_max;
+ __u32 rx_count;
+ __u32 tx_count;
+ __u32 other_count;
+ __u32 combined_count;
+};
+
+void ethtool_channels_get_rsp_free(struct ethtool_channels_get_rsp *rsp);
+
+/*
+ * Get channel params.
+ */
+struct ethtool_channels_get_rsp *
+ethtool_channels_get(struct ynl_sock *ys, struct ethtool_channels_get_req *req);
+
+/* ETHTOOL_MSG_CHANNELS_GET - dump */
+struct ethtool_channels_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_channels_get_req_dump *
+ethtool_channels_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_channels_get_req_dump));
+}
+void
+ethtool_channels_get_req_dump_free(struct ethtool_channels_get_req_dump *req);
+
+static inline void
+ethtool_channels_get_req_dump_set_header_dev_index(struct ethtool_channels_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_channels_get_req_dump_set_header_dev_name(struct ethtool_channels_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_channels_get_req_dump_set_header_flags(struct ethtool_channels_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_channels_get_list {
+ struct ethtool_channels_get_list *next;
+ struct ethtool_channels_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_channels_get_list_free(struct ethtool_channels_get_list *rsp);
+
+struct ethtool_channels_get_list *
+ethtool_channels_get_dump(struct ynl_sock *ys,
+ struct ethtool_channels_get_req_dump *req);
+
+/* ETHTOOL_MSG_CHANNELS_GET - notify */
+struct ethtool_channels_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_channels_get_ntf *ntf);
+ struct ethtool_channels_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_channels_get_ntf_free(struct ethtool_channels_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_CHANNELS_SET ============== */
+/* ETHTOOL_MSG_CHANNELS_SET - do */
+struct ethtool_channels_set_req {
+ struct {
+ __u32 header:1;
+ __u32 rx_max:1;
+ __u32 tx_max:1;
+ __u32 other_max:1;
+ __u32 combined_max:1;
+ __u32 rx_count:1;
+ __u32 tx_count:1;
+ __u32 other_count:1;
+ __u32 combined_count:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 rx_max;
+ __u32 tx_max;
+ __u32 other_max;
+ __u32 combined_max;
+ __u32 rx_count;
+ __u32 tx_count;
+ __u32 other_count;
+ __u32 combined_count;
+};
+
+static inline struct ethtool_channels_set_req *
+ethtool_channels_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_channels_set_req));
+}
+void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req);
+
+static inline void
+ethtool_channels_set_req_set_header_dev_index(struct ethtool_channels_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_channels_set_req_set_header_dev_name(struct ethtool_channels_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_channels_set_req_set_header_flags(struct ethtool_channels_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_channels_set_req_set_rx_max(struct ethtool_channels_set_req *req,
+ __u32 rx_max)
+{
+ req->_present.rx_max = 1;
+ req->rx_max = rx_max;
+}
+static inline void
+ethtool_channels_set_req_set_tx_max(struct ethtool_channels_set_req *req,
+ __u32 tx_max)
+{
+ req->_present.tx_max = 1;
+ req->tx_max = tx_max;
+}
+static inline void
+ethtool_channels_set_req_set_other_max(struct ethtool_channels_set_req *req,
+ __u32 other_max)
+{
+ req->_present.other_max = 1;
+ req->other_max = other_max;
+}
+static inline void
+ethtool_channels_set_req_set_combined_max(struct ethtool_channels_set_req *req,
+ __u32 combined_max)
+{
+ req->_present.combined_max = 1;
+ req->combined_max = combined_max;
+}
+static inline void
+ethtool_channels_set_req_set_rx_count(struct ethtool_channels_set_req *req,
+ __u32 rx_count)
+{
+ req->_present.rx_count = 1;
+ req->rx_count = rx_count;
+}
+static inline void
+ethtool_channels_set_req_set_tx_count(struct ethtool_channels_set_req *req,
+ __u32 tx_count)
+{
+ req->_present.tx_count = 1;
+ req->tx_count = tx_count;
+}
+static inline void
+ethtool_channels_set_req_set_other_count(struct ethtool_channels_set_req *req,
+ __u32 other_count)
+{
+ req->_present.other_count = 1;
+ req->other_count = other_count;
+}
+static inline void
+ethtool_channels_set_req_set_combined_count(struct ethtool_channels_set_req *req,
+ __u32 combined_count)
+{
+ req->_present.combined_count = 1;
+ req->combined_count = combined_count;
+}
+
+/*
+ * Set channel params.
+ */
+int ethtool_channels_set(struct ynl_sock *ys,
+ struct ethtool_channels_set_req *req);
+
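+/*
+ * Usage sketch (illustrative only): a combined channel carries both RX and
+ * TX, so queue counts are typically tuned via combined_count alone.  The
+ * ys->err error buffer is assumed to follow the ynl library's layout.
+ *
+ *     req = ethtool_channels_set_req_alloc();
+ *     ethtool_channels_set_req_set_header_dev_name(req, "eth0");
+ *     ethtool_channels_set_req_set_combined_count(req, 8);
+ *     if (ethtool_channels_set(ys, req) < 0)
+ *         fprintf(stderr, "channels set failed: %s\n", ys->err.msg);
+ *     ethtool_channels_set_req_free(req);
+ */
+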
+/* ============== ETHTOOL_MSG_COALESCE_GET ============== */
+/* ETHTOOL_MSG_COALESCE_GET - do */
+struct ethtool_coalesce_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_coalesce_get_req *
+ethtool_coalesce_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_coalesce_get_req));
+}
+void ethtool_coalesce_get_req_free(struct ethtool_coalesce_get_req *req);
+
+static inline void
+ethtool_coalesce_get_req_set_header_dev_index(struct ethtool_coalesce_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_coalesce_get_req_set_header_dev_name(struct ethtool_coalesce_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_coalesce_get_req_set_header_flags(struct ethtool_coalesce_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_coalesce_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 rx_usecs:1;
+ __u32 rx_max_frames:1;
+ __u32 rx_usecs_irq:1;
+ __u32 rx_max_frames_irq:1;
+ __u32 tx_usecs:1;
+ __u32 tx_max_frames:1;
+ __u32 tx_usecs_irq:1;
+ __u32 tx_max_frames_irq:1;
+ __u32 stats_block_usecs:1;
+ __u32 use_adaptive_rx:1;
+ __u32 use_adaptive_tx:1;
+ __u32 pkt_rate_low:1;
+ __u32 rx_usecs_low:1;
+ __u32 rx_max_frames_low:1;
+ __u32 tx_usecs_low:1;
+ __u32 tx_max_frames_low:1;
+ __u32 pkt_rate_high:1;
+ __u32 rx_usecs_high:1;
+ __u32 rx_max_frames_high:1;
+ __u32 tx_usecs_high:1;
+ __u32 tx_max_frames_high:1;
+ __u32 rate_sample_interval:1;
+ __u32 use_cqe_mode_tx:1;
+ __u32 use_cqe_mode_rx:1;
+ __u32 tx_aggr_max_bytes:1;
+ __u32 tx_aggr_max_frames:1;
+ __u32 tx_aggr_time_usecs:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 rx_usecs;
+ __u32 rx_max_frames;
+ __u32 rx_usecs_irq;
+ __u32 rx_max_frames_irq;
+ __u32 tx_usecs;
+ __u32 tx_max_frames;
+ __u32 tx_usecs_irq;
+ __u32 tx_max_frames_irq;
+ __u32 stats_block_usecs;
+ __u8 use_adaptive_rx;
+ __u8 use_adaptive_tx;
+ __u32 pkt_rate_low;
+ __u32 rx_usecs_low;
+ __u32 rx_max_frames_low;
+ __u32 tx_usecs_low;
+ __u32 tx_max_frames_low;
+ __u32 pkt_rate_high;
+ __u32 rx_usecs_high;
+ __u32 rx_max_frames_high;
+ __u32 tx_usecs_high;
+ __u32 tx_max_frames_high;
+ __u32 rate_sample_interval;
+ __u8 use_cqe_mode_tx;
+ __u8 use_cqe_mode_rx;
+ __u32 tx_aggr_max_bytes;
+ __u32 tx_aggr_max_frames;
+ __u32 tx_aggr_time_usecs;
+};
+
+void ethtool_coalesce_get_rsp_free(struct ethtool_coalesce_get_rsp *rsp);
+
+/*
+ * Get coalesce params.
+ */
+struct ethtool_coalesce_get_rsp *
+ethtool_coalesce_get(struct ynl_sock *ys, struct ethtool_coalesce_get_req *req);
+
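+/*
+ * Illustrative sketch, not generated code: issuing the "do" request above
+ * and releasing the reply. Reply fields are only valid when the matching
+ * _present bit is set; ifindex 2 is an example value.
+ *
+ *   struct ethtool_coalesce_get_req *req;
+ *   struct ethtool_coalesce_get_rsp *rsp;
+ *
+ *   req = ethtool_coalesce_get_req_alloc();
+ *   ethtool_coalesce_get_req_set_header_dev_index(req, 2);
+ *   rsp = ethtool_coalesce_get(ys, req);
+ *   ethtool_coalesce_get_req_free(req);
+ *   if (rsp) {
+ *           if (rsp->_present.rx_usecs)
+ *                   printf("rx-usecs: %u\n", rsp->rx_usecs);
+ *           ethtool_coalesce_get_rsp_free(rsp);
+ *   }
+ */
+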
+/* ETHTOOL_MSG_COALESCE_GET - dump */
+struct ethtool_coalesce_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_coalesce_get_req_dump *
+ethtool_coalesce_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_coalesce_get_req_dump));
+}
+void
+ethtool_coalesce_get_req_dump_free(struct ethtool_coalesce_get_req_dump *req);
+
+static inline void
+ethtool_coalesce_get_req_dump_set_header_dev_index(struct ethtool_coalesce_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_coalesce_get_req_dump_set_header_dev_name(struct ethtool_coalesce_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_coalesce_get_req_dump_set_header_flags(struct ethtool_coalesce_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_coalesce_get_list {
+ struct ethtool_coalesce_get_list *next;
+ struct ethtool_coalesce_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_coalesce_get_list_free(struct ethtool_coalesce_get_list *rsp);
+
+struct ethtool_coalesce_get_list *
+ethtool_coalesce_get_dump(struct ynl_sock *ys,
+ struct ethtool_coalesce_get_req_dump *req);
+
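+/*
+ * Illustrative sketch, not generated code: a dump returns a NULL-terminated
+ * singly linked list of responses which is freed as a whole. "req" is a
+ * dump request prepared with the setters above.
+ *
+ *   struct ethtool_coalesce_get_list *list, *pos;
+ *
+ *   list = ethtool_coalesce_get_dump(ys, req);
+ *   for (pos = list; pos; pos = pos->next)
+ *           if (pos->obj._present.header && pos->obj.header.dev_name)
+ *                   printf("%s\n", pos->obj.header.dev_name);
+ *   ethtool_coalesce_get_list_free(list);
+ */
+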
+/* ETHTOOL_MSG_COALESCE_GET - notify */
+struct ethtool_coalesce_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_coalesce_get_ntf *ntf);
+ struct ethtool_coalesce_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_coalesce_get_ntf_free(struct ethtool_coalesce_get_ntf *rsp);
+
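+/*
+ * Illustrative sketch, not generated code: consuming notifications with the
+ * ynl_subscribe() / ynl_ntf_check() / ynl_has_ntf() / ynl_ntf_dequeue()
+ * helpers, assumed here to come from the ynl C library. The dequeued base
+ * type is cast to the per-message struct once cmd has been checked, and
+ * each notification is released through its own free callback.
+ *
+ *   ynl_subscribe(ys, "monitor");
+ *   for (;;) {
+ *           if (ynl_ntf_check(ys) < 0)
+ *                   break;
+ *           while (ynl_has_ntf(ys)) {
+ *                   struct ethtool_coalesce_get_ntf *ntf;
+ *
+ *                   ntf = (struct ethtool_coalesce_get_ntf *)ynl_ntf_dequeue(ys);
+ *                   if (ntf->cmd == ETHTOOL_MSG_COALESCE_NTF)
+ *                           ... inspect ntf->obj ...
+ *                   ntf->free(ntf);
+ *           }
+ *   }
+ */
+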
+/* ============== ETHTOOL_MSG_COALESCE_SET ============== */
+/* ETHTOOL_MSG_COALESCE_SET - do */
+struct ethtool_coalesce_set_req {
+ struct {
+ __u32 header:1;
+ __u32 rx_usecs:1;
+ __u32 rx_max_frames:1;
+ __u32 rx_usecs_irq:1;
+ __u32 rx_max_frames_irq:1;
+ __u32 tx_usecs:1;
+ __u32 tx_max_frames:1;
+ __u32 tx_usecs_irq:1;
+ __u32 tx_max_frames_irq:1;
+ __u32 stats_block_usecs:1;
+ __u32 use_adaptive_rx:1;
+ __u32 use_adaptive_tx:1;
+ __u32 pkt_rate_low:1;
+ __u32 rx_usecs_low:1;
+ __u32 rx_max_frames_low:1;
+ __u32 tx_usecs_low:1;
+ __u32 tx_max_frames_low:1;
+ __u32 pkt_rate_high:1;
+ __u32 rx_usecs_high:1;
+ __u32 rx_max_frames_high:1;
+ __u32 tx_usecs_high:1;
+ __u32 tx_max_frames_high:1;
+ __u32 rate_sample_interval:1;
+ __u32 use_cqe_mode_tx:1;
+ __u32 use_cqe_mode_rx:1;
+ __u32 tx_aggr_max_bytes:1;
+ __u32 tx_aggr_max_frames:1;
+ __u32 tx_aggr_time_usecs:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 rx_usecs;
+ __u32 rx_max_frames;
+ __u32 rx_usecs_irq;
+ __u32 rx_max_frames_irq;
+ __u32 tx_usecs;
+ __u32 tx_max_frames;
+ __u32 tx_usecs_irq;
+ __u32 tx_max_frames_irq;
+ __u32 stats_block_usecs;
+ __u8 use_adaptive_rx;
+ __u8 use_adaptive_tx;
+ __u32 pkt_rate_low;
+ __u32 rx_usecs_low;
+ __u32 rx_max_frames_low;
+ __u32 tx_usecs_low;
+ __u32 tx_max_frames_low;
+ __u32 pkt_rate_high;
+ __u32 rx_usecs_high;
+ __u32 rx_max_frames_high;
+ __u32 tx_usecs_high;
+ __u32 tx_max_frames_high;
+ __u32 rate_sample_interval;
+ __u8 use_cqe_mode_tx;
+ __u8 use_cqe_mode_rx;
+ __u32 tx_aggr_max_bytes;
+ __u32 tx_aggr_max_frames;
+ __u32 tx_aggr_time_usecs;
+};
+
+static inline struct ethtool_coalesce_set_req *
+ethtool_coalesce_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_coalesce_set_req));
+}
+void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req);
+
+static inline void
+ethtool_coalesce_set_req_set_header_dev_index(struct ethtool_coalesce_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_coalesce_set_req_set_header_dev_name(struct ethtool_coalesce_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_coalesce_set_req_set_header_flags(struct ethtool_coalesce_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_usecs(struct ethtool_coalesce_set_req *req,
+ __u32 rx_usecs)
+{
+ req->_present.rx_usecs = 1;
+ req->rx_usecs = rx_usecs;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_max_frames(struct ethtool_coalesce_set_req *req,
+ __u32 rx_max_frames)
+{
+ req->_present.rx_max_frames = 1;
+ req->rx_max_frames = rx_max_frames;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_usecs_irq(struct ethtool_coalesce_set_req *req,
+ __u32 rx_usecs_irq)
+{
+ req->_present.rx_usecs_irq = 1;
+ req->rx_usecs_irq = rx_usecs_irq;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_max_frames_irq(struct ethtool_coalesce_set_req *req,
+ __u32 rx_max_frames_irq)
+{
+ req->_present.rx_max_frames_irq = 1;
+ req->rx_max_frames_irq = rx_max_frames_irq;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_usecs(struct ethtool_coalesce_set_req *req,
+ __u32 tx_usecs)
+{
+ req->_present.tx_usecs = 1;
+ req->tx_usecs = tx_usecs;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_max_frames(struct ethtool_coalesce_set_req *req,
+ __u32 tx_max_frames)
+{
+ req->_present.tx_max_frames = 1;
+ req->tx_max_frames = tx_max_frames;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_usecs_irq(struct ethtool_coalesce_set_req *req,
+ __u32 tx_usecs_irq)
+{
+ req->_present.tx_usecs_irq = 1;
+ req->tx_usecs_irq = tx_usecs_irq;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_max_frames_irq(struct ethtool_coalesce_set_req *req,
+ __u32 tx_max_frames_irq)
+{
+ req->_present.tx_max_frames_irq = 1;
+ req->tx_max_frames_irq = tx_max_frames_irq;
+}
+static inline void
+ethtool_coalesce_set_req_set_stats_block_usecs(struct ethtool_coalesce_set_req *req,
+ __u32 stats_block_usecs)
+{
+ req->_present.stats_block_usecs = 1;
+ req->stats_block_usecs = stats_block_usecs;
+}
+static inline void
+ethtool_coalesce_set_req_set_use_adaptive_rx(struct ethtool_coalesce_set_req *req,
+ __u8 use_adaptive_rx)
+{
+ req->_present.use_adaptive_rx = 1;
+ req->use_adaptive_rx = use_adaptive_rx;
+}
+static inline void
+ethtool_coalesce_set_req_set_use_adaptive_tx(struct ethtool_coalesce_set_req *req,
+ __u8 use_adaptive_tx)
+{
+ req->_present.use_adaptive_tx = 1;
+ req->use_adaptive_tx = use_adaptive_tx;
+}
+static inline void
+ethtool_coalesce_set_req_set_pkt_rate_low(struct ethtool_coalesce_set_req *req,
+ __u32 pkt_rate_low)
+{
+ req->_present.pkt_rate_low = 1;
+ req->pkt_rate_low = pkt_rate_low;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_usecs_low(struct ethtool_coalesce_set_req *req,
+ __u32 rx_usecs_low)
+{
+ req->_present.rx_usecs_low = 1;
+ req->rx_usecs_low = rx_usecs_low;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_max_frames_low(struct ethtool_coalesce_set_req *req,
+ __u32 rx_max_frames_low)
+{
+ req->_present.rx_max_frames_low = 1;
+ req->rx_max_frames_low = rx_max_frames_low;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_usecs_low(struct ethtool_coalesce_set_req *req,
+ __u32 tx_usecs_low)
+{
+ req->_present.tx_usecs_low = 1;
+ req->tx_usecs_low = tx_usecs_low;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_max_frames_low(struct ethtool_coalesce_set_req *req,
+ __u32 tx_max_frames_low)
+{
+ req->_present.tx_max_frames_low = 1;
+ req->tx_max_frames_low = tx_max_frames_low;
+}
+static inline void
+ethtool_coalesce_set_req_set_pkt_rate_high(struct ethtool_coalesce_set_req *req,
+ __u32 pkt_rate_high)
+{
+ req->_present.pkt_rate_high = 1;
+ req->pkt_rate_high = pkt_rate_high;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_usecs_high(struct ethtool_coalesce_set_req *req,
+ __u32 rx_usecs_high)
+{
+ req->_present.rx_usecs_high = 1;
+ req->rx_usecs_high = rx_usecs_high;
+}
+static inline void
+ethtool_coalesce_set_req_set_rx_max_frames_high(struct ethtool_coalesce_set_req *req,
+ __u32 rx_max_frames_high)
+{
+ req->_present.rx_max_frames_high = 1;
+ req->rx_max_frames_high = rx_max_frames_high;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_usecs_high(struct ethtool_coalesce_set_req *req,
+ __u32 tx_usecs_high)
+{
+ req->_present.tx_usecs_high = 1;
+ req->tx_usecs_high = tx_usecs_high;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_max_frames_high(struct ethtool_coalesce_set_req *req,
+ __u32 tx_max_frames_high)
+{
+ req->_present.tx_max_frames_high = 1;
+ req->tx_max_frames_high = tx_max_frames_high;
+}
+static inline void
+ethtool_coalesce_set_req_set_rate_sample_interval(struct ethtool_coalesce_set_req *req,
+ __u32 rate_sample_interval)
+{
+ req->_present.rate_sample_interval = 1;
+ req->rate_sample_interval = rate_sample_interval;
+}
+static inline void
+ethtool_coalesce_set_req_set_use_cqe_mode_tx(struct ethtool_coalesce_set_req *req,
+ __u8 use_cqe_mode_tx)
+{
+ req->_present.use_cqe_mode_tx = 1;
+ req->use_cqe_mode_tx = use_cqe_mode_tx;
+}
+static inline void
+ethtool_coalesce_set_req_set_use_cqe_mode_rx(struct ethtool_coalesce_set_req *req,
+ __u8 use_cqe_mode_rx)
+{
+ req->_present.use_cqe_mode_rx = 1;
+ req->use_cqe_mode_rx = use_cqe_mode_rx;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_aggr_max_bytes(struct ethtool_coalesce_set_req *req,
+ __u32 tx_aggr_max_bytes)
+{
+ req->_present.tx_aggr_max_bytes = 1;
+ req->tx_aggr_max_bytes = tx_aggr_max_bytes;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_aggr_max_frames(struct ethtool_coalesce_set_req *req,
+ __u32 tx_aggr_max_frames)
+{
+ req->_present.tx_aggr_max_frames = 1;
+ req->tx_aggr_max_frames = tx_aggr_max_frames;
+}
+static inline void
+ethtool_coalesce_set_req_set_tx_aggr_time_usecs(struct ethtool_coalesce_set_req *req,
+ __u32 tx_aggr_time_usecs)
+{
+ req->_present.tx_aggr_time_usecs = 1;
+ req->tx_aggr_time_usecs = tx_aggr_time_usecs;
+}
+
+/*
+ * Set coalesce params.
+ */
+int ethtool_coalesce_set(struct ynl_sock *ys,
+ struct ethtool_coalesce_set_req *req);
+
+/* ============== ETHTOOL_MSG_PAUSE_GET ============== */
+/* ETHTOOL_MSG_PAUSE_GET - do */
+struct ethtool_pause_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_pause_get_req *ethtool_pause_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_pause_get_req));
+}
+void ethtool_pause_get_req_free(struct ethtool_pause_get_req *req);
+
+static inline void
+ethtool_pause_get_req_set_header_dev_index(struct ethtool_pause_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_pause_get_req_set_header_dev_name(struct ethtool_pause_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_pause_get_req_set_header_flags(struct ethtool_pause_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_pause_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 autoneg:1;
+ __u32 rx:1;
+ __u32 tx:1;
+ __u32 stats:1;
+ __u32 stats_src:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 autoneg;
+ __u8 rx;
+ __u8 tx;
+ struct ethtool_pause_stat stats;
+ __u32 stats_src;
+};
+
+void ethtool_pause_get_rsp_free(struct ethtool_pause_get_rsp *rsp);
+
+/*
+ * Get pause params.
+ */
+struct ethtool_pause_get_rsp *
+ethtool_pause_get(struct ynl_sock *ys, struct ethtool_pause_get_req *req);
+
+/* ETHTOOL_MSG_PAUSE_GET - dump */
+struct ethtool_pause_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_pause_get_req_dump *
+ethtool_pause_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_pause_get_req_dump));
+}
+void ethtool_pause_get_req_dump_free(struct ethtool_pause_get_req_dump *req);
+
+static inline void
+ethtool_pause_get_req_dump_set_header_dev_index(struct ethtool_pause_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_pause_get_req_dump_set_header_dev_name(struct ethtool_pause_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_pause_get_req_dump_set_header_flags(struct ethtool_pause_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_pause_get_list {
+ struct ethtool_pause_get_list *next;
+ struct ethtool_pause_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_pause_get_list_free(struct ethtool_pause_get_list *rsp);
+
+struct ethtool_pause_get_list *
+ethtool_pause_get_dump(struct ynl_sock *ys,
+ struct ethtool_pause_get_req_dump *req);
+
+/* ETHTOOL_MSG_PAUSE_GET - notify */
+struct ethtool_pause_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_pause_get_ntf *ntf);
+ struct ethtool_pause_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_pause_get_ntf_free(struct ethtool_pause_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_PAUSE_SET ============== */
+/* ETHTOOL_MSG_PAUSE_SET - do */
+struct ethtool_pause_set_req {
+ struct {
+ __u32 header:1;
+ __u32 autoneg:1;
+ __u32 rx:1;
+ __u32 tx:1;
+ __u32 stats:1;
+ __u32 stats_src:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 autoneg;
+ __u8 rx;
+ __u8 tx;
+ struct ethtool_pause_stat stats;
+ __u32 stats_src;
+};
+
+static inline struct ethtool_pause_set_req *ethtool_pause_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_pause_set_req));
+}
+void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req);
+
+static inline void
+ethtool_pause_set_req_set_header_dev_index(struct ethtool_pause_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_pause_set_req_set_header_dev_name(struct ethtool_pause_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_pause_set_req_set_header_flags(struct ethtool_pause_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_pause_set_req_set_autoneg(struct ethtool_pause_set_req *req,
+ __u8 autoneg)
+{
+ req->_present.autoneg = 1;
+ req->autoneg = autoneg;
+}
+static inline void
+ethtool_pause_set_req_set_rx(struct ethtool_pause_set_req *req, __u8 rx)
+{
+ req->_present.rx = 1;
+ req->rx = rx;
+}
+static inline void
+ethtool_pause_set_req_set_tx(struct ethtool_pause_set_req *req, __u8 tx)
+{
+ req->_present.tx = 1;
+ req->tx = tx;
+}
+static inline void
+ethtool_pause_set_req_set_stats_tx_frames(struct ethtool_pause_set_req *req,
+ __u64 tx_frames)
+{
+ req->_present.stats = 1;
+ req->stats._present.tx_frames = 1;
+ req->stats.tx_frames = tx_frames;
+}
+static inline void
+ethtool_pause_set_req_set_stats_rx_frames(struct ethtool_pause_set_req *req,
+ __u64 rx_frames)
+{
+ req->_present.stats = 1;
+ req->stats._present.rx_frames = 1;
+ req->stats.rx_frames = rx_frames;
+}
+static inline void
+ethtool_pause_set_req_set_stats_src(struct ethtool_pause_set_req *req,
+ __u32 stats_src)
+{
+ req->_present.stats_src = 1;
+ req->stats_src = stats_src;
+}
+
+/*
+ * Set pause params.
+ */
+int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req);
+
+/* ============== ETHTOOL_MSG_EEE_GET ============== */
+/* ETHTOOL_MSG_EEE_GET - do */
+struct ethtool_eee_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_eee_get_req *ethtool_eee_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_eee_get_req));
+}
+void ethtool_eee_get_req_free(struct ethtool_eee_get_req *req);
+
+static inline void
+ethtool_eee_get_req_set_header_dev_index(struct ethtool_eee_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_eee_get_req_set_header_dev_name(struct ethtool_eee_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_eee_get_req_set_header_flags(struct ethtool_eee_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_eee_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 modes_ours:1;
+ __u32 modes_peer:1;
+ __u32 active:1;
+ __u32 enabled:1;
+ __u32 tx_lpi_enabled:1;
+ __u32 tx_lpi_timer:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset modes_ours;
+ struct ethtool_bitset modes_peer;
+ __u8 active;
+ __u8 enabled;
+ __u8 tx_lpi_enabled;
+ __u32 tx_lpi_timer;
+};
+
+void ethtool_eee_get_rsp_free(struct ethtool_eee_get_rsp *rsp);
+
+/*
+ * Get EEE params.
+ */
+struct ethtool_eee_get_rsp *
+ethtool_eee_get(struct ynl_sock *ys, struct ethtool_eee_get_req *req);
+
+/* ETHTOOL_MSG_EEE_GET - dump */
+struct ethtool_eee_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_eee_get_req_dump *
+ethtool_eee_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_eee_get_req_dump));
+}
+void ethtool_eee_get_req_dump_free(struct ethtool_eee_get_req_dump *req);
+
+static inline void
+ethtool_eee_get_req_dump_set_header_dev_index(struct ethtool_eee_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_eee_get_req_dump_set_header_dev_name(struct ethtool_eee_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_eee_get_req_dump_set_header_flags(struct ethtool_eee_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_eee_get_list {
+ struct ethtool_eee_get_list *next;
+ struct ethtool_eee_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_eee_get_list_free(struct ethtool_eee_get_list *rsp);
+
+struct ethtool_eee_get_list *
+ethtool_eee_get_dump(struct ynl_sock *ys, struct ethtool_eee_get_req_dump *req);
+
+/* ETHTOOL_MSG_EEE_GET - notify */
+struct ethtool_eee_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_eee_get_ntf *ntf);
+ struct ethtool_eee_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_eee_get_ntf_free(struct ethtool_eee_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_EEE_SET ============== */
+/* ETHTOOL_MSG_EEE_SET - do */
+struct ethtool_eee_set_req {
+ struct {
+ __u32 header:1;
+ __u32 modes_ours:1;
+ __u32 modes_peer:1;
+ __u32 active:1;
+ __u32 enabled:1;
+ __u32 tx_lpi_enabled:1;
+ __u32 tx_lpi_timer:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset modes_ours;
+ struct ethtool_bitset modes_peer;
+ __u8 active;
+ __u8 enabled;
+ __u8 tx_lpi_enabled;
+ __u32 tx_lpi_timer;
+};
+
+static inline struct ethtool_eee_set_req *ethtool_eee_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_eee_set_req));
+}
+void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req);
+
+static inline void
+ethtool_eee_set_req_set_header_dev_index(struct ethtool_eee_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_eee_set_req_set_header_dev_name(struct ethtool_eee_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_eee_set_req_set_header_flags(struct ethtool_eee_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_eee_set_req_set_modes_ours_nomask(struct ethtool_eee_set_req *req)
+{
+ req->_present.modes_ours = 1;
+ req->modes_ours._present.nomask = 1;
+}
+static inline void
+ethtool_eee_set_req_set_modes_ours_size(struct ethtool_eee_set_req *req,
+ __u32 size)
+{
+ req->_present.modes_ours = 1;
+ req->modes_ours._present.size = 1;
+ req->modes_ours.size = size;
+}
+static inline void
+__ethtool_eee_set_req_set_modes_ours_bits_bit(struct ethtool_eee_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->modes_ours.bits.bit);
+ req->modes_ours.bits.bit = bit;
+ req->modes_ours.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_eee_set_req_set_modes_peer_nomask(struct ethtool_eee_set_req *req)
+{
+ req->_present.modes_peer = 1;
+ req->modes_peer._present.nomask = 1;
+}
+static inline void
+ethtool_eee_set_req_set_modes_peer_size(struct ethtool_eee_set_req *req,
+ __u32 size)
+{
+ req->_present.modes_peer = 1;
+ req->modes_peer._present.size = 1;
+ req->modes_peer.size = size;
+}
+static inline void
+__ethtool_eee_set_req_set_modes_peer_bits_bit(struct ethtool_eee_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->modes_peer.bits.bit);
+ req->modes_peer.bits.bit = bit;
+ req->modes_peer.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_eee_set_req_set_active(struct ethtool_eee_set_req *req, __u8 active)
+{
+ req->_present.active = 1;
+ req->active = active;
+}
+static inline void
+ethtool_eee_set_req_set_enabled(struct ethtool_eee_set_req *req, __u8 enabled)
+{
+ req->_present.enabled = 1;
+ req->enabled = enabled;
+}
+static inline void
+ethtool_eee_set_req_set_tx_lpi_enabled(struct ethtool_eee_set_req *req,
+ __u8 tx_lpi_enabled)
+{
+ req->_present.tx_lpi_enabled = 1;
+ req->tx_lpi_enabled = tx_lpi_enabled;
+}
+static inline void
+ethtool_eee_set_req_set_tx_lpi_timer(struct ethtool_eee_set_req *req,
+ __u32 tx_lpi_timer)
+{
+ req->_present.tx_lpi_timer = 1;
+ req->tx_lpi_timer = tx_lpi_timer;
+}
+
+/*
+ * Set EEE params.
+ */
+int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req);
+
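+/*
+ * Illustrative sketch, not generated code: the leading-underscore bitset
+ * helpers above take ownership of a heap-allocated array of n_bit
+ * struct ethtool_bitset_bit entries (any previous array is freed); the
+ * plain setters follow the usual value-plus-_present pattern.
+ *
+ *   struct ethtool_eee_set_req *req;
+ *
+ *   req = ethtool_eee_set_req_alloc();
+ *   ethtool_eee_set_req_set_header_dev_name(req, "eth0");
+ *   ethtool_eee_set_req_set_enabled(req, 1);
+ *   ethtool_eee_set_req_set_tx_lpi_enabled(req, 1);
+ *   ethtool_eee_set_req_set_tx_lpi_timer(req, 50);
+ *   ethtool_eee_set(ys, req);
+ *   ethtool_eee_set_req_free(req);
+ */
+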
+/* ============== ETHTOOL_MSG_TSINFO_GET ============== */
+/* ETHTOOL_MSG_TSINFO_GET - do */
+struct ethtool_tsinfo_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_tsinfo_get_req *ethtool_tsinfo_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_tsinfo_get_req));
+}
+void ethtool_tsinfo_get_req_free(struct ethtool_tsinfo_get_req *req);
+
+static inline void
+ethtool_tsinfo_get_req_set_header_dev_index(struct ethtool_tsinfo_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_tsinfo_get_req_set_header_dev_name(struct ethtool_tsinfo_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_tsinfo_get_req_set_header_flags(struct ethtool_tsinfo_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_tsinfo_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 timestamping:1;
+ __u32 tx_types:1;
+ __u32 rx_filters:1;
+ __u32 phc_index:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset timestamping;
+ struct ethtool_bitset tx_types;
+ struct ethtool_bitset rx_filters;
+ __u32 phc_index;
+};
+
+void ethtool_tsinfo_get_rsp_free(struct ethtool_tsinfo_get_rsp *rsp);
+
+/*
+ * Get tsinfo params.
+ */
+struct ethtool_tsinfo_get_rsp *
+ethtool_tsinfo_get(struct ynl_sock *ys, struct ethtool_tsinfo_get_req *req);
+
+/* ETHTOOL_MSG_TSINFO_GET - dump */
+struct ethtool_tsinfo_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_tsinfo_get_req_dump *
+ethtool_tsinfo_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_tsinfo_get_req_dump));
+}
+void ethtool_tsinfo_get_req_dump_free(struct ethtool_tsinfo_get_req_dump *req);
+
+static inline void
+ethtool_tsinfo_get_req_dump_set_header_dev_index(struct ethtool_tsinfo_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_tsinfo_get_req_dump_set_header_dev_name(struct ethtool_tsinfo_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_tsinfo_get_req_dump_set_header_flags(struct ethtool_tsinfo_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_tsinfo_get_list {
+ struct ethtool_tsinfo_get_list *next;
+ struct ethtool_tsinfo_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_tsinfo_get_list_free(struct ethtool_tsinfo_get_list *rsp);
+
+struct ethtool_tsinfo_get_list *
+ethtool_tsinfo_get_dump(struct ynl_sock *ys,
+ struct ethtool_tsinfo_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_CABLE_TEST_ACT ============== */
+/* ETHTOOL_MSG_CABLE_TEST_ACT - do */
+struct ethtool_cable_test_act_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_cable_test_act_req *
+ethtool_cable_test_act_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_cable_test_act_req));
+}
+void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req);
+
+static inline void
+ethtool_cable_test_act_req_set_header_dev_index(struct ethtool_cable_test_act_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_cable_test_act_req_set_header_dev_name(struct ethtool_cable_test_act_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_cable_test_act_req_set_header_flags(struct ethtool_cable_test_act_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+/*
+ * Cable test.
+ */
+int ethtool_cable_test_act(struct ynl_sock *ys,
+ struct ethtool_cable_test_act_req *req);
+
+/* ============== ETHTOOL_MSG_CABLE_TEST_TDR_ACT ============== */
+/* ETHTOOL_MSG_CABLE_TEST_TDR_ACT - do */
+struct ethtool_cable_test_tdr_act_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_cable_test_tdr_act_req *
+ethtool_cable_test_tdr_act_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_cable_test_tdr_act_req));
+}
+void
+ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req);
+
+static inline void
+ethtool_cable_test_tdr_act_req_set_header_dev_index(struct ethtool_cable_test_tdr_act_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_cable_test_tdr_act_req_set_header_dev_name(struct ethtool_cable_test_tdr_act_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_cable_test_tdr_act_req_set_header_flags(struct ethtool_cable_test_tdr_act_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+/*
+ * Cable test TDR.
+ */
+int ethtool_cable_test_tdr_act(struct ynl_sock *ys,
+ struct ethtool_cable_test_tdr_act_req *req);
+
+/* ============== ETHTOOL_MSG_TUNNEL_INFO_GET ============== */
+/* ETHTOOL_MSG_TUNNEL_INFO_GET - do */
+struct ethtool_tunnel_info_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_tunnel_info_get_req *
+ethtool_tunnel_info_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_tunnel_info_get_req));
+}
+void ethtool_tunnel_info_get_req_free(struct ethtool_tunnel_info_get_req *req);
+
+static inline void
+ethtool_tunnel_info_get_req_set_header_dev_index(struct ethtool_tunnel_info_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_tunnel_info_get_req_set_header_dev_name(struct ethtool_tunnel_info_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_tunnel_info_get_req_set_header_flags(struct ethtool_tunnel_info_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_tunnel_info_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 udp_ports:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_tunnel_udp udp_ports;
+};
+
+void ethtool_tunnel_info_get_rsp_free(struct ethtool_tunnel_info_get_rsp *rsp);
+
+/*
+ * Get tunnel info params.
+ */
+struct ethtool_tunnel_info_get_rsp *
+ethtool_tunnel_info_get(struct ynl_sock *ys,
+ struct ethtool_tunnel_info_get_req *req);
+
+/* ETHTOOL_MSG_TUNNEL_INFO_GET - dump */
+struct ethtool_tunnel_info_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_tunnel_info_get_req_dump *
+ethtool_tunnel_info_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_tunnel_info_get_req_dump));
+}
+void
+ethtool_tunnel_info_get_req_dump_free(struct ethtool_tunnel_info_get_req_dump *req);
+
+static inline void
+ethtool_tunnel_info_get_req_dump_set_header_dev_index(struct ethtool_tunnel_info_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_tunnel_info_get_req_dump_set_header_dev_name(struct ethtool_tunnel_info_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_tunnel_info_get_req_dump_set_header_flags(struct ethtool_tunnel_info_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_tunnel_info_get_list {
+ struct ethtool_tunnel_info_get_list *next;
+ struct ethtool_tunnel_info_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void
+ethtool_tunnel_info_get_list_free(struct ethtool_tunnel_info_get_list *rsp);
+
+struct ethtool_tunnel_info_get_list *
+ethtool_tunnel_info_get_dump(struct ynl_sock *ys,
+ struct ethtool_tunnel_info_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_FEC_GET ============== */
+/* ETHTOOL_MSG_FEC_GET - do */
+struct ethtool_fec_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_fec_get_req *ethtool_fec_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_fec_get_req));
+}
+void ethtool_fec_get_req_free(struct ethtool_fec_get_req *req);
+
+static inline void
+ethtool_fec_get_req_set_header_dev_index(struct ethtool_fec_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_fec_get_req_set_header_dev_name(struct ethtool_fec_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_fec_get_req_set_header_flags(struct ethtool_fec_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_fec_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 modes:1;
+ __u32 auto_:1;
+ __u32 active:1;
+ __u32 stats:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset modes;
+ __u8 auto_;
+ __u32 active;
+ struct ethtool_fec_stat stats;
+};
+
+void ethtool_fec_get_rsp_free(struct ethtool_fec_get_rsp *rsp);
+
+/*
+ * Get FEC params.
+ */
+struct ethtool_fec_get_rsp *
+ethtool_fec_get(struct ynl_sock *ys, struct ethtool_fec_get_req *req);
+
+/* ETHTOOL_MSG_FEC_GET - dump */
+struct ethtool_fec_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_fec_get_req_dump *
+ethtool_fec_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_fec_get_req_dump));
+}
+void ethtool_fec_get_req_dump_free(struct ethtool_fec_get_req_dump *req);
+
+static inline void
+ethtool_fec_get_req_dump_set_header_dev_index(struct ethtool_fec_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_fec_get_req_dump_set_header_dev_name(struct ethtool_fec_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_fec_get_req_dump_set_header_flags(struct ethtool_fec_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_fec_get_list {
+ struct ethtool_fec_get_list *next;
+ struct ethtool_fec_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_fec_get_list_free(struct ethtool_fec_get_list *rsp);
+
+struct ethtool_fec_get_list *
+ethtool_fec_get_dump(struct ynl_sock *ys, struct ethtool_fec_get_req_dump *req);
+
+/* ETHTOOL_MSG_FEC_GET - notify */
+struct ethtool_fec_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_fec_get_ntf *ntf);
+ struct ethtool_fec_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_fec_get_ntf_free(struct ethtool_fec_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_FEC_SET ============== */
+/* ETHTOOL_MSG_FEC_SET - do */
+struct ethtool_fec_set_req {
+ struct {
+ __u32 header:1;
+ __u32 modes:1;
+ __u32 auto_:1;
+ __u32 active:1;
+ __u32 stats:1;
+ } _present;
+
+ struct ethtool_header header;
+ struct ethtool_bitset modes;
+ __u8 auto_;
+ __u32 active;
+ struct ethtool_fec_stat stats;
+};
+
+static inline struct ethtool_fec_set_req *ethtool_fec_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_fec_set_req));
+}
+void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req);
+
+static inline void
+ethtool_fec_set_req_set_header_dev_index(struct ethtool_fec_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_fec_set_req_set_header_dev_name(struct ethtool_fec_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_fec_set_req_set_header_flags(struct ethtool_fec_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_fec_set_req_set_modes_nomask(struct ethtool_fec_set_req *req)
+{
+ req->_present.modes = 1;
+ req->modes._present.nomask = 1;
+}
+static inline void
+ethtool_fec_set_req_set_modes_size(struct ethtool_fec_set_req *req, __u32 size)
+{
+ req->_present.modes = 1;
+ req->modes._present.size = 1;
+ req->modes.size = size;
+}
+static inline void
+__ethtool_fec_set_req_set_modes_bits_bit(struct ethtool_fec_set_req *req,
+ struct ethtool_bitset_bit *bit,
+ unsigned int n_bit)
+{
+ free(req->modes.bits.bit);
+ req->modes.bits.bit = bit;
+ req->modes.bits.n_bit = n_bit;
+}
+static inline void
+ethtool_fec_set_req_set_auto_(struct ethtool_fec_set_req *req, __u8 auto_)
+{
+ req->_present.auto_ = 1;
+ req->auto_ = auto_;
+}
+static inline void
+ethtool_fec_set_req_set_active(struct ethtool_fec_set_req *req, __u32 active)
+{
+ req->_present.active = 1;
+ req->active = active;
+}
+static inline void
+ethtool_fec_set_req_set_stats_corrected(struct ethtool_fec_set_req *req,
+ const void *corrected, size_t len)
+{
+ free(req->stats.corrected);
+ req->stats._present.corrected_len = len;
+ req->stats.corrected = malloc(req->stats._present.corrected_len);
+ memcpy(req->stats.corrected, corrected, req->stats._present.corrected_len);
+}
+static inline void
+ethtool_fec_set_req_set_stats_uncorr(struct ethtool_fec_set_req *req,
+ const void *uncorr, size_t len)
+{
+ free(req->stats.uncorr);
+ req->stats._present.uncorr_len = len;
+ req->stats.uncorr = malloc(req->stats._present.uncorr_len);
+ memcpy(req->stats.uncorr, uncorr, req->stats._present.uncorr_len);
+}
+static inline void
+ethtool_fec_set_req_set_stats_corr_bits(struct ethtool_fec_set_req *req,
+ const void *corr_bits, size_t len)
+{
+ free(req->stats.corr_bits);
+ req->stats._present.corr_bits_len = len;
+ req->stats.corr_bits = malloc(req->stats._present.corr_bits_len);
+ memcpy(req->stats.corr_bits, corr_bits, req->stats._present.corr_bits_len);
+}
+
+/*
+ * Set FEC params.
+ */
+int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req);
+
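+/*
+ * Illustrative sketch, not generated code: selecting FEC "auto". Note the
+ * binary stats setters above copy len bytes and record that length in the
+ * corresponding _present.*_len field.
+ *
+ *   struct ethtool_fec_set_req *req;
+ *
+ *   req = ethtool_fec_set_req_alloc();
+ *   ethtool_fec_set_req_set_header_dev_index(req, 2);
+ *   ethtool_fec_set_req_set_auto_(req, 1);
+ *   ethtool_fec_set(ys, req);
+ *   ethtool_fec_set_req_free(req);
+ */
+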
+/* ============== ETHTOOL_MSG_MODULE_EEPROM_GET ============== */
+/* ETHTOOL_MSG_MODULE_EEPROM_GET - do */
+struct ethtool_module_eeprom_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_module_eeprom_get_req *
+ethtool_module_eeprom_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_module_eeprom_get_req));
+}
+void
+ethtool_module_eeprom_get_req_free(struct ethtool_module_eeprom_get_req *req);
+
+static inline void
+ethtool_module_eeprom_get_req_set_header_dev_index(struct ethtool_module_eeprom_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_module_eeprom_get_req_set_header_dev_name(struct ethtool_module_eeprom_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_module_eeprom_get_req_set_header_flags(struct ethtool_module_eeprom_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_module_eeprom_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 offset:1;
+ __u32 length:1;
+ __u32 page:1;
+ __u32 bank:1;
+ __u32 i2c_address:1;
+ __u32 data_len;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 offset;
+ __u32 length;
+ __u8 page;
+ __u8 bank;
+ __u8 i2c_address;
+ void *data;
+};
+
+void
+ethtool_module_eeprom_get_rsp_free(struct ethtool_module_eeprom_get_rsp *rsp);
+
+/*
+ * Get module EEPROM params.
+ */
+struct ethtool_module_eeprom_get_rsp *
+ethtool_module_eeprom_get(struct ynl_sock *ys,
+ struct ethtool_module_eeprom_get_req *req);
+
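+/*
+ * Illustrative sketch, not generated code: for binary attributes such as
+ * data, the byte length lives in rsp->_present.data_len rather than in a
+ * one-bit presence flag. "req" is prepared with the setters above.
+ *
+ *   struct ethtool_module_eeprom_get_rsp *rsp;
+ *
+ *   rsp = ethtool_module_eeprom_get(ys, req);
+ *   if (rsp) {
+ *           if (rsp->_present.data_len)
+ *                   ... consume rsp->_present.data_len bytes at rsp->data ...
+ *           ethtool_module_eeprom_get_rsp_free(rsp);
+ *   }
+ */
+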
+/* ETHTOOL_MSG_MODULE_EEPROM_GET - dump */
+struct ethtool_module_eeprom_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_module_eeprom_get_req_dump *
+ethtool_module_eeprom_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_module_eeprom_get_req_dump));
+}
+void
+ethtool_module_eeprom_get_req_dump_free(struct ethtool_module_eeprom_get_req_dump *req);
+
+static inline void
+ethtool_module_eeprom_get_req_dump_set_header_dev_index(struct ethtool_module_eeprom_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_module_eeprom_get_req_dump_set_header_dev_name(struct ethtool_module_eeprom_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_module_eeprom_get_req_dump_set_header_flags(struct ethtool_module_eeprom_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_module_eeprom_get_list {
+ struct ethtool_module_eeprom_get_list *next;
+ struct ethtool_module_eeprom_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void
+ethtool_module_eeprom_get_list_free(struct ethtool_module_eeprom_get_list *rsp);
+
+struct ethtool_module_eeprom_get_list *
+ethtool_module_eeprom_get_dump(struct ynl_sock *ys,
+ struct ethtool_module_eeprom_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_PHC_VCLOCKS_GET ============== */
+/* ETHTOOL_MSG_PHC_VCLOCKS_GET - do */
+struct ethtool_phc_vclocks_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_phc_vclocks_get_req *
+ethtool_phc_vclocks_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_phc_vclocks_get_req));
+}
+void ethtool_phc_vclocks_get_req_free(struct ethtool_phc_vclocks_get_req *req);
+
+static inline void
+ethtool_phc_vclocks_get_req_set_header_dev_index(struct ethtool_phc_vclocks_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_phc_vclocks_get_req_set_header_dev_name(struct ethtool_phc_vclocks_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_phc_vclocks_get_req_set_header_flags(struct ethtool_phc_vclocks_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_phc_vclocks_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 num:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 num;
+};
+
+void ethtool_phc_vclocks_get_rsp_free(struct ethtool_phc_vclocks_get_rsp *rsp);
+
+/*
+ * Get PHC VCLOCKs.
+ */
+struct ethtool_phc_vclocks_get_rsp *
+ethtool_phc_vclocks_get(struct ynl_sock *ys,
+ struct ethtool_phc_vclocks_get_req *req);
+
+/* ETHTOOL_MSG_PHC_VCLOCKS_GET - dump */
+struct ethtool_phc_vclocks_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_phc_vclocks_get_req_dump *
+ethtool_phc_vclocks_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_phc_vclocks_get_req_dump));
+}
+void
+ethtool_phc_vclocks_get_req_dump_free(struct ethtool_phc_vclocks_get_req_dump *req);
+
+static inline void
+ethtool_phc_vclocks_get_req_dump_set_header_dev_index(struct ethtool_phc_vclocks_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_phc_vclocks_get_req_dump_set_header_dev_name(struct ethtool_phc_vclocks_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_phc_vclocks_get_req_dump_set_header_flags(struct ethtool_phc_vclocks_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_phc_vclocks_get_list {
+ struct ethtool_phc_vclocks_get_list *next;
+ struct ethtool_phc_vclocks_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void
+ethtool_phc_vclocks_get_list_free(struct ethtool_phc_vclocks_get_list *rsp);
+
+struct ethtool_phc_vclocks_get_list *
+ethtool_phc_vclocks_get_dump(struct ynl_sock *ys,
+ struct ethtool_phc_vclocks_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_MODULE_GET ============== */
+/* ETHTOOL_MSG_MODULE_GET - do */
+struct ethtool_module_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_module_get_req *ethtool_module_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_module_get_req));
+}
+void ethtool_module_get_req_free(struct ethtool_module_get_req *req);
+
+static inline void
+ethtool_module_get_req_set_header_dev_index(struct ethtool_module_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_module_get_req_set_header_dev_name(struct ethtool_module_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_module_get_req_set_header_flags(struct ethtool_module_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_module_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 power_mode_policy:1;
+ __u32 power_mode:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 power_mode_policy;
+ __u8 power_mode;
+};
+
+void ethtool_module_get_rsp_free(struct ethtool_module_get_rsp *rsp);
+
+/*
+ * Get module params.
+ */
+struct ethtool_module_get_rsp *
+ethtool_module_get(struct ynl_sock *ys, struct ethtool_module_get_req *req);
+
+/* ETHTOOL_MSG_MODULE_GET - dump */
+struct ethtool_module_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_module_get_req_dump *
+ethtool_module_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_module_get_req_dump));
+}
+void ethtool_module_get_req_dump_free(struct ethtool_module_get_req_dump *req);
+
+static inline void
+ethtool_module_get_req_dump_set_header_dev_index(struct ethtool_module_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_module_get_req_dump_set_header_dev_name(struct ethtool_module_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_module_get_req_dump_set_header_flags(struct ethtool_module_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_module_get_list {
+ struct ethtool_module_get_list *next;
+ struct ethtool_module_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_module_get_list_free(struct ethtool_module_get_list *rsp);
+
+struct ethtool_module_get_list *
+ethtool_module_get_dump(struct ynl_sock *ys,
+ struct ethtool_module_get_req_dump *req);
+
+/* ETHTOOL_MSG_MODULE_GET - notify */
+struct ethtool_module_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_module_get_ntf *ntf);
+ struct ethtool_module_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_module_get_ntf_free(struct ethtool_module_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_MODULE_SET ============== */
+/* ETHTOOL_MSG_MODULE_SET - do */
+struct ethtool_module_set_req {
+ struct {
+ __u32 header:1;
+ __u32 power_mode_policy:1;
+ __u32 power_mode:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 power_mode_policy;
+ __u8 power_mode;
+};
+
+static inline struct ethtool_module_set_req *ethtool_module_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_module_set_req));
+}
+void ethtool_module_set_req_free(struct ethtool_module_set_req *req);
+
+static inline void
+ethtool_module_set_req_set_header_dev_index(struct ethtool_module_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_module_set_req_set_header_dev_name(struct ethtool_module_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_module_set_req_set_header_flags(struct ethtool_module_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_module_set_req_set_power_mode_policy(struct ethtool_module_set_req *req,
+ __u8 power_mode_policy)
+{
+ req->_present.power_mode_policy = 1;
+ req->power_mode_policy = power_mode_policy;
+}
+static inline void
+ethtool_module_set_req_set_power_mode(struct ethtool_module_set_req *req,
+ __u8 power_mode)
+{
+ req->_present.power_mode = 1;
+ req->power_mode = power_mode;
+}
+
+/*
+ * Set module params.
+ */
+int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req);
+
+/* ============== ETHTOOL_MSG_PSE_GET ============== */
+/* ETHTOOL_MSG_PSE_GET - do */
+struct ethtool_pse_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_pse_get_req *ethtool_pse_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_pse_get_req));
+}
+void ethtool_pse_get_req_free(struct ethtool_pse_get_req *req);
+
+static inline void
+ethtool_pse_get_req_set_header_dev_index(struct ethtool_pse_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_pse_get_req_set_header_dev_name(struct ethtool_pse_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_pse_get_req_set_header_flags(struct ethtool_pse_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_pse_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 admin_state:1;
+ __u32 admin_control:1;
+ __u32 pw_d_status:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 admin_state;
+ __u32 admin_control;
+ __u32 pw_d_status;
+};
+
+void ethtool_pse_get_rsp_free(struct ethtool_pse_get_rsp *rsp);
+
+/*
+ * Get Power Sourcing Equipment params.
+ */
+struct ethtool_pse_get_rsp *
+ethtool_pse_get(struct ynl_sock *ys, struct ethtool_pse_get_req *req);
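+
+/* Example (illustrative sketch): a "do" request returns a heap-allocated
+ * reply, or NULL on error; reply attributes are only valid when the
+ * corresponding _present bit is set, since the kernel may omit them:
+ *
+ *	req = ethtool_pse_get_req_alloc();
+ *	ethtool_pse_get_req_set_header_dev_index(req, ifindex);
+ *	rsp = ethtool_pse_get(ys, req);
+ *	ethtool_pse_get_req_free(req);
+ *	if (!rsp)
+ *		return -1;
+ *	if (rsp->_present.admin_state)
+ *		printf("PSE admin state: %u\n", rsp->admin_state);
+ *	ethtool_pse_get_rsp_free(rsp);
+ */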
+
+/* ETHTOOL_MSG_PSE_GET - dump */
+struct ethtool_pse_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_pse_get_req_dump *
+ethtool_pse_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_pse_get_req_dump));
+}
+void ethtool_pse_get_req_dump_free(struct ethtool_pse_get_req_dump *req);
+
+static inline void
+ethtool_pse_get_req_dump_set_header_dev_index(struct ethtool_pse_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_pse_get_req_dump_set_header_dev_name(struct ethtool_pse_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_pse_get_req_dump_set_header_flags(struct ethtool_pse_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_pse_get_list {
+ struct ethtool_pse_get_list *next;
+ struct ethtool_pse_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_pse_get_list_free(struct ethtool_pse_get_list *rsp);
+
+struct ethtool_pse_get_list *
+ethtool_pse_get_dump(struct ynl_sock *ys, struct ethtool_pse_get_req_dump *req);
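+
+/* Example (illustrative sketch): dump replies come back as a singly linked
+ * list; iterate until YNL_LIST_END (the same sentinel the generated
+ * *_list_free() helpers check) and free the whole list afterwards. A NULL
+ * return indicates an error:
+ *
+ *	struct ethtool_pse_get_list *list, *pos;
+ *
+ *	list = ethtool_pse_get_dump(ys, req);
+ *	if (!list)
+ *		return -1;
+ *	for (pos = list; (void *)pos != YNL_LIST_END; pos = pos->next)
+ *		printf("dev %u\n", pos->obj.header.dev_index);
+ *	ethtool_pse_get_list_free(list);
+ */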
+
+/* ============== ETHTOOL_MSG_PSE_SET ============== */
+/* ETHTOOL_MSG_PSE_SET - do */
+struct ethtool_pse_set_req {
+ struct {
+ __u32 header:1;
+ __u32 admin_state:1;
+ __u32 admin_control:1;
+ __u32 pw_d_status:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 admin_state;
+ __u32 admin_control;
+ __u32 pw_d_status;
+};
+
+static inline struct ethtool_pse_set_req *ethtool_pse_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_pse_set_req));
+}
+void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req);
+
+static inline void
+ethtool_pse_set_req_set_header_dev_index(struct ethtool_pse_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_pse_set_req_set_header_dev_name(struct ethtool_pse_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_pse_set_req_set_header_flags(struct ethtool_pse_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_pse_set_req_set_admin_state(struct ethtool_pse_set_req *req,
+ __u32 admin_state)
+{
+ req->_present.admin_state = 1;
+ req->admin_state = admin_state;
+}
+static inline void
+ethtool_pse_set_req_set_admin_control(struct ethtool_pse_set_req *req,
+ __u32 admin_control)
+{
+ req->_present.admin_control = 1;
+ req->admin_control = admin_control;
+}
+static inline void
+ethtool_pse_set_req_set_pw_d_status(struct ethtool_pse_set_req *req,
+ __u32 pw_d_status)
+{
+ req->_present.pw_d_status = 1;
+ req->pw_d_status = pw_d_status;
+}
+
+/*
+ * Set Power Sourcing Equipment params.
+ */
+int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req);
+
+/* ============== ETHTOOL_MSG_RSS_GET ============== */
+/* ETHTOOL_MSG_RSS_GET - do */
+struct ethtool_rss_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_rss_get_req *ethtool_rss_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_rss_get_req));
+}
+void ethtool_rss_get_req_free(struct ethtool_rss_get_req *req);
+
+static inline void
+ethtool_rss_get_req_set_header_dev_index(struct ethtool_rss_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_rss_get_req_set_header_dev_name(struct ethtool_rss_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_rss_get_req_set_header_flags(struct ethtool_rss_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_rss_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 context:1;
+ __u32 hfunc:1;
+ __u32 indir_len;
+ __u32 hkey_len;
+ } _present;
+
+ struct ethtool_header header;
+ __u32 context;
+ __u32 hfunc;
+ void *indir;
+ void *hkey;
+};
+
+void ethtool_rss_get_rsp_free(struct ethtool_rss_get_rsp *rsp);
+
+/*
+ * Get RSS params.
+ */
+struct ethtool_rss_get_rsp *
+ethtool_rss_get(struct ynl_sock *ys, struct ethtool_rss_get_req *req);
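+
+/* Example (illustrative sketch): the indirection table and hash key are
+ * raw binary attributes; _present.indir_len and _present.hkey_len hold
+ * their payload sizes in bytes. Treating the indirection blob as an array
+ * of __u32 queue indices is an assumption about the kernel's packing:
+ *
+ *	rsp = ethtool_rss_get(ys, req);
+ *	if (!rsp)
+ *		return -1;
+ *	if (rsp->_present.indir_len) {
+ *		const __u32 *tbl = rsp->indir;
+ *		unsigned int i;
+ *
+ *		for (i = 0; i < rsp->_present.indir_len / sizeof(__u32); i++)
+ *			printf("indir[%u] -> queue %u\n", i, tbl[i]);
+ *	}
+ *	ethtool_rss_get_rsp_free(rsp);
+ */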
+
+/* ETHTOOL_MSG_RSS_GET - dump */
+struct ethtool_rss_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_rss_get_req_dump *
+ethtool_rss_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_rss_get_req_dump));
+}
+void ethtool_rss_get_req_dump_free(struct ethtool_rss_get_req_dump *req);
+
+static inline void
+ethtool_rss_get_req_dump_set_header_dev_index(struct ethtool_rss_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_rss_get_req_dump_set_header_dev_name(struct ethtool_rss_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_rss_get_req_dump_set_header_flags(struct ethtool_rss_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_rss_get_list {
+ struct ethtool_rss_get_list *next;
+ struct ethtool_rss_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_rss_get_list_free(struct ethtool_rss_get_list *rsp);
+
+struct ethtool_rss_get_list *
+ethtool_rss_get_dump(struct ynl_sock *ys, struct ethtool_rss_get_req_dump *req);
+
+/* ============== ETHTOOL_MSG_PLCA_GET_CFG ============== */
+/* ETHTOOL_MSG_PLCA_GET_CFG - do */
+struct ethtool_plca_get_cfg_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_plca_get_cfg_req *
+ethtool_plca_get_cfg_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_plca_get_cfg_req));
+}
+void ethtool_plca_get_cfg_req_free(struct ethtool_plca_get_cfg_req *req);
+
+static inline void
+ethtool_plca_get_cfg_req_set_header_dev_index(struct ethtool_plca_get_cfg_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_plca_get_cfg_req_set_header_dev_name(struct ethtool_plca_get_cfg_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_plca_get_cfg_req_set_header_flags(struct ethtool_plca_get_cfg_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_plca_get_cfg_rsp {
+ struct {
+ __u32 header:1;
+ __u32 version:1;
+ __u32 enabled:1;
+ __u32 status:1;
+ __u32 node_cnt:1;
+ __u32 node_id:1;
+ __u32 to_tmr:1;
+ __u32 burst_cnt:1;
+ __u32 burst_tmr:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u16 version;
+ __u8 enabled;
+ __u8 status;
+ __u32 node_cnt;
+ __u32 node_id;
+ __u32 to_tmr;
+ __u32 burst_cnt;
+ __u32 burst_tmr;
+};
+
+void ethtool_plca_get_cfg_rsp_free(struct ethtool_plca_get_cfg_rsp *rsp);
+
+/*
+ * Get PLCA params.
+ */
+struct ethtool_plca_get_cfg_rsp *
+ethtool_plca_get_cfg(struct ynl_sock *ys, struct ethtool_plca_get_cfg_req *req);
+
+/* ETHTOOL_MSG_PLCA_GET_CFG - dump */
+struct ethtool_plca_get_cfg_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_plca_get_cfg_req_dump *
+ethtool_plca_get_cfg_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_plca_get_cfg_req_dump));
+}
+void
+ethtool_plca_get_cfg_req_dump_free(struct ethtool_plca_get_cfg_req_dump *req);
+
+static inline void
+ethtool_plca_get_cfg_req_dump_set_header_dev_index(struct ethtool_plca_get_cfg_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_plca_get_cfg_req_dump_set_header_dev_name(struct ethtool_plca_get_cfg_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_plca_get_cfg_req_dump_set_header_flags(struct ethtool_plca_get_cfg_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_plca_get_cfg_list {
+ struct ethtool_plca_get_cfg_list *next;
+ struct ethtool_plca_get_cfg_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_plca_get_cfg_list_free(struct ethtool_plca_get_cfg_list *rsp);
+
+struct ethtool_plca_get_cfg_list *
+ethtool_plca_get_cfg_dump(struct ynl_sock *ys,
+ struct ethtool_plca_get_cfg_req_dump *req);
+
+/* ETHTOOL_MSG_PLCA_GET_CFG - notify */
+struct ethtool_plca_get_cfg_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_plca_get_cfg_ntf *ntf);
+ struct ethtool_plca_get_cfg_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_plca_get_cfg_ntf_free(struct ethtool_plca_get_cfg_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_PLCA_SET_CFG ============== */
+/* ETHTOOL_MSG_PLCA_SET_CFG - do */
+struct ethtool_plca_set_cfg_req {
+ struct {
+ __u32 header:1;
+ __u32 version:1;
+ __u32 enabled:1;
+ __u32 status:1;
+ __u32 node_cnt:1;
+ __u32 node_id:1;
+ __u32 to_tmr:1;
+ __u32 burst_cnt:1;
+ __u32 burst_tmr:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u16 version;
+ __u8 enabled;
+ __u8 status;
+ __u32 node_cnt;
+ __u32 node_id;
+ __u32 to_tmr;
+ __u32 burst_cnt;
+ __u32 burst_tmr;
+};
+
+static inline struct ethtool_plca_set_cfg_req *
+ethtool_plca_set_cfg_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_plca_set_cfg_req));
+}
+void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req);
+
+static inline void
+ethtool_plca_set_cfg_req_set_header_dev_index(struct ethtool_plca_set_cfg_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_header_dev_name(struct ethtool_plca_set_cfg_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_header_flags(struct ethtool_plca_set_cfg_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_version(struct ethtool_plca_set_cfg_req *req,
+ __u16 version)
+{
+ req->_present.version = 1;
+ req->version = version;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_enabled(struct ethtool_plca_set_cfg_req *req,
+ __u8 enabled)
+{
+ req->_present.enabled = 1;
+ req->enabled = enabled;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_status(struct ethtool_plca_set_cfg_req *req,
+ __u8 status)
+{
+ req->_present.status = 1;
+ req->status = status;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_node_cnt(struct ethtool_plca_set_cfg_req *req,
+ __u32 node_cnt)
+{
+ req->_present.node_cnt = 1;
+ req->node_cnt = node_cnt;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_node_id(struct ethtool_plca_set_cfg_req *req,
+ __u32 node_id)
+{
+ req->_present.node_id = 1;
+ req->node_id = node_id;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_to_tmr(struct ethtool_plca_set_cfg_req *req,
+ __u32 to_tmr)
+{
+ req->_present.to_tmr = 1;
+ req->to_tmr = to_tmr;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_burst_cnt(struct ethtool_plca_set_cfg_req *req,
+ __u32 burst_cnt)
+{
+ req->_present.burst_cnt = 1;
+ req->burst_cnt = burst_cnt;
+}
+static inline void
+ethtool_plca_set_cfg_req_set_burst_tmr(struct ethtool_plca_set_cfg_req *req,
+ __u32 burst_tmr)
+{
+ req->_present.burst_tmr = 1;
+ req->burst_tmr = burst_tmr;
+}
+
+/*
+ * Set PLCA params.
+ */
+int ethtool_plca_set_cfg(struct ynl_sock *ys,
+ struct ethtool_plca_set_cfg_req *req);
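+
+/* Example (illustrative sketch): only attributes whose setter was called
+ * get serialized (the request side checks each _present bit), so a partial
+ * update leaves the remaining PLCA parameters unchanged:
+ *
+ *	req = ethtool_plca_set_cfg_req_alloc();
+ *	ethtool_plca_set_cfg_req_set_header_dev_index(req, ifindex);
+ *	ethtool_plca_set_cfg_req_set_node_id(req, 5);
+ *	ethtool_plca_set_cfg_req_set_node_cnt(req, 8);
+ *	err = ethtool_plca_set_cfg(ys, req);
+ *	ethtool_plca_set_cfg_req_free(req);
+ */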
+
+/* ============== ETHTOOL_MSG_PLCA_GET_STATUS ============== */
+/* ETHTOOL_MSG_PLCA_GET_STATUS - do */
+struct ethtool_plca_get_status_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_plca_get_status_req *
+ethtool_plca_get_status_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_plca_get_status_req));
+}
+void ethtool_plca_get_status_req_free(struct ethtool_plca_get_status_req *req);
+
+static inline void
+ethtool_plca_get_status_req_set_header_dev_index(struct ethtool_plca_get_status_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_plca_get_status_req_set_header_dev_name(struct ethtool_plca_get_status_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_plca_get_status_req_set_header_flags(struct ethtool_plca_get_status_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_plca_get_status_rsp {
+ struct {
+ __u32 header:1;
+ __u32 version:1;
+ __u32 enabled:1;
+ __u32 status:1;
+ __u32 node_cnt:1;
+ __u32 node_id:1;
+ __u32 to_tmr:1;
+ __u32 burst_cnt:1;
+ __u32 burst_tmr:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u16 version;
+ __u8 enabled;
+ __u8 status;
+ __u32 node_cnt;
+ __u32 node_id;
+ __u32 to_tmr;
+ __u32 burst_cnt;
+ __u32 burst_tmr;
+};
+
+void ethtool_plca_get_status_rsp_free(struct ethtool_plca_get_status_rsp *rsp);
+
+/*
+ * Get PLCA status params.
+ */
+struct ethtool_plca_get_status_rsp *
+ethtool_plca_get_status(struct ynl_sock *ys,
+ struct ethtool_plca_get_status_req *req);
+
+/* ETHTOOL_MSG_PLCA_GET_STATUS - dump */
+struct ethtool_plca_get_status_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_plca_get_status_req_dump *
+ethtool_plca_get_status_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_plca_get_status_req_dump));
+}
+void
+ethtool_plca_get_status_req_dump_free(struct ethtool_plca_get_status_req_dump *req);
+
+static inline void
+ethtool_plca_get_status_req_dump_set_header_dev_index(struct ethtool_plca_get_status_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_plca_get_status_req_dump_set_header_dev_name(struct ethtool_plca_get_status_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_plca_get_status_req_dump_set_header_flags(struct ethtool_plca_get_status_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_plca_get_status_list {
+ struct ethtool_plca_get_status_list *next;
+ struct ethtool_plca_get_status_rsp obj __attribute__ ((aligned (8)));
+};
+
+void
+ethtool_plca_get_status_list_free(struct ethtool_plca_get_status_list *rsp);
+
+struct ethtool_plca_get_status_list *
+ethtool_plca_get_status_dump(struct ynl_sock *ys,
+ struct ethtool_plca_get_status_req_dump *req);
+
+/* ============== ETHTOOL_MSG_MM_GET ============== */
+/* ETHTOOL_MSG_MM_GET - do */
+struct ethtool_mm_get_req {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_mm_get_req *ethtool_mm_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_mm_get_req));
+}
+void ethtool_mm_get_req_free(struct ethtool_mm_get_req *req);
+
+static inline void
+ethtool_mm_get_req_set_header_dev_index(struct ethtool_mm_get_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_mm_get_req_set_header_dev_name(struct ethtool_mm_get_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_mm_get_req_set_header_flags(struct ethtool_mm_get_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_mm_get_rsp {
+ struct {
+ __u32 header:1;
+ __u32 pmac_enabled:1;
+ __u32 tx_enabled:1;
+ __u32 tx_active:1;
+ __u32 tx_min_frag_size:1;
+ __u32 rx_min_frag_size:1;
+ __u32 verify_enabled:1;
+ __u32 verify_time:1;
+ __u32 max_verify_time:1;
+ __u32 stats:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 pmac_enabled;
+ __u8 tx_enabled;
+ __u8 tx_active;
+ __u32 tx_min_frag_size;
+ __u32 rx_min_frag_size;
+ __u8 verify_enabled;
+ __u32 verify_time;
+ __u32 max_verify_time;
+ struct ethtool_mm_stat stats;
+};
+
+void ethtool_mm_get_rsp_free(struct ethtool_mm_get_rsp *rsp);
+
+/*
+ * Get MAC Merge configuration and state
+ */
+struct ethtool_mm_get_rsp *
+ethtool_mm_get(struct ynl_sock *ys, struct ethtool_mm_get_req *req);
+
+/* ETHTOOL_MSG_MM_GET - dump */
+struct ethtool_mm_get_req_dump {
+ struct {
+ __u32 header:1;
+ } _present;
+
+ struct ethtool_header header;
+};
+
+static inline struct ethtool_mm_get_req_dump *
+ethtool_mm_get_req_dump_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_mm_get_req_dump));
+}
+void ethtool_mm_get_req_dump_free(struct ethtool_mm_get_req_dump *req);
+
+static inline void
+ethtool_mm_get_req_dump_set_header_dev_index(struct ethtool_mm_get_req_dump *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_mm_get_req_dump_set_header_dev_name(struct ethtool_mm_get_req_dump *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_mm_get_req_dump_set_header_flags(struct ethtool_mm_get_req_dump *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+
+struct ethtool_mm_get_list {
+ struct ethtool_mm_get_list *next;
+ struct ethtool_mm_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_mm_get_list_free(struct ethtool_mm_get_list *rsp);
+
+struct ethtool_mm_get_list *
+ethtool_mm_get_dump(struct ynl_sock *ys, struct ethtool_mm_get_req_dump *req);
+
+/* ETHTOOL_MSG_MM_GET - notify */
+struct ethtool_mm_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_mm_get_ntf *ntf);
+ struct ethtool_mm_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_mm_get_ntf_free(struct ethtool_mm_get_ntf *rsp);
+
+/* ============== ETHTOOL_MSG_MM_SET ============== */
+/* ETHTOOL_MSG_MM_SET - do */
+struct ethtool_mm_set_req {
+ struct {
+ __u32 header:1;
+ __u32 verify_enabled:1;
+ __u32 verify_time:1;
+ __u32 tx_enabled:1;
+ __u32 pmac_enabled:1;
+ __u32 tx_min_frag_size:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 verify_enabled;
+ __u32 verify_time;
+ __u8 tx_enabled;
+ __u8 pmac_enabled;
+ __u32 tx_min_frag_size;
+};
+
+static inline struct ethtool_mm_set_req *ethtool_mm_set_req_alloc(void)
+{
+ return calloc(1, sizeof(struct ethtool_mm_set_req));
+}
+void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req);
+
+static inline void
+ethtool_mm_set_req_set_header_dev_index(struct ethtool_mm_set_req *req,
+ __u32 dev_index)
+{
+ req->_present.header = 1;
+ req->header._present.dev_index = 1;
+ req->header.dev_index = dev_index;
+}
+static inline void
+ethtool_mm_set_req_set_header_dev_name(struct ethtool_mm_set_req *req,
+ const char *dev_name)
+{
+ free(req->header.dev_name);
+ req->header._present.dev_name_len = strlen(dev_name);
+ req->header.dev_name = malloc(req->header._present.dev_name_len + 1);
+ memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len);
+ req->header.dev_name[req->header._present.dev_name_len] = 0;
+}
+static inline void
+ethtool_mm_set_req_set_header_flags(struct ethtool_mm_set_req *req,
+ __u32 flags)
+{
+ req->_present.header = 1;
+ req->header._present.flags = 1;
+ req->header.flags = flags;
+}
+static inline void
+ethtool_mm_set_req_set_verify_enabled(struct ethtool_mm_set_req *req,
+ __u8 verify_enabled)
+{
+ req->_present.verify_enabled = 1;
+ req->verify_enabled = verify_enabled;
+}
+static inline void
+ethtool_mm_set_req_set_verify_time(struct ethtool_mm_set_req *req,
+ __u32 verify_time)
+{
+ req->_present.verify_time = 1;
+ req->verify_time = verify_time;
+}
+static inline void
+ethtool_mm_set_req_set_tx_enabled(struct ethtool_mm_set_req *req,
+ __u8 tx_enabled)
+{
+ req->_present.tx_enabled = 1;
+ req->tx_enabled = tx_enabled;
+}
+static inline void
+ethtool_mm_set_req_set_pmac_enabled(struct ethtool_mm_set_req *req,
+ __u8 pmac_enabled)
+{
+ req->_present.pmac_enabled = 1;
+ req->pmac_enabled = pmac_enabled;
+}
+static inline void
+ethtool_mm_set_req_set_tx_min_frag_size(struct ethtool_mm_set_req *req,
+ __u32 tx_min_frag_size)
+{
+ req->_present.tx_min_frag_size = 1;
+ req->tx_min_frag_size = tx_min_frag_size;
+}
+
+/*
+ * Set MAC Merge configuration
+ */
+int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req);
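+
+/* Example (illustrative sketch): a typical MAC Merge configuration enables
+ * the pMAC and preemptible TX with verification; the values here are
+ * purely illustrative:
+ *
+ *	req = ethtool_mm_set_req_alloc();
+ *	ethtool_mm_set_req_set_header_dev_index(req, ifindex);
+ *	ethtool_mm_set_req_set_pmac_enabled(req, 1);
+ *	ethtool_mm_set_req_set_tx_enabled(req, 1);
+ *	ethtool_mm_set_req_set_verify_enabled(req, 1);
+ *	err = ethtool_mm_set(ys, req);
+ *	ethtool_mm_set_req_free(req);
+ */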
+
+/* ETHTOOL_MSG_CABLE_TEST_NTF - event */
+struct ethtool_cable_test_ntf_rsp {
+ struct {
+ __u32 header:1;
+ __u32 status:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 status;
+};
+
+struct ethtool_cable_test_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_cable_test_ntf *ntf);
+ struct ethtool_cable_test_ntf_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_cable_test_ntf_free(struct ethtool_cable_test_ntf *rsp);
+
+/* ETHTOOL_MSG_CABLE_TEST_TDR_NTF - event */
+struct ethtool_cable_test_tdr_ntf_rsp {
+ struct {
+ __u32 header:1;
+ __u32 status:1;
+ __u32 nest:1;
+ } _present;
+
+ struct ethtool_header header;
+ __u8 status;
+ struct ethtool_cable_nest nest;
+};
+
+struct ethtool_cable_test_tdr_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ethtool_cable_test_tdr_ntf *ntf);
+ struct ethtool_cable_test_tdr_ntf_rsp obj __attribute__ ((aligned (8)));
+};
+
+void ethtool_cable_test_tdr_ntf_free(struct ethtool_cable_test_tdr_ntf *rsp);
+
+#endif /* _LINUX_ETHTOOL_GEN_H */
diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c
new file mode 100644
index 000000000000..4271b5d43c58
--- /dev/null
+++ b/tools/net/ynl/generated/fou-user.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include <string.h>
+#include "fou-user.h"
+#include "ynl.h"
+#include <linux/fou.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const fou_op_strmap[] = {
+ [FOU_CMD_ADD] = "add",
+ [FOU_CMD_DEL] = "del",
+ [FOU_CMD_GET] = "get",
+};
+
+const char *fou_op_str(int op)
+{
+ if (op < 0 || op >= (int)MNL_ARRAY_SIZE(fou_op_strmap))
+ return NULL;
+ return fou_op_strmap[op];
+}
+
+static const char * const fou_encap_type_strmap[] = {
+ [0] = "unspec",
+ [1] = "direct",
+ [2] = "gue",
+};
+
+const char *fou_encap_type_str(int value)
+{
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(fou_encap_type_strmap))
+ return NULL;
+ return fou_encap_type_strmap[value];
+}
+
+/* Policies */
+struct ynl_policy_attr fou_policy[FOU_ATTR_MAX + 1] = {
+ [FOU_ATTR_UNSPEC] = { .name = "unspec", .type = YNL_PT_REJECT, },
+ [FOU_ATTR_PORT] = { .name = "port", .type = YNL_PT_U16, },
+ [FOU_ATTR_AF] = { .name = "af", .type = YNL_PT_U8, },
+ [FOU_ATTR_IPPROTO] = { .name = "ipproto", .type = YNL_PT_U8, },
+ [FOU_ATTR_TYPE] = { .name = "type", .type = YNL_PT_U8, },
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = { .name = "remcsum_nopartial", .type = YNL_PT_FLAG, },
+ [FOU_ATTR_LOCAL_V4] = { .name = "local_v4", .type = YNL_PT_U32, },
+ [FOU_ATTR_LOCAL_V6] = { .name = "local_v6", .type = YNL_PT_BINARY,},
+ [FOU_ATTR_PEER_V4] = { .name = "peer_v4", .type = YNL_PT_U32, },
+ [FOU_ATTR_PEER_V6] = { .name = "peer_v6", .type = YNL_PT_BINARY,},
+ [FOU_ATTR_PEER_PORT] = { .name = "peer_port", .type = YNL_PT_U16, },
+ [FOU_ATTR_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest fou_nest = {
+ .max_attr = FOU_ATTR_MAX,
+ .table = fou_policy,
+};
+
+/* Common nested types */
+/* ============== FOU_CMD_ADD ============== */
+/* FOU_CMD_ADD - do */
+void fou_add_req_free(struct fou_add_req *req)
+{
+ free(req->local_v6);
+ free(req->peer_v6);
+ free(req);
+}
+
+int fou_add(struct ynl_sock *ys, struct fou_add_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_ADD, 1);
+ ys->req_policy = &fou_nest;
+
+ if (req->_present.port)
+ mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port);
+ if (req->_present.ipproto)
+ mnl_attr_put_u8(nlh, FOU_ATTR_IPPROTO, req->ipproto);
+ if (req->_present.type)
+ mnl_attr_put_u8(nlh, FOU_ATTR_TYPE, req->type);
+ if (req->_present.remcsum_nopartial)
+ mnl_attr_put(nlh, FOU_ATTR_REMCSUM_NOPARTIAL, 0, NULL);
+ if (req->_present.local_v4)
+ mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4);
+ if (req->_present.peer_v4)
+ mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4);
+ if (req->_present.local_v6_len)
+ mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6);
+ if (req->_present.peer_v6_len)
+ mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
+ if (req->_present.peer_port)
+ mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port);
+ if (req->_present.ifindex)
+ mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== FOU_CMD_DEL ============== */
+/* FOU_CMD_DEL - do */
+void fou_del_req_free(struct fou_del_req *req)
+{
+ free(req->local_v6);
+ free(req->peer_v6);
+ free(req);
+}
+
+int fou_del(struct ynl_sock *ys, struct fou_del_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_DEL, 1);
+ ys->req_policy = &fou_nest;
+
+ if (req->_present.af)
+ mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af);
+ if (req->_present.ifindex)
+ mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
+ if (req->_present.port)
+ mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port);
+ if (req->_present.peer_port)
+ mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port);
+ if (req->_present.local_v4)
+ mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4);
+ if (req->_present.peer_v4)
+ mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4);
+ if (req->_present.local_v6_len)
+ mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6);
+ if (req->_present.peer_v6_len)
+ mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+/* ============== FOU_CMD_GET ============== */
+/* FOU_CMD_GET - do */
+void fou_get_req_free(struct fou_get_req *req)
+{
+ free(req->local_v6);
+ free(req->peer_v6);
+ free(req);
+}
+
+void fou_get_rsp_free(struct fou_get_rsp *rsp)
+{
+ free(rsp->local_v6);
+ free(rsp->peer_v6);
+ free(rsp);
+}
+
+int fou_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ const struct nlattr *attr;
+ struct fou_get_rsp *dst;
+
+ dst = yarg->data;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == FOU_ATTR_PORT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.port = 1;
+ dst->port = mnl_attr_get_u16(attr);
+ } else if (type == FOU_ATTR_IPPROTO) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ipproto = 1;
+ dst->ipproto = mnl_attr_get_u8(attr);
+ } else if (type == FOU_ATTR_TYPE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.type = 1;
+ dst->type = mnl_attr_get_u8(attr);
+ } else if (type == FOU_ATTR_REMCSUM_NOPARTIAL) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.remcsum_nopartial = 1;
+ } else if (type == FOU_ATTR_LOCAL_V4) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.local_v4 = 1;
+ dst->local_v4 = mnl_attr_get_u32(attr);
+ } else if (type == FOU_ATTR_PEER_V4) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.peer_v4 = 1;
+ dst->peer_v4 = mnl_attr_get_u32(attr);
+ } else if (type == FOU_ATTR_LOCAL_V6) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.local_v6_len = len;
+ dst->local_v6 = malloc(len);
+ memcpy(dst->local_v6, mnl_attr_get_payload(attr), len);
+ } else if (type == FOU_ATTR_PEER_V6) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = mnl_attr_get_payload_len(attr);
+ dst->_present.peer_v6_len = len;
+ dst->peer_v6 = malloc(len);
+ memcpy(dst->peer_v6, mnl_attr_get_payload(attr), len);
+ } else if (type == FOU_ATTR_PEER_PORT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.peer_port = 1;
+ dst->peer_port = mnl_attr_get_u16(attr);
+ } else if (type == FOU_ATTR_IFINDEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ifindex = 1;
+ dst->ifindex = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct fou_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_GET, 1);
+ ys->req_policy = &fou_nest;
+ yrs.yarg.rsp_policy = &fou_nest;
+
+ if (req->_present.af)
+ mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af);
+ if (req->_present.ifindex)
+ mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex);
+ if (req->_present.port)
+ mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port);
+ if (req->_present.peer_port)
+ mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port);
+ if (req->_present.local_v4)
+ mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4);
+ if (req->_present.peer_v4)
+ mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4);
+ if (req->_present.local_v6_len)
+ mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6);
+ if (req->_present.peer_v6_len)
+ mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = fou_get_rsp_parse;
+ yrs.rsp_cmd = FOU_CMD_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ fou_get_rsp_free(rsp);
+ return NULL;
+}
+
+/* FOU_CMD_GET - dump */
+void fou_get_list_free(struct fou_get_list *rsp)
+{
+ struct fou_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ free(rsp->obj.local_v6);
+ free(rsp->obj.peer_v6);
+ free(rsp);
+ }
+}
+
+struct fou_get_list *fou_get_dump(struct ynl_sock *ys)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct fou_get_list);
+ yds.cb = fou_get_rsp_parse;
+ yds.rsp_cmd = FOU_CMD_GET;
+ yds.rsp_policy = &fou_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, FOU_CMD_GET, 1);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ fou_get_list_free(yds.first);
+ return NULL;
+}
+
+const struct ynl_family ynl_fou_family = {
+ .name = "fou",
+};
diff --git a/tools/net/ynl/generated/fou-user.h b/tools/net/ynl/generated/fou-user.h
new file mode 100644
index 000000000000..d8ab50579cd1
--- /dev/null
+++ b/tools/net/ynl/generated/fou-user.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_FOU_GEN_H
+#define _LINUX_FOU_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/fou.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_fou_family;
+
+/* Enums */
+const char *fou_op_str(int op);
+const char *fou_encap_type_str(int value);
+
+/* Common nested types */
+/* ============== FOU_CMD_ADD ============== */
+/* FOU_CMD_ADD - do */
+struct fou_add_req {
+ struct {
+ __u32 port:1;
+ __u32 ipproto:1;
+ __u32 type:1;
+ __u32 remcsum_nopartial:1;
+ __u32 local_v4:1;
+ __u32 peer_v4:1;
+ __u32 local_v6_len;
+ __u32 peer_v6_len;
+ __u32 peer_port:1;
+ __u32 ifindex:1;
+ } _present;
+
+ __u16 port /* big-endian */;
+ __u8 ipproto;
+ __u8 type;
+ __u32 local_v4;
+ __u32 peer_v4;
+ void *local_v6;
+ void *peer_v6;
+ __u16 peer_port /* big-endian */;
+ __s32 ifindex;
+};
+
+static inline struct fou_add_req *fou_add_req_alloc(void)
+{
+ return calloc(1, sizeof(struct fou_add_req));
+}
+void fou_add_req_free(struct fou_add_req *req);
+
+static inline void
+fou_add_req_set_port(struct fou_add_req *req, __u16 port /* big-endian */)
+{
+ req->_present.port = 1;
+ req->port = port;
+}
+static inline void
+fou_add_req_set_ipproto(struct fou_add_req *req, __u8 ipproto)
+{
+ req->_present.ipproto = 1;
+ req->ipproto = ipproto;
+}
+static inline void fou_add_req_set_type(struct fou_add_req *req, __u8 type)
+{
+ req->_present.type = 1;
+ req->type = type;
+}
+static inline void fou_add_req_set_remcsum_nopartial(struct fou_add_req *req)
+{
+ req->_present.remcsum_nopartial = 1;
+}
+static inline void
+fou_add_req_set_local_v4(struct fou_add_req *req, __u32 local_v4)
+{
+ req->_present.local_v4 = 1;
+ req->local_v4 = local_v4;
+}
+static inline void
+fou_add_req_set_peer_v4(struct fou_add_req *req, __u32 peer_v4)
+{
+ req->_present.peer_v4 = 1;
+ req->peer_v4 = peer_v4;
+}
+static inline void
+fou_add_req_set_local_v6(struct fou_add_req *req, const void *local_v6,
+ size_t len)
+{
+ free(req->local_v6);
+ req->_present.local_v6_len = len;
+ req->local_v6 = malloc(req->_present.local_v6_len);
+ memcpy(req->local_v6, local_v6, req->_present.local_v6_len);
+}
+static inline void
+fou_add_req_set_peer_v6(struct fou_add_req *req, const void *peer_v6,
+ size_t len)
+{
+ free(req->peer_v6);
+ req->_present.peer_v6_len = len;
+ req->peer_v6 = malloc(req->_present.peer_v6_len);
+ memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len);
+}
+static inline void
+fou_add_req_set_peer_port(struct fou_add_req *req,
+ __u16 peer_port /* big-endian */)
+{
+ req->_present.peer_port = 1;
+ req->peer_port = peer_port;
+}
+static inline void
+fou_add_req_set_ifindex(struct fou_add_req *req, __s32 ifindex)
+{
+ req->_present.ifindex = 1;
+ req->ifindex = ifindex;
+}
+
+/*
+ * Add port.
+ */
+int fou_add(struct ynl_sock *ys, struct fou_add_req *req);
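+
+/* Example (illustrative sketch): port attributes are big-endian on the
+ * wire (note the "big-endian" annotations above), so convert with htons()
+ * from <arpa/inet.h>; FOU_ENCAP_DIRECT and IPPROTO_GRE are the uapi
+ * constants for a direct GRE-over-UDP encapsulation:
+ *
+ *	req = fou_add_req_alloc();
+ *	fou_add_req_set_port(req, htons(5555));
+ *	fou_add_req_set_ipproto(req, IPPROTO_GRE);
+ *	fou_add_req_set_type(req, FOU_ENCAP_DIRECT);
+ *	if (fou_add(ys, req) < 0)
+ *		perror("fou_add");
+ *	fou_add_req_free(req);
+ */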
+
+/* ============== FOU_CMD_DEL ============== */
+/* FOU_CMD_DEL - do */
+struct fou_del_req {
+ struct {
+ __u32 af:1;
+ __u32 ifindex:1;
+ __u32 port:1;
+ __u32 peer_port:1;
+ __u32 local_v4:1;
+ __u32 peer_v4:1;
+ __u32 local_v6_len;
+ __u32 peer_v6_len;
+ } _present;
+
+ __u8 af;
+ __s32 ifindex;
+ __u16 port /* big-endian */;
+ __u16 peer_port /* big-endian */;
+ __u32 local_v4;
+ __u32 peer_v4;
+ void *local_v6;
+ void *peer_v6;
+};
+
+static inline struct fou_del_req *fou_del_req_alloc(void)
+{
+ return calloc(1, sizeof(struct fou_del_req));
+}
+void fou_del_req_free(struct fou_del_req *req);
+
+static inline void fou_del_req_set_af(struct fou_del_req *req, __u8 af)
+{
+ req->_present.af = 1;
+ req->af = af;
+}
+static inline void
+fou_del_req_set_ifindex(struct fou_del_req *req, __s32 ifindex)
+{
+ req->_present.ifindex = 1;
+ req->ifindex = ifindex;
+}
+static inline void
+fou_del_req_set_port(struct fou_del_req *req, __u16 port /* big-endian */)
+{
+ req->_present.port = 1;
+ req->port = port;
+}
+static inline void
+fou_del_req_set_peer_port(struct fou_del_req *req,
+ __u16 peer_port /* big-endian */)
+{
+ req->_present.peer_port = 1;
+ req->peer_port = peer_port;
+}
+static inline void
+fou_del_req_set_local_v4(struct fou_del_req *req, __u32 local_v4)
+{
+ req->_present.local_v4 = 1;
+ req->local_v4 = local_v4;
+}
+static inline void
+fou_del_req_set_peer_v4(struct fou_del_req *req, __u32 peer_v4)
+{
+ req->_present.peer_v4 = 1;
+ req->peer_v4 = peer_v4;
+}
+static inline void
+fou_del_req_set_local_v6(struct fou_del_req *req, const void *local_v6,
+ size_t len)
+{
+ free(req->local_v6);
+ req->_present.local_v6_len = len;
+ req->local_v6 = malloc(req->_present.local_v6_len);
+ memcpy(req->local_v6, local_v6, req->_present.local_v6_len);
+}
+static inline void
+fou_del_req_set_peer_v6(struct fou_del_req *req, const void *peer_v6,
+ size_t len)
+{
+ free(req->peer_v6);
+ req->_present.peer_v6_len = len;
+ req->peer_v6 = malloc(req->_present.peer_v6_len);
+ memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len);
+}
+
+/*
+ * Delete port.
+ */
+int fou_del(struct ynl_sock *ys, struct fou_del_req *req);
+
+/* ============== FOU_CMD_GET ============== */
+/* FOU_CMD_GET - do */
+struct fou_get_req {
+ struct {
+ __u32 af:1;
+ __u32 ifindex:1;
+ __u32 port:1;
+ __u32 peer_port:1;
+ __u32 local_v4:1;
+ __u32 peer_v4:1;
+ __u32 local_v6_len;
+ __u32 peer_v6_len;
+ } _present;
+
+ __u8 af;
+ __s32 ifindex;
+ __u16 port /* big-endian */;
+ __u16 peer_port /* big-endian */;
+ __u32 local_v4;
+ __u32 peer_v4;
+ void *local_v6;
+ void *peer_v6;
+};
+
+static inline struct fou_get_req *fou_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct fou_get_req));
+}
+void fou_get_req_free(struct fou_get_req *req);
+
+static inline void fou_get_req_set_af(struct fou_get_req *req, __u8 af)
+{
+ req->_present.af = 1;
+ req->af = af;
+}
+static inline void
+fou_get_req_set_ifindex(struct fou_get_req *req, __s32 ifindex)
+{
+ req->_present.ifindex = 1;
+ req->ifindex = ifindex;
+}
+static inline void
+fou_get_req_set_port(struct fou_get_req *req, __u16 port /* big-endian */)
+{
+ req->_present.port = 1;
+ req->port = port;
+}
+static inline void
+fou_get_req_set_peer_port(struct fou_get_req *req,
+ __u16 peer_port /* big-endian */)
+{
+ req->_present.peer_port = 1;
+ req->peer_port = peer_port;
+}
+static inline void
+fou_get_req_set_local_v4(struct fou_get_req *req, __u32 local_v4)
+{
+ req->_present.local_v4 = 1;
+ req->local_v4 = local_v4;
+}
+static inline void
+fou_get_req_set_peer_v4(struct fou_get_req *req, __u32 peer_v4)
+{
+ req->_present.peer_v4 = 1;
+ req->peer_v4 = peer_v4;
+}
+static inline void
+fou_get_req_set_local_v6(struct fou_get_req *req, const void *local_v6,
+ size_t len)
+{
+ free(req->local_v6);
+ req->_present.local_v6_len = len;
+ req->local_v6 = malloc(req->_present.local_v6_len);
+ memcpy(req->local_v6, local_v6, req->_present.local_v6_len);
+}
+static inline void
+fou_get_req_set_peer_v6(struct fou_get_req *req, const void *peer_v6,
+ size_t len)
+{
+ free(req->peer_v6);
+ req->_present.peer_v6_len = len;
+ req->peer_v6 = malloc(req->_present.peer_v6_len);
+ memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len);
+}
+
+struct fou_get_rsp {
+ struct {
+ __u32 port:1;
+ __u32 ipproto:1;
+ __u32 type:1;
+ __u32 remcsum_nopartial:1;
+ __u32 local_v4:1;
+ __u32 peer_v4:1;
+ __u32 local_v6_len;
+ __u32 peer_v6_len;
+ __u32 peer_port:1;
+ __u32 ifindex:1;
+ } _present;
+
+ __u16 port /* big-endian */;
+ __u8 ipproto;
+ __u8 type;
+ __u32 local_v4;
+ __u32 peer_v4;
+ void *local_v6;
+ void *peer_v6;
+ __u16 peer_port /* big-endian */;
+ __s32 ifindex;
+};
+
+void fou_get_rsp_free(struct fou_get_rsp *rsp);
+
+/*
+ * Get tunnel info.
+ */
+struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req);
+
+/* FOU_CMD_GET - dump */
+struct fou_get_list {
+ struct fou_get_list *next;
+ struct fou_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void fou_get_list_free(struct fou_get_list *rsp);
+
+struct fou_get_list *fou_get_dump(struct ynl_sock *ys);
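+
+/* Example (illustrative sketch): the FOU dump takes no request object;
+ * walk the returned list until YNL_LIST_END, as fou_get_list_free() does,
+ * and treat a NULL return as an error:
+ *
+ *	struct fou_get_list *list, *pos;
+ *
+ *	list = fou_get_dump(ys);
+ *	if (!list)
+ *		return -1;
+ *	for (pos = list; (void *)pos != YNL_LIST_END; pos = pos->next)
+ *		if (pos->obj._present.port)
+ *			printf("port: %u\n", ntohs(pos->obj.port));
+ *	fou_get_list_free(list);
+ */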
+
+#endif /* _LINUX_FOU_GEN_H */
diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c
new file mode 100644
index 000000000000..7c67765daf90
--- /dev/null
+++ b/tools/net/ynl/generated/handshake-user.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include <string.h>
+#include "handshake-user.h"
+#include "ynl.h"
+#include <linux/handshake.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const handshake_op_strmap[] = {
+ [HANDSHAKE_CMD_READY] = "ready",
+ [HANDSHAKE_CMD_ACCEPT] = "accept",
+ [HANDSHAKE_CMD_DONE] = "done",
+};
+
+const char *handshake_op_str(int op)
+{
+ if (op < 0 || op >= (int)MNL_ARRAY_SIZE(handshake_op_strmap))
+ return NULL;
+ return handshake_op_strmap[op];
+}
+
+static const char * const handshake_handler_class_strmap[] = {
+ [0] = "none",
+ [1] = "tlshd",
+ [2] = "max",
+};
+
+const char *handshake_handler_class_str(enum handshake_handler_class value)
+{
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_handler_class_strmap))
+ return NULL;
+ return handshake_handler_class_strmap[value];
+}
+
+static const char * const handshake_msg_type_strmap[] = {
+ [0] = "unspec",
+ [1] = "clienthello",
+ [2] = "serverhello",
+};
+
+const char *handshake_msg_type_str(enum handshake_msg_type value)
+{
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_msg_type_strmap))
+ return NULL;
+ return handshake_msg_type_strmap[value];
+}
+
+static const char * const handshake_auth_strmap[] = {
+ [0] = "unspec",
+ [1] = "unauth",
+ [2] = "psk",
+ [3] = "x509",
+};
+
+const char *handshake_auth_str(enum handshake_auth value)
+{
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_auth_strmap))
+ return NULL;
+ return handshake_auth_strmap[value];
+}
+
+/* Policies */
+struct ynl_policy_attr handshake_x509_policy[HANDSHAKE_A_X509_MAX + 1] = {
+ [HANDSHAKE_A_X509_CERT] = { .name = "cert", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_X509_PRIVKEY] = { .name = "privkey", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest handshake_x509_nest = {
+ .max_attr = HANDSHAKE_A_X509_MAX,
+ .table = handshake_x509_policy,
+};
+
+struct ynl_policy_attr handshake_accept_policy[HANDSHAKE_A_ACCEPT_MAX + 1] = {
+ [HANDSHAKE_A_ACCEPT_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = { .name = "handler-class", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_ACCEPT_MESSAGE_TYPE] = { .name = "message-type", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_ACCEPT_TIMEOUT] = { .name = "timeout", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_ACCEPT_AUTH_MODE] = { .name = "auth-mode", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_ACCEPT_PEER_IDENTITY] = { .name = "peer-identity", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_ACCEPT_CERTIFICATE] = { .name = "certificate", .type = YNL_PT_NEST, .nest = &handshake_x509_nest, },
+ [HANDSHAKE_A_ACCEPT_PEERNAME] = { .name = "peername", .type = YNL_PT_NUL_STR, },
+};
+
+struct ynl_policy_nest handshake_accept_nest = {
+ .max_attr = HANDSHAKE_A_ACCEPT_MAX,
+ .table = handshake_accept_policy,
+};
+
+struct ynl_policy_attr handshake_done_policy[HANDSHAKE_A_DONE_MAX + 1] = {
+ [HANDSHAKE_A_DONE_STATUS] = { .name = "status", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_DONE_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, },
+ [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .name = "remote-auth", .type = YNL_PT_U32, },
+};
+
+struct ynl_policy_nest handshake_done_nest = {
+ .max_attr = HANDSHAKE_A_DONE_MAX,
+ .table = handshake_done_policy,
+};
+
+/* Common nested types */
+void handshake_x509_free(struct handshake_x509 *obj)
+{
+}
+
+int handshake_x509_parse(struct ynl_parse_arg *yarg,
+ const struct nlattr *nested)
+{
+ struct handshake_x509 *dst = yarg->data;
+ const struct nlattr *attr;
+
+ mnl_attr_for_each_nested(attr, nested) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == HANDSHAKE_A_X509_CERT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.cert = 1;
+ dst->cert = mnl_attr_get_u32(attr);
+ } else if (type == HANDSHAKE_A_X509_PRIVKEY) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.privkey = 1;
+ dst->privkey = mnl_attr_get_u32(attr);
+ }
+ }
+
+ return 0;
+}
+
+/* ============== HANDSHAKE_CMD_ACCEPT ============== */
+/* HANDSHAKE_CMD_ACCEPT - do */
+void handshake_accept_req_free(struct handshake_accept_req *req)
+{
+ free(req);
+}
+
+void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp)
+{
+ unsigned int i;
+
+ free(rsp->peer_identity);
+ for (i = 0; i < rsp->n_certificate; i++)
+ handshake_x509_free(&rsp->certificate[i]);
+ free(rsp->certificate);
+ free(rsp->peername);
+ free(rsp);
+}
+
+int handshake_accept_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct handshake_accept_rsp *dst;
+ unsigned int n_peer_identity = 0;
+ unsigned int n_certificate = 0;
+ const struct nlattr *attr;
+ struct ynl_parse_arg parg;
+ int i;
+
+ dst = yarg->data;
+ parg.ys = yarg->ys;
+
+ if (dst->certificate)
+ return ynl_error_parse(yarg, "attribute already present (accept.certificate)");
+ if (dst->peer_identity)
+ return ynl_error_parse(yarg, "attribute already present (accept.peer-identity)");
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == HANDSHAKE_A_ACCEPT_SOCKFD) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.sockfd = 1;
+ dst->sockfd = mnl_attr_get_u32(attr);
+ } else if (type == HANDSHAKE_A_ACCEPT_MESSAGE_TYPE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.message_type = 1;
+ dst->message_type = mnl_attr_get_u32(attr);
+ } else if (type == HANDSHAKE_A_ACCEPT_TIMEOUT) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.timeout = 1;
+ dst->timeout = mnl_attr_get_u32(attr);
+ } else if (type == HANDSHAKE_A_ACCEPT_AUTH_MODE) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.auth_mode = 1;
+ dst->auth_mode = mnl_attr_get_u32(attr);
+ } else if (type == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) {
+ n_peer_identity++;
+ } else if (type == HANDSHAKE_A_ACCEPT_CERTIFICATE) {
+ n_certificate++;
+ } else if (type == HANDSHAKE_A_ACCEPT_PEERNAME) {
+ unsigned int len;
+
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+
+ len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
+ dst->_present.peername_len = len;
+ dst->peername = malloc(len + 1);
+ memcpy(dst->peername, mnl_attr_get_str(attr), len);
+ dst->peername[len] = 0;
+ }
+ }
+
+ if (n_certificate) {
+ dst->certificate = calloc(n_certificate, sizeof(*dst->certificate));
+ dst->n_certificate = n_certificate;
+ i = 0;
+ parg.rsp_policy = &handshake_x509_nest;
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_CERTIFICATE) {
+ parg.data = &dst->certificate[i];
+ if (handshake_x509_parse(&parg, attr))
+ return MNL_CB_ERROR;
+ i++;
+ }
+ }
+ }
+ if (n_peer_identity) {
+ dst->peer_identity = calloc(n_peer_identity, sizeof(*dst->peer_identity));
+ dst->n_peer_identity = n_peer_identity;
+ i = 0;
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) {
+ dst->peer_identity[i] = mnl_attr_get_u32(attr);
+ i++;
+ }
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct handshake_accept_rsp *
+handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct handshake_accept_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_ACCEPT, 1);
+ ys->req_policy = &handshake_accept_nest;
+ yrs.yarg.rsp_policy = &handshake_accept_nest;
+
+ if (req->_present.handler_class)
+ mnl_attr_put_u32(nlh, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, req->handler_class);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = handshake_accept_rsp_parse;
+ yrs.rsp_cmd = HANDSHAKE_CMD_ACCEPT;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ handshake_accept_rsp_free(rsp);
+ return NULL;
+}
+
+/* HANDSHAKE_CMD_ACCEPT - notify */
+void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp)
+{
+ unsigned int i;
+
+ free(rsp->obj.peer_identity);
+ for (i = 0; i < rsp->obj.n_certificate; i++)
+ handshake_x509_free(&rsp->obj.certificate[i]);
+ free(rsp->obj.certificate);
+ free(rsp->obj.peername);
+ free(rsp);
+}
+
+/* ============== HANDSHAKE_CMD_DONE ============== */
+/* HANDSHAKE_CMD_DONE - do */
+void handshake_done_req_free(struct handshake_done_req *req)
+{
+ free(req->remote_auth);
+ free(req);
+}
+
+int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req)
+{
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_DONE, 1);
+ ys->req_policy = &handshake_done_nest;
+
+ if (req->_present.status)
+ mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_STATUS, req->status);
+ if (req->_present.sockfd)
+ mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_SOCKFD, req->sockfd);
+ for (unsigned int i = 0; i < req->n_remote_auth; i++)
+ mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]);
+
+ err = ynl_exec(ys, nlh, NULL);
+ if (err < 0)
+ return -1;
+
+ return 0;
+}
+
+static const struct ynl_ntf_info handshake_ntf_info[] = {
+ [HANDSHAKE_CMD_READY] = {
+ .alloc_sz = sizeof(struct handshake_accept_ntf),
+ .cb = handshake_accept_rsp_parse,
+ .policy = &handshake_accept_nest,
+ .free = (void *)handshake_accept_ntf_free,
+ },
+};
+
+const struct ynl_family ynl_handshake_family = {
+ .name = "handshake",
+ .ntf_info = handshake_ntf_info,
+ .ntf_info_size = MNL_ARRAY_SIZE(handshake_ntf_info),
+};
diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h
new file mode 100644
index 000000000000..47646bb91cea
--- /dev/null
+++ b/tools/net/ynl/generated/handshake-user.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_HANDSHAKE_GEN_H
+#define _LINUX_HANDSHAKE_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/handshake.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_handshake_family;
+
+/* Enums */
+const char *handshake_op_str(int op);
+const char *handshake_handler_class_str(enum handshake_handler_class value);
+const char *handshake_msg_type_str(enum handshake_msg_type value);
+const char *handshake_auth_str(enum handshake_auth value);
+
+/* Common nested types */
+struct handshake_x509 {
+ struct {
+ __u32 cert:1;
+ __u32 privkey:1;
+ } _present;
+
+ __u32 cert;
+ __u32 privkey;
+};
+
+/* ============== HANDSHAKE_CMD_ACCEPT ============== */
+/* HANDSHAKE_CMD_ACCEPT - do */
+struct handshake_accept_req {
+ struct {
+ __u32 handler_class:1;
+ } _present;
+
+ enum handshake_handler_class handler_class;
+};
+
+static inline struct handshake_accept_req *handshake_accept_req_alloc(void)
+{
+ return calloc(1, sizeof(struct handshake_accept_req));
+}
+void handshake_accept_req_free(struct handshake_accept_req *req);
+
+static inline void
+handshake_accept_req_set_handler_class(struct handshake_accept_req *req,
+ enum handshake_handler_class handler_class)
+{
+ req->_present.handler_class = 1;
+ req->handler_class = handler_class;
+}
+
+struct handshake_accept_rsp {
+ struct {
+ __u32 sockfd:1;
+ __u32 message_type:1;
+ __u32 timeout:1;
+ __u32 auth_mode:1;
+ __u32 peername_len;
+ } _present;
+
+ __u32 sockfd;
+ enum handshake_msg_type message_type;
+ __u32 timeout;
+ enum handshake_auth auth_mode;
+ unsigned int n_peer_identity;
+ __u32 *peer_identity;
+ unsigned int n_certificate;
+ struct handshake_x509 *certificate;
+ char *peername;
+};
+
+void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp);
+
+/*
+ * Handler retrieves next queued handshake request
+ */
+struct handshake_accept_rsp *
+handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req);
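+
+/* Example (illustrative sketch): multi-attr reply members use explicit
+ * counts (n_peer_identity, n_certificate) instead of _present bits;
+ * HANDSHAKE_HANDLER_CLASS_TLSHD comes from <linux/handshake.h>:
+ *
+ *	req = handshake_accept_req_alloc();
+ *	handshake_accept_req_set_handler_class(req, HANDSHAKE_HANDLER_CLASS_TLSHD);
+ *	rsp = handshake_accept(ys, req);
+ *	handshake_accept_req_free(req);
+ *	if (!rsp)
+ *		return -1;
+ *	for (i = 0; i < rsp->n_peer_identity; i++)
+ *		printf("peer identity: %u\n", rsp->peer_identity[i]);
+ *	handshake_accept_rsp_free(rsp);
+ */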
+
+/* HANDSHAKE_CMD_ACCEPT - notify */
+struct handshake_accept_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct handshake_accept_ntf *ntf);
+ struct handshake_accept_rsp obj __attribute__ ((aligned (8)));
+};
+
+void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp);
+
+/* ============== HANDSHAKE_CMD_DONE ============== */
+/* HANDSHAKE_CMD_DONE - do */
+struct handshake_done_req {
+ struct {
+ __u32 status:1;
+ __u32 sockfd:1;
+ } _present;
+
+ __u32 status;
+ __u32 sockfd;
+ unsigned int n_remote_auth;
+ __u32 *remote_auth;
+};
+
+static inline struct handshake_done_req *handshake_done_req_alloc(void)
+{
+ return calloc(1, sizeof(struct handshake_done_req));
+}
+void handshake_done_req_free(struct handshake_done_req *req);
+
+static inline void
+handshake_done_req_set_status(struct handshake_done_req *req, __u32 status)
+{
+ req->_present.status = 1;
+ req->status = status;
+}
+static inline void
+handshake_done_req_set_sockfd(struct handshake_done_req *req, __u32 sockfd)
+{
+ req->_present.sockfd = 1;
+ req->sockfd = sockfd;
+}
+static inline void
+__handshake_done_req_set_remote_auth(struct handshake_done_req *req,
+ __u32 *remote_auth,
+ unsigned int n_remote_auth)
+{
+ free(req->remote_auth);
+ req->remote_auth = remote_auth;
+ req->n_remote_auth = n_remote_auth;
+}
+
+/*
+ * Handler reports handshake completion
+ */
+int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req);
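+
+/* Example (illustrative sketch): __handshake_done_req_set_remote_auth()
+ * takes ownership of a heap-allocated array (handshake_done_req_free()
+ * releases it), so pass malloc()/calloc() memory, never a stack buffer;
+ * HANDSHAKE_AUTH_X509 comes from <linux/handshake.h>:
+ *
+ *	__u32 *auth = calloc(1, sizeof(*auth));
+ *
+ *	auth[0] = HANDSHAKE_AUTH_X509;
+ *	req = handshake_done_req_alloc();
+ *	handshake_done_req_set_status(req, 0);
+ *	handshake_done_req_set_sockfd(req, fd);
+ *	__handshake_done_req_set_remote_auth(req, auth, 1);
+ *	err = handshake_done(ys, req);
+ *	handshake_done_req_free(req);
+ */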
+
+#endif /* _LINUX_HANDSHAKE_GEN_H */
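
A minimal usage sketch for the generated handshake API above; it assumes
the program is linked against ynl.a plus the generated handshake objects,
and that the tlshd handler class from the uapi <linux/handshake.h> is the
one of interest:

#include <stdio.h>
#include <ynl.h>
#include "handshake-user.h"

int main(void)
{
	struct handshake_accept_req *req;
	struct handshake_accept_rsp *rsp;
	struct ynl_error yerr;
	struct ynl_sock *ys;

	ys = ynl_sock_create(&ynl_handshake_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return 1;
	}

	req = handshake_accept_req_alloc();
	handshake_accept_req_set_handler_class(req, HANDSHAKE_HANDLER_CLASS_TLSHD);

	rsp = handshake_accept(ys, req);
	handshake_accept_req_free(req);
	if (rsp) {
		printf("handshake fd: %u\n", rsp->sockfd);
		handshake_accept_rsp_free(rsp);
	}

	ynl_sock_destroy(ys);
	return 0;
}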
diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c
new file mode 100644
index 000000000000..4eb8aefef0cd
--- /dev/null
+++ b/tools/net/ynl/generated/netdev-user.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN user source */
+
+#include <stdlib.h>
+#include <string.h>
+#include "netdev-user.h"
+#include "ynl.h"
+#include <linux/netdev.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+/* Enums */
+static const char * const netdev_op_strmap[] = {
+ [NETDEV_CMD_DEV_GET] = "dev-get",
+ [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf",
+ [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf",
+ [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf",
+};
+
+const char *netdev_op_str(int op)
+{
+ if (op < 0 || op >= (int)MNL_ARRAY_SIZE(netdev_op_strmap))
+ return NULL;
+ return netdev_op_strmap[op];
+}
+
+static const char * const netdev_xdp_act_strmap[] = {
+ [0] = "basic",
+ [1] = "redirect",
+ [2] = "ndo-xmit",
+ [3] = "xsk-zerocopy",
+ [4] = "hw-offload",
+ [5] = "rx-sg",
+ [6] = "ndo-xmit-sg",
+};
+
+const char *netdev_xdp_act_str(enum netdev_xdp_act value)
+{
+ value = ffs(value) - 1;
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_act_strmap))
+ return NULL;
+ return netdev_xdp_act_strmap[value];
+}
+
+/* Policies */
+struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = {
+ [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, },
+ [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
+ [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, },
+};
+
+struct ynl_policy_nest netdev_dev_nest = {
+ .max_attr = NETDEV_A_DEV_MAX,
+ .table = netdev_dev_policy,
+};
+
+/* Common nested types */
+/* ============== NETDEV_CMD_DEV_GET ============== */
+/* NETDEV_CMD_DEV_GET - do */
+void netdev_dev_get_req_free(struct netdev_dev_get_req *req)
+{
+ free(req);
+}
+
+void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp)
+{
+ free(rsp);
+}
+
+int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct netdev_dev_get_rsp *dst;
+ const struct nlattr *attr;
+
+ dst = yarg->data;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ unsigned int type = mnl_attr_get_type(attr);
+
+ if (type == NETDEV_A_DEV_IFINDEX) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.ifindex = 1;
+ dst->ifindex = mnl_attr_get_u32(attr);
+ } else if (type == NETDEV_A_DEV_XDP_FEATURES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.xdp_features = 1;
+ dst->xdp_features = mnl_attr_get_u64(attr);
+ }
+ }
+
+ return MNL_CB_OK;
+}
+
+struct netdev_dev_get_rsp *
+netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req)
+{
+ struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
+ struct netdev_dev_get_rsp *rsp;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1);
+ ys->req_policy = &netdev_dev_nest;
+ yrs.yarg.rsp_policy = &netdev_dev_nest;
+
+ if (req->_present.ifindex)
+ mnl_attr_put_u32(nlh, NETDEV_A_DEV_IFINDEX, req->ifindex);
+
+ rsp = calloc(1, sizeof(*rsp));
+ yrs.yarg.data = rsp;
+ yrs.cb = netdev_dev_get_rsp_parse;
+ yrs.rsp_cmd = NETDEV_CMD_DEV_GET;
+
+ err = ynl_exec(ys, nlh, &yrs);
+ if (err < 0)
+ goto err_free;
+
+ return rsp;
+
+err_free:
+ netdev_dev_get_rsp_free(rsp);
+ return NULL;
+}
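+/* The pattern above repeats for every generated command: start a genl
+ * request, attach the request policy, emit the present attributes, then
+ * ynl_exec() with the response parse callback filling in the rsp struct.
+ */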
+
+/* NETDEV_CMD_DEV_GET - dump */
+void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp)
+{
+ struct netdev_dev_get_list *next = rsp;
+
+ while ((void *)next != YNL_LIST_END) {
+ rsp = next;
+ next = rsp->next;
+
+ free(rsp);
+ }
+}
+
+struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys)
+{
+ struct ynl_dump_state yds = {};
+ struct nlmsghdr *nlh;
+ int err;
+
+ yds.ys = ys;
+ yds.alloc_sz = sizeof(struct netdev_dev_get_list);
+ yds.cb = netdev_dev_get_rsp_parse;
+ yds.rsp_cmd = NETDEV_CMD_DEV_GET;
+ yds.rsp_policy = &netdev_dev_nest;
+
+ nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1);
+
+ err = ynl_exec_dump(ys, nlh, &yds);
+ if (err < 0)
+ goto free_list;
+
+ return yds.first;
+
+free_list:
+ netdev_dev_get_list_free(yds.first);
+ return NULL;
+}
+
+/* NETDEV_CMD_DEV_GET - notify */
+void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp)
+{
+ free(rsp);
+}
+
+static const struct ynl_ntf_info netdev_ntf_info[] = {
+ [NETDEV_CMD_DEV_ADD_NTF] = {
+ .alloc_sz = sizeof(struct netdev_dev_get_ntf),
+ .cb = netdev_dev_get_rsp_parse,
+ .policy = &netdev_dev_nest,
+ .free = (void *)netdev_dev_get_ntf_free,
+ },
+ [NETDEV_CMD_DEV_DEL_NTF] = {
+ .alloc_sz = sizeof(struct netdev_dev_get_ntf),
+ .cb = netdev_dev_get_rsp_parse,
+ .policy = &netdev_dev_nest,
+ .free = (void *)netdev_dev_get_ntf_free,
+ },
+ [NETDEV_CMD_DEV_CHANGE_NTF] = {
+ .alloc_sz = sizeof(struct netdev_dev_get_ntf),
+ .cb = netdev_dev_get_rsp_parse,
+ .policy = &netdev_dev_nest,
+ .free = (void *)netdev_dev_get_ntf_free,
+ },
+};
+
+const struct ynl_family ynl_netdev_family = {
+ .name = "netdev",
+ .ntf_info = netdev_ntf_info,
+ .ntf_info_size = MNL_ARRAY_SIZE(netdev_ntf_info),
+};
diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h
new file mode 100644
index 000000000000..5554dc69bb9c
--- /dev/null
+++ b/tools/net/ynl/generated/netdev-user.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN user header */
+
+#ifndef _LINUX_NETDEV_GEN_H
+#define _LINUX_NETDEV_GEN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>
+#include <linux/netdev.h>
+
+struct ynl_sock;
+
+extern const struct ynl_family ynl_netdev_family;
+
+/* Enums */
+const char *netdev_op_str(int op);
+const char *netdev_xdp_act_str(enum netdev_xdp_act value);
+
+/* Common nested types */
+/* ============== NETDEV_CMD_DEV_GET ============== */
+/* NETDEV_CMD_DEV_GET - do */
+struct netdev_dev_get_req {
+ struct {
+ __u32 ifindex:1;
+ } _present;
+
+ __u32 ifindex;
+};
+
+static inline struct netdev_dev_get_req *netdev_dev_get_req_alloc(void)
+{
+ return calloc(1, sizeof(struct netdev_dev_get_req));
+}
+void netdev_dev_get_req_free(struct netdev_dev_get_req *req);
+
+static inline void
+netdev_dev_get_req_set_ifindex(struct netdev_dev_get_req *req, __u32 ifindex)
+{
+ req->_present.ifindex = 1;
+ req->ifindex = ifindex;
+}
+
+struct netdev_dev_get_rsp {
+ struct {
+ __u32 ifindex:1;
+ __u32 xdp_features:1;
+ } _present;
+
+ __u32 ifindex;
+ __u64 xdp_features;
+};
+
+void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp);
+
+/*
+ * Get / dump information about a netdev.
+ */
+struct netdev_dev_get_rsp *
+netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req);
+
+/* NETDEV_CMD_DEV_GET - dump */
+struct netdev_dev_get_list {
+ struct netdev_dev_get_list *next;
+ struct netdev_dev_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp);
+
+struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys);
+
+/* NETDEV_CMD_DEV_GET - notify */
+struct netdev_dev_get_ntf {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct netdev_dev_get_ntf *ntf);
+ struct netdev_dev_get_rsp obj __attribute__ ((aligned (8)));
+};
+
+void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp);
+
+#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
new file mode 100644
index 000000000000..d2e50fd0a52d
--- /dev/null
+++ b/tools/net/ynl/lib/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CC=gcc
+CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+ifeq ("$(DEBUG)","1")
+ CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+
+SRCS=$(wildcard *.c)
+OBJS=$(patsubst %.c,%.o,${SRCS})
+
+include $(wildcard *.d)
+
+all: ynl.a
+
+ynl.a: $(OBJS)
+ ar rcs $@ $(OBJS)
+clean:
+ rm -f *.o *.d *~
+
+hardclean: clean
+ rm -f *.a
+
+%.o: %.c
+ $(COMPILE.c) -MMD -c -o $@ $<
+
+.PHONY: all clean
+.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index a0241add3839..0ff0d18666b2 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -154,6 +154,9 @@ class SpecAttr(SpecElement):
is_multi bool, attr may repeat multiple times
struct_name string, name of struct definition
sub_type string, name of sub type
+ len integer, optional byte length of binary types
+ display_hint string, hint to help choose format specifier
+ when displaying the value
"""
def __init__(self, family, attr_set, yaml, value):
super().__init__(family, yaml)
@@ -164,6 +167,8 @@ class SpecAttr(SpecElement):
self.struct_name = yaml.get('struct')
self.sub_type = yaml.get('sub-type')
self.byte_order = yaml.get('byte-order')
+ self.len = yaml.get('len')
+ self.display_hint = yaml.get('display-hint')
class SpecAttrSet(SpecElement):
@@ -226,11 +231,20 @@ class SpecStructMember(SpecElement):
Represents a single struct member attribute.
Attributes:
- type string, type of the member attribute
+ type string, type of the member attribute
+ byte_order string or None for native byte order
+ enum string, name of the enum definition
+ len integer, optional byte length of binary types
+ display_hint string, hint to help choose format specifier
+ when displaying the value
"""
def __init__(self, family, yaml):
super().__init__(family, yaml)
self.type = yaml['type']
+ self.byte_order = yaml.get('byte-order')
+ self.enum = yaml.get('enum')
+ self.len = yaml.get('len')
+ self.display_hint = yaml.get('display-hint')
class SpecStruct(SpecElement):
@@ -320,16 +334,17 @@ class SpecFamily(SpecElement):
Attributes:
proto protocol type (e.g. genetlink)
+ msg_id_model enum-model for operations (unified, directional, etc.)

license spec license (loaded from an SPDX tag on the spec)
attr_sets dict of attribute sets
msgs dict of all messages (index by name)
- msgs_by_value dict of all messages (indexed by name)
ops dict of all valid requests / responses
+ ntfs dict of all async events
consts dict of all constants/enums
fixed_header string, optional name of family default fixed header struct
"""
- def __init__(self, spec_path, schema_path=None):
+ def __init__(self, spec_path, schema_path=None, exclude_ops=None):
with open(spec_path, "r") as stream:
prefix = '# SPDX-License-Identifier: '
first = stream.readline().strip()
@@ -344,7 +359,10 @@ class SpecFamily(SpecElement):
super().__init__(self, spec)
+ self._exclude_ops = exclude_ops if exclude_ops else []
+
self.proto = self.yaml.get('protocol', 'genetlink')
+ self.msg_id_model = self.yaml['operations'].get('enum-model', 'unified')
if schema_path is None:
schema_path = os.path.dirname(os.path.dirname(spec_path)) + f'/{self.proto}.yaml'
@@ -364,6 +382,7 @@ class SpecFamily(SpecElement):
self.req_by_value = collections.OrderedDict()
self.rsp_by_value = collections.OrderedDict()
self.ops = collections.OrderedDict()
+ self.ntfs = collections.OrderedDict()
self.consts = collections.OrderedDict()
last_exception = None
@@ -416,7 +435,7 @@ class SpecFamily(SpecElement):
self.fixed_header = self.yaml['operations'].get('fixed-header')
req_val = rsp_val = 1
for elem in self.yaml['operations']['list']:
- if 'notify' in elem:
+ if 'notify' in elem or 'event' in elem:
if 'value' in elem:
rsp_val = elem['value']
req_val_next = req_val
@@ -438,7 +457,17 @@ class SpecFamily(SpecElement):
else:
raise Exception("Can't parse directional ops")
- op = self.new_operation(elem, req_val, rsp_val)
+ if req_val == req_val_next:
+ req_val = None
+ if rsp_val == rsp_val_next:
+ rsp_val = None
+
+ skip = False
+ for exclude in self._exclude_ops:
+ skip |= bool(exclude.match(elem['name']))
+ if not skip:
+ op = self.new_operation(elem, req_val, rsp_val)
+
req_val = req_val_next
rsp_val = rsp_val_next
@@ -469,10 +498,9 @@ class SpecFamily(SpecElement):
attr_set = self.new_attr_set(elem)
self.attr_sets[elem['name']] = attr_set
- msg_id_model = self.yaml['operations'].get('enum-model', 'unified')
- if msg_id_model == 'unified':
+ if self.msg_id_model == 'unified':
self._dictify_ops_unified()
- elif msg_id_model == 'directional':
+ elif self.msg_id_model == 'directional':
self._dictify_ops_directional()
for op in self.msgs.values():
@@ -482,3 +510,5 @@ class SpecFamily(SpecElement):
self.rsp_by_value[op.rsp_value] = op
if not op.is_async and 'attribute-set' in op:
self.ops[op.name] = op
+ elif op.is_async:
+ self.ntfs[op.name] = op
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
new file mode 100644
index 000000000000..514e0d69e731
--- /dev/null
+++ b/tools/net/ynl/lib/ynl.c
@@ -0,0 +1,901 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#include <errno.h>
+#include <poll.h>
+#include <string.h>
+#include <stdlib.h>
+#include <linux/types.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+
+#include "ynl.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*arr))
+
+#define __yerr_msg(yse, _msg...) \
+ ({ \
+ struct ynl_error *_yse = (yse); \
+ \
+ if (_yse) { \
+ snprintf(_yse->msg, sizeof(_yse->msg) - 1, _msg); \
+ _yse->msg[sizeof(_yse->msg) - 1] = 0; \
+ } \
+ })
+
+#define __yerr_code(yse, _code...) \
+ ({ \
+ struct ynl_error *_yse = (yse); \
+ \
+ if (_yse) { \
+ _yse->code = _code; \
+ } \
+ })
+
+#define __yerr(yse, _code, _msg...) \
+ ({ \
+ __yerr_msg(yse, _msg); \
+ __yerr_code(yse, _code); \
+ })
+
+#define __perr(yse, _msg) __yerr(yse, errno, _msg)
+
+#define yerr_msg(_ys, _msg...) __yerr_msg(&(_ys)->err, _msg)
+#define yerr(_ys, _code, _msg...) __yerr(&(_ys)->err, _code, _msg)
+#define perr(_ys, _msg) __yerr(&(_ys)->err, errno, _msg)
+
+/* -- Netlink boilerplate */
+static int
+ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type,
+ char *str, int str_sz, int *n)
+{
+ if (!policy) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "!policy");
+ return 1;
+ }
+
+ if (type > policy->max_attr) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "!oob");
+ return 1;
+ }
+
+ if (!policy->table[type].name) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "!name");
+ return 1;
+ }
+
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz - *n,
+ ".%s", policy->table[type].name);
+ return 0;
+}
+
+static int
+ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
+ struct ynl_policy_nest *policy, char *str, int str_sz,
+ struct ynl_policy_nest **nest_pol)
+{
+ unsigned int astart_off, aend_off;
+ const struct nlattr *attr;
+ unsigned int data_len;
+ unsigned int type;
+ bool found = false;
+ int n = 0;
+
+ if (!policy) {
+ if (n < str_sz)
+ n += snprintf(str, str_sz, "!policy");
+ return n;
+ }
+
+ data_len = end - start;
+
+ mnl_attr_for_each_payload(start, data_len) {
+ astart_off = (char *)attr - (char *)start;
+ aend_off = astart_off + mnl_attr_get_payload_len(attr);
+ if (aend_off <= off)
+ continue;
+
+ found = true;
+ break;
+ }
+ if (!found)
+ return 0;
+
+ off -= astart_off;
+
+ type = mnl_attr_get_type(attr);
+
+ if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
+ return n;
+
+ if (!off) {
+ if (nest_pol)
+ *nest_pol = policy->table[type].nest;
+ return n;
+ }
+
+ if (!policy->table[type].nest) {
+ if (n < str_sz)
+ n += snprintf(str, str_sz, "!nest");
+ return n;
+ }
+
+ off -= sizeof(struct nlattr);
+ start = mnl_attr_get_payload(attr);
+ end = start + mnl_attr_get_payload_len(attr);
+
+ return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
+ &str[n], str_sz - n, nest_pol);
+}
+
+#define NLMSGERR_ATTR_MISS_TYPE (NLMSGERR_ATTR_POLICY + 1)
+#define NLMSGERR_ATTR_MISS_NEST (NLMSGERR_ATTR_POLICY + 2)
+#define NLMSGERR_ATTR_MAX (NLMSGERR_ATTR_MAX + 2)
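+/* Note: redefining NLMSGERR_ATTR_MAX in terms of itself works because
+ * the original NLMSGERR_ATTR_MAX is an enum constant, not a macro, so
+ * the name inside the replacement list still resolves to the old value.
+ */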
+
+static int
+ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
+ unsigned int hlen)
+{
+ const struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {};
+ char miss_attr[sizeof(ys->err.msg)];
+ char bad_attr[sizeof(ys->err.msg)];
+ const struct nlattr *attr;
+ const char *str = NULL;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return MNL_CB_OK;
+
+ mnl_attr_for_each(attr, nlh, hlen) {
+ unsigned int len, type;
+
+ len = mnl_attr_get_payload_len(attr);
+ type = mnl_attr_get_type(attr);
+
+ if (type > NLMSGERR_ATTR_MAX)
+ continue;
+
+ tb[type] = attr;
+
+ switch (type) {
+ case NLMSGERR_ATTR_OFFS:
+ case NLMSGERR_ATTR_MISS_TYPE:
+ case NLMSGERR_ATTR_MISS_NEST:
+ if (len != sizeof(__u32))
+ return MNL_CB_ERROR;
+ break;
+ case NLMSGERR_ATTR_MSG:
+ str = mnl_attr_get_payload(attr);
+ if (str[len - 1])
+ return MNL_CB_ERROR;
+ break;
+ default:
+ break;
+ }
+ }
+
+ bad_attr[0] = '\0';
+ miss_attr[0] = '\0';
+
+ if (tb[NLMSGERR_ATTR_OFFS]) {
+ unsigned int n, off;
+ void *start, *end;
+
+ ys->err.attr_offs = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
+
+ n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
+ str ? " (" : "");
+
+ start = mnl_nlmsg_get_payload_offset(ys->nlh,
+ sizeof(struct genlmsghdr));
+ end = mnl_nlmsg_get_payload_tail(ys->nlh);
+
+ off = ys->err.attr_offs;
+ off -= sizeof(struct nlmsghdr);
+ off -= sizeof(struct genlmsghdr);
+
+ n += ynl_err_walk(ys, start, end, off, ys->req_policy,
+ &bad_attr[n], sizeof(bad_attr) - n, NULL);
+
+ if (n >= sizeof(bad_attr))
+ n = sizeof(bad_attr) - 1;
+ bad_attr[n] = '\0';
+ }
+ if (tb[NLMSGERR_ATTR_MISS_TYPE]) {
+ struct ynl_policy_nest *nest_pol = NULL;
+ unsigned int n, off, type;
+ void *start, *end;
+ int n2;
+
+ type = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]);
+
+ n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
+ bad_attr[0] ? ", " : (str ? " (" : ""));
+
+ start = mnl_nlmsg_get_payload_offset(ys->nlh,
+ sizeof(struct genlmsghdr));
+ end = mnl_nlmsg_get_payload_tail(ys->nlh);
+
+ nest_pol = ys->req_policy;
+ if (tb[NLMSGERR_ATTR_MISS_NEST]) {
+ off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
+ off -= sizeof(struct nlmsghdr);
+ off -= sizeof(struct genlmsghdr);
+
+ n += ynl_err_walk(ys, start, end, off, ys->req_policy,
+ &miss_attr[n], sizeof(miss_attr) - n,
+ &nest_pol);
+ }
+
+ n2 = 0;
+ ynl_err_walk_report_one(nest_pol, type, &miss_attr[n],
+ sizeof(miss_attr) - n, &n2);
+ n += n2;
+
+ if (n >= sizeof(miss_attr))
+ n = sizeof(miss_attr) - 1;
+ miss_attr[n] = '\0';
+ }
+
+ /* Implicitly depends on ys->err.code already being set */
+ if (str)
+ yerr_msg(ys, "Kernel %s: '%s'%s%s%s",
+ ys->err.code ? "error" : "warning",
+ str, bad_attr, miss_attr,
+ bad_attr[0] || miss_attr[0] ? ")" : "");
+ else if (bad_attr[0] || miss_attr[0])
+ yerr_msg(ys, "Kernel %s: %s%s",
+ ys->err.code ? "error" : "warning",
+ bad_attr, miss_attr);
+
+ return MNL_CB_OK;
+}
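+/* Example of the message built above, with hypothetical attribute names:
+ *   Kernel error: 'Invalid argument' (bad attribute: .header.dev-name)
+ */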
+
+static int ynl_cb_error(const struct nlmsghdr *nlh, void *data)
+{
+ const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh);
+ struct ynl_parse_arg *yarg = data;
+ unsigned int hlen;
+ int code;
+
+ code = err->error >= 0 ? err->error : -err->error;
+ yarg->ys->err.code = code;
+ errno = code;
+
+ hlen = sizeof(*err);
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ hlen += mnl_nlmsg_get_payload_len(&err->msg);
+
+ ynl_ext_ack_check(yarg->ys, nlh, hlen);
+
+ return code ? MNL_CB_ERROR : MNL_CB_STOP;
+}
+
+static int ynl_cb_done(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ int err;
+
+ err = *(int *)NLMSG_DATA(nlh);
+ if (err < 0) {
+ yarg->ys->err.code = -err;
+ errno = -err;
+
+ ynl_ext_ack_check(yarg->ys, nlh, sizeof(int));
+
+ return MNL_CB_ERROR;
+ }
+ return MNL_CB_STOP;
+}
+
+static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data)
+{
+ return MNL_CB_OK;
+}
+
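+/* Dispatch table for netlink control messages, passed to mnl_cb_run2()
+ * so that NLMSG_ERROR and NLMSG_DONE are handled the same way on every
+ * receive path.
+ */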
+mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = {
+ [NLMSG_NOOP] = ynl_cb_noop,
+ [NLMSG_ERROR] = ynl_cb_error,
+ [NLMSG_DONE] = ynl_cb_done,
+ [NLMSG_OVERRUN] = ynl_cb_noop,
+};
+
+/* Attribute validation */
+
+int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
+{
+ struct ynl_policy_attr *policy;
+ unsigned int type, len;
+ unsigned char *data;
+
+ data = mnl_attr_get_payload(attr);
+ len = mnl_attr_get_payload_len(attr);
+ type = mnl_attr_get_type(attr);
+ if (type > yarg->rsp_policy->max_attr) {
+ yerr(yarg->ys, YNL_ERROR_INTERNAL,
+ "Internal error, validating unknown attribute");
+ return -1;
+ }
+
+ policy = &yarg->rsp_policy->table[type];
+
+ switch (policy->type) {
+ case YNL_PT_REJECT:
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Rejected attribute (%s)", policy->name);
+ return -1;
+ case YNL_PT_IGNORE:
+ break;
+ case YNL_PT_U8:
+ if (len == sizeof(__u8))
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (u8 %s)", policy->name);
+ return -1;
+ case YNL_PT_U16:
+ if (len == sizeof(__u16))
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (u16 %s)", policy->name);
+ return -1;
+ case YNL_PT_U32:
+ if (len == sizeof(__u32))
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (u32 %s)", policy->name);
+ return -1;
+ case YNL_PT_U64:
+ if (len == sizeof(__u64))
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (u64 %s)", policy->name);
+ return -1;
+ case YNL_PT_FLAG:
+ /* Let flags grow into real attrs, why not.. */
+ break;
+ case YNL_PT_NEST:
+ if (!len || len >= sizeof(*attr))
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (nest %s)", policy->name);
+ return -1;
+ case YNL_PT_BINARY:
+ if (!policy->len || len == policy->len)
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (binary %s)", policy->name);
+ return -1;
+ case YNL_PT_NUL_STR:
+ if ((!policy->len || len <= policy->len) && !data[len - 1])
+ break;
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (string %s)", policy->name);
+ return -1;
+ default:
+ yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
+ "Invalid attribute (unknown %s)", policy->name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Generic code */
+
+static void ynl_err_reset(struct ynl_sock *ys)
+{
+ ys->err.code = 0;
+ ys->err.attr_offs = 0;
+ ys->err.msg[0] = 0;
+}
+
+struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags)
+{
+ struct nlmsghdr *nlh;
+
+ ynl_err_reset(ys);
+
+ nlh = ys->nlh = mnl_nlmsg_put_header(ys->tx_buf);
+ nlh->nlmsg_type = id;
+ nlh->nlmsg_flags = flags;
+ nlh->nlmsg_seq = ++ys->seq;
+
+ return nlh;
+}
+
+struct nlmsghdr *
+ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
+ __u8 cmd, __u8 version)
+{
+ struct genlmsghdr gehdr;
+ struct nlmsghdr *nlh;
+ void *data;
+
+ nlh = ynl_msg_start(ys, id, flags);
+
+ memset(&gehdr, 0, sizeof(gehdr));
+ gehdr.cmd = cmd;
+ gehdr.version = version;
+
+ data = mnl_nlmsg_put_extra_header(nlh, sizeof(gehdr));
+ memcpy(data, &gehdr, sizeof(gehdr));
+
+ return nlh;
+}
+
+void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+{
+ ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+}
+
+void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+{
+ ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+}
+
+struct nlmsghdr *
+ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
+{
+ return ynl_gemsg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK, cmd, version);
+}
+
+struct nlmsghdr *
+ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
+{
+ return ynl_gemsg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
+ cmd, version);
+}
+
+int ynl_recv_ack(struct ynl_sock *ys, int ret)
+{
+ if (!ret) {
+ yerr(ys, YNL_ERROR_EXPECT_ACK,
+ "Expecting an ACK but nothing received");
+ return -1;
+ }
+
+ ret = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perr(ys, "Socket receive failed");
+ return ret;
+ }
+ return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid,
+ ynl_cb_null, ys);
+}
+
+int ynl_cb_null(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+
+ yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG,
+ "Received a message when none were expected");
+
+ return MNL_CB_ERROR;
+}
+
+/* Init/fini and genetlink boilerplate */
+static int
+ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
+{
+ const struct nlattr *entry, *attr;
+ unsigned int i;
+
+ mnl_attr_for_each_nested(attr, mcasts)
+ ys->n_mcast_groups++;
+
+ if (!ys->n_mcast_groups)
+ return 0;
+
+ ys->mcast_groups = calloc(ys->n_mcast_groups,
+ sizeof(*ys->mcast_groups));
+ if (!ys->mcast_groups)
+ return MNL_CB_ERROR;
+
+ i = 0;
+ mnl_attr_for_each_nested(entry, mcasts) {
+ mnl_attr_for_each_nested(attr, entry) {
+ if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_ID)
+ ys->mcast_groups[i].id = mnl_attr_get_u32(attr);
+ if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) {
+ strncpy(ys->mcast_groups[i].name,
+ mnl_attr_get_str(attr),
+ GENL_NAMSIZ - 1);
+ ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_parse_arg *yarg = data;
+ struct ynl_sock *ys = yarg->ys;
+ const struct nlattr *attr;
+ bool found_id = false;
+
+ mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
+ if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GROUPS)
+ if (ynl_get_family_info_mcast(ys, attr))
+ return MNL_CB_ERROR;
+
+ if (mnl_attr_get_type(attr) != CTRL_ATTR_FAMILY_ID)
+ continue;
+
+ if (mnl_attr_get_payload_len(attr) != sizeof(__u16)) {
+ yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID");
+ return MNL_CB_ERROR;
+ }
+
+ ys->family_id = mnl_attr_get_u16(attr);
+ found_id = true;
+ }
+
+ if (!found_id) {
+ yerr(ys, YNL_ERROR_ATTR_MISSING, "Family ID missing");
+ return MNL_CB_ERROR;
+ }
+ return MNL_CB_OK;
+}
+
+static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
+{
+ struct ynl_parse_arg yarg = { .ys = ys, };
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1);
+ mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
+
+ err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);
+ if (err < 0) {
+ perr(ys, "failed to request socket family info");
+ return err;
+ }
+
+ err = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);
+ if (err <= 0) {
+ perr(ys, "failed to receive the socket family info");
+ return err;
+ }
+ err = mnl_cb_run2(ys->rx_buf, err, ys->seq, ys->portid,
+ ynl_get_family_info_cb, &yarg,
+ ynl_cb_array, ARRAY_SIZE(ynl_cb_array));
+ if (err < 0) {
+ free(ys->mcast_groups);
+ perr(ys, "failed to receive the socket family info - no such family?");
+ return err;
+ }
+
+ return ynl_recv_ack(ys, err);
+}
+
+struct ynl_sock *
+ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
+{
+ struct ynl_sock *ys;
+ int one = 1;
+
+ ys = malloc(sizeof(*ys) + 2 * MNL_SOCKET_BUFFER_SIZE);
+ if (!ys)
+ return NULL;
+ memset(ys, 0, sizeof(*ys));
+
+ ys->family = yf;
+ ys->tx_buf = &ys->raw_buf[0];
+ ys->rx_buf = &ys->raw_buf[MNL_SOCKET_BUFFER_SIZE];
+ ys->ntf_last_next = &ys->ntf_first;
+
+ ys->sock = mnl_socket_open(NETLINK_GENERIC);
+ if (!ys->sock) {
+ __perr(yse, "failed to create a netlink socket");
+ goto err_free_sock;
+ }
+
+ if (mnl_socket_setsockopt(ys->sock, NETLINK_CAP_ACK,
+ &one, sizeof(one))) {
+ __perr(yse, "failed to enable netlink ACK");
+ goto err_close_sock;
+ }
+ if (mnl_socket_setsockopt(ys->sock, NETLINK_EXT_ACK,
+ &one, sizeof(one))) {
+ __perr(yse, "failed to enable netlink ext ACK");
+ goto err_close_sock;
+ }
+
+ ys->seq = random();
+ ys->portid = mnl_socket_get_portid(ys->sock);
+
+ if (ynl_sock_read_family(ys, yf->name)) {
+ if (yse)
+ memcpy(yse, &ys->err, sizeof(*yse));
+ goto err_close_sock;
+ }
+
+ return ys;
+
+err_close_sock:
+ mnl_socket_close(ys->sock);
+err_free_sock:
+ free(ys);
+ return NULL;
+}
+
+void ynl_sock_destroy(struct ynl_sock *ys)
+{
+ struct ynl_ntf_base_type *ntf;
+
+ mnl_socket_close(ys->sock);
+ while ((ntf = ynl_ntf_dequeue(ys)))
+ ynl_ntf_free(ntf);
+ free(ys->mcast_groups);
+ free(ys);
+}
+
+/* YNL multicast handling */
+
+void ynl_ntf_free(struct ynl_ntf_base_type *ntf)
+{
+ ntf->free(ntf);
+}
+
+int ynl_subscribe(struct ynl_sock *ys, const char *grp_name)
+{
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < ys->n_mcast_groups; i++)
+ if (!strcmp(ys->mcast_groups[i].name, grp_name))
+ break;
+ if (i == ys->n_mcast_groups) {
+ yerr(ys, ENOENT, "Multicast group '%s' not found", grp_name);
+ return -1;
+ }
+
+ err = mnl_socket_setsockopt(ys->sock, NETLINK_ADD_MEMBERSHIP,
+ &ys->mcast_groups[i].id,
+ sizeof(ys->mcast_groups[i].id));
+ if (err < 0) {
+ perr(ys, "Subscribing to multicast group failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+int ynl_socket_get_fd(struct ynl_sock *ys)
+{
+ return mnl_socket_get_fd(ys->sock);
+}
+
+struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys)
+{
+ struct ynl_ntf_base_type *ntf;
+
+ if (!ynl_has_ntf(ys))
+ return NULL;
+
+ ntf = ys->ntf_first;
+ ys->ntf_first = ntf->next;
+ if (ys->ntf_last_next == &ntf->next)
+ ys->ntf_last_next = &ys->ntf_first;
+
+ return ntf;
+}
+
+static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
+{
+ struct ynl_parse_arg yarg = { .ys = ys, };
+ const struct ynl_ntf_info *info;
+ struct ynl_ntf_base_type *rsp;
+ struct genlmsghdr *gehdr;
+ int ret;
+
+ gehdr = mnl_nlmsg_get_payload(nlh);
+ if (gehdr->cmd >= ys->family->ntf_info_size)
+ return MNL_CB_ERROR;
+ info = &ys->family->ntf_info[gehdr->cmd];
+ if (!info->cb)
+ return MNL_CB_ERROR;
+
+ rsp = calloc(1, info->alloc_sz);
+ rsp->free = info->free;
+ yarg.data = rsp->data;
+ yarg.rsp_policy = info->policy;
+
+ ret = info->cb(nlh, &yarg);
+ if (ret <= MNL_CB_STOP)
+ goto err_free;
+
+ rsp->family = nlh->nlmsg_type;
+ rsp->cmd = gehdr->cmd;
+
+ *ys->ntf_last_next = rsp;
+ ys->ntf_last_next = &rsp->next;
+
+ return MNL_CB_OK;
+
+err_free:
+ info->free(rsp);
+ return MNL_CB_ERROR;
+}
+
+static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data)
+{
+ return ynl_ntf_parse((struct ynl_sock *)data, nlh);
+}
+
+int ynl_ntf_check(struct ynl_sock *ys)
+{
+ ssize_t len;
+ int err;
+
+ do {
+ /* libmnl doesn't let us pass flags to the recv to make
+ * it non-blocking, so we need to poll() or peek() :|
+ */
+ struct pollfd pfd = { };
+
+ pfd.fd = mnl_socket_get_fd(ys->sock);
+ pfd.events = POLLIN;
+ err = poll(&pfd, 1, 1);
+ if (err < 1)
+ return err;
+
+ len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
+ MNL_SOCKET_BUFFER_SIZE);
+ if (len < 0)
+ return len;
+
+ err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
+ ynl_ntf_trampoline, ys,
+ ynl_cb_array, NLMSG_MIN_TYPE);
+ if (err < 0)
+ return err;
+ } while (err > 0);
+
+ return 0;
+}
+
+/* YNL specific helpers used by the auto-generated code */
+
+struct ynl_dump_list_type *YNL_LIST_END = (void *)(0xb4d123);
+
+void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd)
+{
+ yerr(ys, YNL_ERROR_UNKNOWN_NTF,
+ "Unknown notification message type '%d'", cmd);
+}
+
+int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
+{
+ yerr(yarg->ys, YNL_ERROR_INV_RESP, "Error parsing response: %s", msg);
+ return MNL_CB_ERROR;
+}
+
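+/* An "alien" is any message other than the expected response, e.g. a
+ * multicast notification interleaved with a reply; aliens are queued
+ * via ynl_ntf_parse() instead of failing the request.
+ */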
+static int
+ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
+{
+ struct genlmsghdr *gehdr;
+
+ if (mnl_nlmsg_get_payload_len(nlh) < sizeof(*gehdr)) {
+ yerr(ys, YNL_ERROR_INV_RESP,
+ "Kernel responded with truncated message");
+ return -1;
+ }
+
+ gehdr = mnl_nlmsg_get_payload(nlh);
+ if (gehdr->cmd != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+
+ return 0;
+}
+
+static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_req_state *yrs = data;
+ int ret;
+
+ ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd);
+ if (ret)
+ return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+
+ return yrs->cb(nlh, &yrs->yarg);
+}
+
+int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+ struct ynl_req_state *yrs)
+{
+ ssize_t len;
+ int err;
+
+ err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+ if (err < 0)
+ return err;
+
+ do {
+ len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
+ MNL_SOCKET_BUFFER_SIZE);
+ if (len < 0)
+ return len;
+
+ err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
+ ynl_req_trampoline, yrs,
+ ynl_cb_array, NLMSG_MIN_TYPE);
+ if (err < 0)
+ return err;
+ } while (err > 0);
+
+ return 0;
+}
+
+static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data)
+{
+ struct ynl_dump_state *ds = data;
+ struct ynl_dump_list_type *obj;
+ struct ynl_parse_arg yarg = {};
+ int ret;
+
+ ret = ynl_check_alien(ds->ys, nlh, ds->rsp_cmd);
+ if (ret)
+ return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK;
+
+ obj = calloc(1, ds->alloc_sz);
+ if (!obj)
+ return MNL_CB_ERROR;
+
+ if (!ds->first)
+ ds->first = obj;
+ if (ds->last)
+ ds->last->next = obj;
+ ds->last = obj;
+
+ yarg.ys = ds->ys;
+ yarg.rsp_policy = ds->rsp_policy;
+ yarg.data = &obj->data;
+
+ return ds->cb(nlh, &yarg);
+}
+
+static void *ynl_dump_end(struct ynl_dump_state *ds)
+{
+ if (!ds->first)
+ return YNL_LIST_END;
+
+ ds->last->next = YNL_LIST_END;
+ return ds->first;
+}
+
+int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+ struct ynl_dump_state *yds)
+{
+ ssize_t len;
+ int err;
+
+ err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len);
+ if (err < 0)
+ return err;
+
+ do {
+ len = mnl_socket_recvfrom(ys->sock, ys->rx_buf,
+ MNL_SOCKET_BUFFER_SIZE);
+ if (len < 0)
+ goto err_close_list;
+
+ err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
+ ynl_dump_trampoline, yds,
+ ynl_cb_array, NLMSG_MIN_TYPE);
+ if (err < 0)
+ goto err_close_list;
+ } while (err > 0);
+
+ yds->first = ynl_dump_end(yds);
+ return 0;
+
+err_close_list:
+ yds->first = ynl_dump_end(yds);
+ return -1;
+}
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
new file mode 100644
index 000000000000..9eafa3552c16
--- /dev/null
+++ b/tools/net/ynl/lib/ynl.h
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#ifndef __YNL_C_H
+#define __YNL_C_H 1
+
+#include <stddef.h>
+#include <libmnl/libmnl.h>
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+struct mnl_socket;
+struct nlmsghdr;
+
+/*
+ * User facing code
+ */
+
+struct ynl_ntf_base_type;
+struct ynl_ntf_info;
+struct ynl_sock;
+
+enum ynl_error_code {
+ YNL_ERROR_NONE = 0,
+ __YNL_ERRNO_END = 4096,
+ YNL_ERROR_INTERNAL,
+ YNL_ERROR_EXPECT_ACK,
+ YNL_ERROR_EXPECT_MSG,
+ YNL_ERROR_UNEXPECT_MSG,
+ YNL_ERROR_ATTR_MISSING,
+ YNL_ERROR_ATTR_INVALID,
+ YNL_ERROR_UNKNOWN_NTF,
+ YNL_ERROR_INV_RESP,
+};
+
+/**
+ * struct ynl_error - error encountered by YNL
+ * @code: errno (low values) or YNL error code (enum ynl_error_code)
+ * @attr_offs: offset of bad attribute (for very advanced users)
+ * @msg: error message
+ *
+ * Error information for when YNL operations fail.
+ * Users should interact with the err member of struct ynl_sock directly.
+ * The main exception to that rule is ynl_sock_create().
+ */
+struct ynl_error {
+ enum ynl_error_code code;
+ unsigned int attr_offs;
+ char msg[512];
+};
+
+/**
+ * struct ynl_family - YNL family info
+ * Family description generated by codegen. Pass to ynl_sock_create().
+ */
+struct ynl_family {
+/* private: */
+ const char *name;
+ const struct ynl_ntf_info *ntf_info;
+ unsigned int ntf_info_size;
+};
+
+/**
+ * struct ynl_sock - YNL wrapped netlink socket
+ * @err: YNL error descriptor, cleared on every request.
+ */
+struct ynl_sock {
+ struct ynl_error err;
+
+/* private: */
+ const struct ynl_family *family;
+ struct mnl_socket *sock;
+ __u32 seq;
+ __u32 portid;
+ __u16 family_id;
+
+ unsigned int n_mcast_groups;
+ struct {
+ unsigned int id;
+ char name[GENL_NAMSIZ];
+ } *mcast_groups;
+
+ struct ynl_ntf_base_type *ntf_first;
+ struct ynl_ntf_base_type **ntf_last_next;
+
+ struct nlmsghdr *nlh;
+ struct ynl_policy_nest *req_policy;
+ unsigned char *tx_buf;
+ unsigned char *rx_buf;
+ unsigned char raw_buf[];
+};
+
+struct ynl_sock *
+ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e);
+void ynl_sock_destroy(struct ynl_sock *ys);
+
+#define ynl_dump_foreach(dump, iter) \
+ for (typeof(dump->obj) *iter = &dump->obj; \
+ !ynl_dump_obj_is_last(iter); \
+ iter = ynl_dump_obj_next(iter))
+
+int ynl_subscribe(struct ynl_sock *ys, const char *grp_name);
+int ynl_socket_get_fd(struct ynl_sock *ys);
+int ynl_ntf_check(struct ynl_sock *ys);
+
+/**
+ * ynl_has_ntf() - check if socket has *parsed* notifications
+ * @ys: active YNL socket
+ *
+ * Note that this does not take into account notifications sitting
+ * in the netlink socket buffer, only those which have already been
+ * read and parsed (e.g. during a ynl_ntf_check() call).
+ */
+static inline bool ynl_has_ntf(struct ynl_sock *ys)
+{
+ return ys->ntf_last_next != &ys->ntf_first;
+}
+struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys);
+
+void ynl_ntf_free(struct ynl_ntf_base_type *ntf);
+
+/*
+ * YNL internals / low level stuff
+ */
+
+/* Generic mnl helper code */
+
+enum ynl_policy_type {
+ YNL_PT_REJECT = 1,
+ YNL_PT_IGNORE,
+ YNL_PT_NEST,
+ YNL_PT_FLAG,
+ YNL_PT_BINARY,
+ YNL_PT_U8,
+ YNL_PT_U16,
+ YNL_PT_U32,
+ YNL_PT_U64,
+ YNL_PT_NUL_STR,
+};
+
+struct ynl_policy_attr {
+ enum ynl_policy_type type;
+ unsigned int len;
+ const char *name;
+ struct ynl_policy_nest *nest;
+};
+
+struct ynl_policy_nest {
+ unsigned int max_attr;
+ struct ynl_policy_attr *table;
+};
+
+struct ynl_parse_arg {
+ struct ynl_sock *ys;
+ struct ynl_policy_nest *rsp_policy;
+ void *data;
+};
+
+struct ynl_dump_list_type {
+ struct ynl_dump_list_type *next;
+ unsigned char data[] __attribute__ ((aligned (8)));
+};
+extern struct ynl_dump_list_type *YNL_LIST_END;
+
+static inline bool ynl_dump_obj_is_last(void *obj)
+{
+ unsigned long uptr = (unsigned long)obj;
+
+ uptr -= offsetof(struct ynl_dump_list_type, data);
+ return uptr == (unsigned long)YNL_LIST_END;
+}
+
+static inline void *ynl_dump_obj_next(void *obj)
+{
+ unsigned long uptr = (unsigned long)obj;
+ struct ynl_dump_list_type *list;
+
+ uptr -= offsetof(struct ynl_dump_list_type, data);
+ list = (void *)uptr;
+ uptr = (unsigned long)list->next;
+ uptr += offsetof(struct ynl_dump_list_type, data);
+
+ return (void *)uptr;
+}
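+
+/* Dump results form a singly linked list of containers, e.g.:
+ *   first -> [next|data] -> [next|data] -> YNL_LIST_END
+ * ynl_dump_foreach() iterates over the data members; the helpers above
+ * translate between an object pointer and its containing list node.
+ */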
+
+struct ynl_ntf_base_type {
+ __u16 family;
+ __u8 cmd;
+ struct ynl_ntf_base_type *next;
+ void (*free)(struct ynl_ntf_base_type *ntf);
+ unsigned char data[] __attribute__ ((aligned (8)));
+};
+
+extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE];
+
+struct nlmsghdr *
+ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
+struct nlmsghdr *
+ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
+
+int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
+
+int ynl_recv_ack(struct ynl_sock *ys, int ret);
+int ynl_cb_null(const struct nlmsghdr *nlh, void *data);
+
+/* YNL specific helpers used by the auto-generated code */
+
+struct ynl_req_state {
+ struct ynl_parse_arg yarg;
+ mnl_cb_t cb;
+ __u32 rsp_cmd;
+};
+
+struct ynl_dump_state {
+ struct ynl_sock *ys;
+ struct ynl_policy_nest *rsp_policy;
+ void *first;
+ struct ynl_dump_list_type *last;
+ size_t alloc_sz;
+ mnl_cb_t cb;
+ __u32 rsp_cmd;
+};
+
+struct ynl_ntf_info {
+ struct ynl_policy_nest *policy;
+ mnl_cb_t cb;
+ size_t alloc_sz;
+ void (*free)(struct ynl_ntf_base_type *ntf);
+};
+
+int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+ struct ynl_req_state *yrs);
+int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh,
+ struct ynl_dump_state *yds);
+
+void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd);
+int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg);
+
+#endif
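
A sketch of how the notification API above is meant to be driven; it
assumes ys was created with ynl_sock_create() and subscribed to a
multicast group with ynl_subscribe():

#include <poll.h>
#include <ynl.h>

static int wait_and_drain(struct ynl_sock *ys)
{
	struct pollfd pfd = { .fd = ynl_socket_get_fd(ys), .events = POLLIN };
	struct ynl_ntf_base_type *ntf;

	if (poll(&pfd, 1, -1) < 1)
		return -1;
	if (ynl_ntf_check(ys))	/* read and parse whatever is queued */
		return -1;
	while ((ntf = ynl_ntf_dequeue(ys))) {
		/* ntf->cmd identifies the notification, ntf->data holds
		 * the family-specific response struct
		 */
		ynl_ntf_free(ntf);
	}
	return 0;
}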
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 3144f33196be..3ca28d4bcb18 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -1,11 +1,15 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+from collections import namedtuple
import functools
import os
import random
import socket
import struct
+from struct import Struct
import yaml
+import ipaddress
+import uuid
from .nlspec import SpecFamily
@@ -76,10 +80,17 @@ class NlError(Exception):
class NlAttr:
- type_formats = { 'u8' : ('B', 1), 's8' : ('b', 1),
- 'u16': ('H', 2), 's16': ('h', 2),
- 'u32': ('I', 4), 's32': ('i', 4),
- 'u64': ('Q', 8), 's64': ('q', 8) }
+ ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little'])
+ type_formats = {
+ 'u8' : ScalarFormat(Struct('B'), Struct("B"), Struct("B")),
+ 's8' : ScalarFormat(Struct('b'), Struct("b"), Struct("b")),
+ 'u16': ScalarFormat(Struct('H'), Struct(">H"), Struct("<H")),
+ 's16': ScalarFormat(Struct('h'), Struct(">h"), Struct("<h")),
+ 'u32': ScalarFormat(Struct('I'), Struct(">I"), Struct("<I")),
+ 's32': ScalarFormat(Struct('i'), Struct(">i"), Struct("<i")),
+ 'u64': ScalarFormat(Struct('Q'), Struct(">Q"), Struct("<Q")),
+ 's64': ScalarFormat(Struct('q'), Struct(">q"), Struct("<q"))
+ }
def __init__(self, raw, offset):
self._len, self._type = struct.unpack("HH", raw[offset:offset + 4])
@@ -88,25 +99,31 @@ class NlAttr:
self.full_len = (self.payload_len + 3) & ~3
self.raw = raw[offset + 4:offset + self.payload_len]
- def format_byte_order(byte_order):
+ @classmethod
+ def get_format(cls, attr_type, byte_order=None):
+ format = cls.type_formats[attr_type]
if byte_order:
- return ">" if byte_order == "big-endian" else "<"
- return ""
-
- def as_u8(self):
- return struct.unpack("B", self.raw)[0]
-
- def as_u16(self, byte_order=None):
- endian = NlAttr.format_byte_order(byte_order)
- return struct.unpack(f"{endian}H", self.raw)[0]
-
- def as_u32(self, byte_order=None):
- endian = NlAttr.format_byte_order(byte_order)
- return struct.unpack(f"{endian}I", self.raw)[0]
+ return format.big if byte_order == "big-endian" \
+ else format.little
+ return format.native
+
+ @classmethod
+ def formatted_string(cls, raw, display_hint):
+ if display_hint == 'mac':
+ formatted = ':'.join('%02x' % b for b in raw)
+ elif display_hint == 'hex':
+ formatted = bytes.hex(raw, ' ')
+ elif display_hint in [ 'ipv4', 'ipv6' ]:
+ formatted = format(ipaddress.ip_address(raw))
+ elif display_hint == 'uuid':
+ formatted = str(uuid.UUID(bytes=raw))
+ else:
+ formatted = raw
+ return formatted
- def as_u64(self, byte_order=None):
- endian = NlAttr.format_byte_order(byte_order)
- return struct.unpack(f"{endian}Q", self.raw)[0]
+ def as_scalar(self, attr_type, byte_order=None):
+ format = self.get_format(attr_type, byte_order)
+ return format.unpack(self.raw)[0]
def as_strz(self):
return self.raw.decode('ascii')[:-1]
@@ -115,18 +132,24 @@ class NlAttr:
return self.raw
def as_c_array(self, type):
- format, _ = self.type_formats[type]
- return list({ x[0] for x in struct.iter_unpack(format, self.raw) })
+ format = self.get_format(type)
+ return [ x[0] for x in format.iter_unpack(self.raw) ]
def as_struct(self, members):
value = dict()
offset = 0
for m in members:
# TODO: handle non-scalar members
- format, size = self.type_formats[m.type]
- decoded = struct.unpack_from(format, self.raw, offset)
- offset += size
- value[m.name] = decoded[0]
+ if m.type == 'binary':
+ decoded = self.raw[offset:offset+m['len']]
+ offset += m['len']
+ elif m.type in NlAttr.type_formats:
+ format = self.get_format(m.type, m.byte_order)
+ [ decoded ] = format.unpack_from(self.raw, offset)
+ offset += format.size
+ if m.display_hint:
+ decoded = self.formatted_string(decoded, m.display_hint)
+ value[m.name] = decoded
return value
def __repr__(self):
@@ -184,11 +207,11 @@ class NlMsg:
if extack.type == Netlink.NLMSGERR_ATTR_MSG:
self.extack['msg'] = extack.as_strz()
elif extack.type == Netlink.NLMSGERR_ATTR_MISS_TYPE:
- self.extack['miss-type'] = extack.as_u32()
+ self.extack['miss-type'] = extack.as_scalar('u32')
elif extack.type == Netlink.NLMSGERR_ATTR_MISS_NEST:
- self.extack['miss-nest'] = extack.as_u32()
+ self.extack['miss-nest'] = extack.as_scalar('u32')
elif extack.type == Netlink.NLMSGERR_ATTR_OFFS:
- self.extack['bad-attr-offs'] = extack.as_u32()
+ self.extack['bad-attr-offs'] = extack.as_scalar('u32')
else:
if 'unknown' not in self.extack:
self.extack['unknown'] = []
@@ -272,11 +295,11 @@ def _genl_load_families():
fam = dict()
for attr in gm.raw_attrs:
if attr.type == Netlink.CTRL_ATTR_FAMILY_ID:
- fam['id'] = attr.as_u16()
+ fam['id'] = attr.as_scalar('u16')
elif attr.type == Netlink.CTRL_ATTR_FAMILY_NAME:
fam['name'] = attr.as_strz()
elif attr.type == Netlink.CTRL_ATTR_MAXATTR:
- fam['maxattr'] = attr.as_u32()
+ fam['maxattr'] = attr.as_scalar('u32')
elif attr.type == Netlink.CTRL_ATTR_MCAST_GROUPS:
fam['mcast'] = dict()
for entry in NlAttrs(attr.raw):
@@ -286,7 +309,7 @@ def _genl_load_families():
if entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_NAME:
mcast_name = entry_attr.as_strz()
elif entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_ID:
- mcast_id = entry_attr.as_u32()
+ mcast_id = entry_attr.as_scalar('u32')
if mcast_name and mcast_id is not None:
fam['mcast'][mcast_name] = mcast_id
if 'name' in fam and 'id' in fam:
@@ -304,9 +327,9 @@ class GenlMsg:
self.fixed_header_attrs = dict()
for m in fixed_header_members:
- format, size = NlAttr.type_formats[m.type]
- decoded = struct.unpack_from(format, nl_msg.raw, offset)
- offset += size
+ format = NlAttr.get_format(m.type, m.byte_order)
+ decoded = format.unpack_from(nl_msg.raw, offset)
+ offset += format.size
self.fixed_header_attrs[m.name] = decoded[0]
self.raw = nl_msg.raw[offset:]
@@ -381,32 +404,23 @@ class YnlFamily(SpecFamily):
attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue)
elif attr["type"] == 'flag':
attr_payload = b''
- elif attr["type"] == 'u8':
- attr_payload = struct.pack("B", int(value))
- elif attr["type"] == 'u16':
- endian = NlAttr.format_byte_order(attr.byte_order)
- attr_payload = struct.pack(f"{endian}H", int(value))
- elif attr["type"] == 'u32':
- endian = NlAttr.format_byte_order(attr.byte_order)
- attr_payload = struct.pack(f"{endian}I", int(value))
- elif attr["type"] == 'u64':
- endian = NlAttr.format_byte_order(attr.byte_order)
- attr_payload = struct.pack(f"{endian}Q", int(value))
elif attr["type"] == 'string':
attr_payload = str(value).encode('ascii') + b'\x00'
elif attr["type"] == 'binary':
- attr_payload = value
+ attr_payload = bytes.fromhex(value)
+ elif attr['type'] in NlAttr.type_formats:
+ format = NlAttr.get_format(attr['type'], attr.byte_order)
+ attr_payload = format.pack(int(value))
else:
raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4)
return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad
- def _decode_enum(self, rsp, attr_spec):
- raw = rsp[attr_spec['name']]
+ def _decode_enum(self, raw, attr_spec):
enum = self.consts[attr_spec['enum']]
- i = attr_spec.get('value-start', 0)
if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']:
+ i = 0
value = set()
while raw:
if raw & 1:
@@ -414,16 +428,22 @@ class YnlFamily(SpecFamily):
raw >>= 1
i += 1
else:
- value = enum.entries_by_val[raw - i].name
- rsp[attr_spec['name']] = value
+ value = enum.entries_by_val[raw].name
+ return value
def _decode_binary(self, attr, attr_spec):
if attr_spec.struct_name:
- decoded = attr.as_struct(self.consts[attr_spec.struct_name])
+ members = self.consts[attr_spec.struct_name]
+ decoded = attr.as_struct(members)
+ for m in members:
+ if m.enum:
+ decoded[m.name] = self._decode_enum(decoded[m.name], m)
elif attr_spec.sub_type:
decoded = attr.as_c_array(attr_spec.sub_type)
else:
decoded = attr.as_bin()
+ if attr_spec.display_hint:
+ decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint)
return decoded
def _decode(self, attrs, space):
@@ -434,22 +454,19 @@ class YnlFamily(SpecFamily):
if attr_spec["type"] == 'nest':
subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'])
decoded = subdict
- elif attr_spec['type'] == 'u8':
- decoded = attr.as_u8()
- elif attr_spec['type'] == 'u16':
- decoded = attr.as_u16(attr_spec.byte_order)
- elif attr_spec['type'] == 'u32':
- decoded = attr.as_u32(attr_spec.byte_order)
- elif attr_spec['type'] == 'u64':
- decoded = attr.as_u64(attr_spec.byte_order)
elif attr_spec["type"] == 'string':
decoded = attr.as_strz()
elif attr_spec["type"] == 'binary':
decoded = self._decode_binary(attr, attr_spec)
elif attr_spec["type"] == 'flag':
decoded = True
+ elif attr_spec["type"] in NlAttr.type_formats:
+ decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
else:
- raise Exception(f'Unknown {attr.type} {attr_spec["name"]} {attr_spec["type"]}')
+ raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
+
+ if 'enum' in attr_spec:
+ decoded = self._decode_enum(decoded, attr_spec)
if not attr_spec.is_multi:
rsp[attr_spec['name']] = decoded
@@ -458,8 +475,6 @@ class YnlFamily(SpecFamily):
else:
rsp[attr_spec.name] = [decoded]
- if 'enum' in attr_spec:
- self._decode_enum(rsp, attr_spec)
return rsp
def _decode_extack_path(self, attrs, attr_set, offset, target):
@@ -554,9 +569,9 @@ class YnlFamily(SpecFamily):
if op.fixed_header:
fixed_header_members = self.consts[op.fixed_header].members
for m in fixed_header_members:
- value = vals.pop(m.name)
- format, _ = NlAttr.type_formats[m.type]
- msg += struct.pack(format, value)
+ value = vals.pop(m.name) if m.name in vals else 0
+ format = NlAttr.get_format(m.type, m.byte_order)
+ msg += format.pack(value)
for name, value in vals.items():
msg += self._add_attr(op.attr_set.name, name, value)
msg = _genl_msg_finalize(msg)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
new file mode 100644
index 000000000000..2aae60c4829f
--- /dev/null
+++ b/tools/net/ynl/samples/.gitignore
@@ -0,0 +1,3 @@
+ethtool
+devlink
+netdev
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
new file mode 100644
index 000000000000..f2db8bb78309
--- /dev/null
+++ b/tools/net/ynl/samples/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+
+include ../Makefile.deps
+
+CC=gcc
+CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \
+ -I../lib/ -I../generated/ -idirafter $(UAPI_PATH)
+ifeq ("$(DEBUG)","1")
+ CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+
+LDLIBS=-lmnl ../lib/ynl.a ../generated/protos.a
+
+SRCS=$(wildcard *.c)
+BINS=$(patsubst %.c,%,${SRCS})
+
+include $(wildcard *.d)
+
+all: $(BINS)
+
+$(BINS): ../lib/ynl.a ../generated/protos.a
+
+clean:
+ rm -f *.o *.d *~
+
+hardclean: clean
+ rm -f $(BINS)
+
+.PHONY: all clean
+.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/samples/devlink.c
new file mode 100644
index 000000000000..d2611d7ebab4
--- /dev/null
+++ b/tools/net/ynl/samples/devlink.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include "devlink-user.h"
+
+int main(int argc, char **argv)
+{
+ struct devlink_get_list *devs;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_devlink_family, NULL);
+ if (!ys)
+ return 1;
+
+ devs = devlink_get_dump(ys);
+ if (!devs)
+ goto err_close;
+
+ ynl_dump_foreach(devs, d) {
+ struct devlink_info_get_req *info_req;
+ struct devlink_info_get_rsp *info_rsp;
+
+ printf("%s/%s:\n", d->bus_name, d->dev_name);
+
+ info_req = devlink_info_get_req_alloc();
+ devlink_info_get_req_set_bus_name(info_req, d->bus_name);
+ devlink_info_get_req_set_dev_name(info_req, d->dev_name);
+
+ info_rsp = devlink_info_get(ys, info_req);
+ devlink_info_get_req_free(info_req);
+ if (!info_rsp)
+ goto err_free_devs;
+
+ if (info_rsp->_present.info_driver_name_len)
+ printf(" driver: %s\n", info_rsp->info_driver_name);
+ if (info_rsp->n_info_version_running)
+ printf(" running fw:\n");
+ for (unsigned i = 0; i < info_rsp->n_info_version_running; i++)
+ printf(" %s: %s\n",
+ info_rsp->info_version_running[i].info_version_name,
+ info_rsp->info_version_running[i].info_version_value);
+ printf(" ...\n");
+ devlink_info_get_rsp_free(info_rsp);
+ }
+ devlink_get_list_free(devs);
+
+ ynl_sock_destroy(ys);
+
+ return 0;
+
+err_free_devs:
+ devlink_get_list_free(devs);
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/ethtool.c b/tools/net/ynl/samples/ethtool.c
new file mode 100644
index 000000000000..a7ebbd1b98db
--- /dev/null
+++ b/tools/net/ynl/samples/ethtool.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <net/if.h>
+
+#include "ethtool-user.h"
+
+int main(int argc, char **argv)
+{
+ struct ethtool_channels_get_req_dump creq = {};
+ struct ethtool_rings_get_req_dump rreq = {};
+ struct ethtool_channels_get_list *channels;
+ struct ethtool_rings_get_list *rings;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+ if (!ys)
+ return 1;
+
+ creq._present.header = 1; /* ethtool needs an empty nest, sigh */
+ channels = ethtool_channels_get_dump(ys, &creq);
+ if (!channels)
+ goto err_close;
+
+ printf("Channels:\n");
+ ynl_dump_foreach(channels, dev) {
+ printf(" %8s: ", dev->header.dev_name);
+ if (dev->_present.rx_count)
+ printf("rx %d ", dev->rx_count);
+ if (dev->_present.tx_count)
+ printf("tx %d ", dev->tx_count);
+ if (dev->_present.combined_count)
+ printf("combined %d ", dev->combined_count);
+ printf("\n");
+ }
+ ethtool_channels_get_list_free(channels);
+
+ rreq._present.header = 1; /* ethtool needs an empty nest.. */
+ rings = ethtool_rings_get_dump(ys, &rreq);
+ if (!rings)
+ goto err_close;
+
+ printf("Rings:\n");
+ ynl_dump_foreach(rings, dev) {
+ printf(" %8s: ", dev->header.dev_name);
+ if (dev->_present.rx)
+ printf("rx %d ", dev->rx);
+ if (dev->_present.tx)
+ printf("tx %d ", dev->tx);
+ printf("\n");
+ }
+ ethtool_rings_get_list_free(rings);
+
+ ynl_sock_destroy(ys);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg);
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
new file mode 100644
index 000000000000..d31268aa47c5
--- /dev/null
+++ b/tools/net/ynl/samples/netdev.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <net/if.h>
+
+#include "netdev-user.h"
+
+/* netdev genetlink family code sample
+ * This sample shows off basics of the netdev family but also notification
+ * handling, hence the somewhat odd UI. We subscribe to notifications first
+ * then wait for ifc selection, so the socket may already accumulate
+ * notifications as we wait. This allows us to test that YNL can handle
+ * requests and notifications getting interleaved.
+ */
+
+static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op)
+{
+ char ifname[IF_NAMESIZE];
+ const char *name;
+
+ if (!d->_present.ifindex)
+ return;
+
+ name = if_indextoname(d->ifindex, ifname);
+ if (name)
+ printf("%8s", name);
+ printf("[%d]\t", d->ifindex);
+
+ if (!d->_present.xdp_features)
+ return;
+
+ printf("%llx:", d->xdp_features);
+ for (int i = 0; d->xdp_features > 1U << i; i++) {
+ if (d->xdp_features & (1U << i))
+ printf(" %s", netdev_xdp_act_str(1 << i));
+ }
+
+ name = netdev_op_str(op);
+ if (name)
+ printf(" (ntf: %s)", name);
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct netdev_dev_get_list *devs;
+ struct ynl_ntf_base_type *ntf;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ifindex = 0;
+
+ if (argc > 1)
+ ifindex = strtol(argv[1], NULL, 0);
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ if (ynl_subscribe(ys, "mgmt"))
+ goto err_close;
+
+ printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): ");
+ scanf("%d", &ifindex);
+
+ if (ifindex > 0) {
+ struct netdev_dev_get_req *req;
+ struct netdev_dev_get_rsp *d;
+
+ req = netdev_dev_get_req_alloc();
+ netdev_dev_get_req_set_ifindex(req, ifindex);
+
+ d = netdev_dev_get(ys, req);
+ netdev_dev_get_req_free(req);
+ if (!d)
+ goto err_close;
+
+ netdev_print_device(d, 0);
+ netdev_dev_get_rsp_free(d);
+ } else if (!ifindex) {
+ devs = netdev_dev_get_dump(ys);
+ if (!devs)
+ goto err_close;
+
+ ynl_dump_foreach(devs, d)
+ netdev_print_device(d, 0);
+ netdev_dev_get_list_free(devs);
+ } else if (ifindex == -2) {
+ ynl_ntf_check(ys);
+ }
+ while ((ntf = ynl_ntf_dequeue(ys))) {
+ netdev_print_device((struct netdev_dev_get_rsp *)&ntf->data,
+ ntf->cmd);
+ ynl_ntf_free(ntf);
+ }
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index cc2f8c945340..71c5e79e877f 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -4,6 +4,7 @@
import argparse
import collections
import os
+import re
import yaml
from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
@@ -48,6 +49,11 @@ class Type(SpecAttr):
else:
self.nested_render_name = f"{family.name}_{c_lower(self.nested_attrs)}"
+ if self.nested_attrs in self.family.consts:
+ self.nested_struct_type = 'struct ' + self.nested_render_name + '_'
+ else:
+ self.nested_struct_type = 'struct ' + self.nested_render_name
+
self.c_name = c_lower(self.name)
if self.c_name in _C_KW:
self.c_name += '_'
@@ -57,8 +63,11 @@ class Type(SpecAttr):
delattr(self, "enum_name")
def resolve(self):
- self.enum_name = f"{self.attr_set.name_prefix}{self.name}"
- self.enum_name = c_upper(self.enum_name)
+ if 'name-prefix' in self.attr:
+ enum_name = f"{self.attr['name-prefix']}{self.name}"
+ else:
+ enum_name = f"{self.attr_set.name_prefix}{self.name}"
+ self.enum_name = c_upper(enum_name)
def is_multi_val(self):
return None
@@ -94,7 +103,10 @@ class Type(SpecAttr):
def arg_member(self, ri):
member = self._complex_member_type(ri)
if member:
- return [member + ' *' + self.c_name]
+ arg = [member + ' *' + self.c_name]
+ if self.presence_type() == 'count':
+ arg += ['unsigned int n_' + self.c_name]
+ return arg
raise Exception(f"Struct member not implemented for class type {self.type}")
def struct_member(self, ri):
@@ -150,7 +162,7 @@ class Type(SpecAttr):
init_lines = [init_lines]
kw = 'if' if first else 'else if'
- ri.cw.block_start(line=f"{kw} (mnl_attr_get_type(attr) == {self.enum_name})")
+ ri.cw.block_start(line=f"{kw} (type == {self.enum_name})")
if local_vars:
for local in local_vars:
ri.cw.p(local)
@@ -170,6 +182,7 @@ class Type(SpecAttr):
for line in lines:
ri.cw.p(line)
ri.cw.block_end()
+ return True
def _setter_lines(self, ri, member, presence):
raise Exception(f"Setter not implemented for class type {self.type}")
@@ -187,9 +200,12 @@ class Type(SpecAttr):
code.append(presence + ' = 1;')
code += self._setter_lines(ri, member, presence)
- ri.cw.write_func('static inline void',
- f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}",
- body=code,
+ func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
+ free = bool([x for x in code if 'free(' in x])
+ alloc = bool([x for x in code if 'alloc(' in x])
+ if free and not alloc:
+ func_name = '__' + func_name
+ ri.cw.write_func('static inline void', func_name, body=code,
args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri))
@@ -197,6 +213,12 @@ class TypeUnused(Type):
def presence_type(self):
return ''
+ def arg_member(self, ri):
+ return []
+
+ def _attr_get(self, ri, var):
+ return ['return MNL_CB_ERROR;'], None, None
+
def _attr_typol(self):
return '.type = YNL_PT_REJECT, '
@@ -208,12 +230,24 @@ class TypePad(Type):
def presence_type(self):
return ''
+ def arg_member(self, ri):
+ return []
+
def _attr_typol(self):
- return '.type = YNL_PT_REJECT, '
+ return '.type = YNL_PT_IGNORE, '
+
+ def attr_put(self, ri, var):
+ pass
+
+ def attr_get(self, ri, var, first):
+ pass
def attr_policy(self, cw):
pass
+ def setter(self, ri, space, direction, deref=False, ref=None):
+ pass
+
class TypeScalar(Type):
def __init__(self, family, attr_set, attr, value):
@@ -239,7 +273,8 @@ class TypeScalar(Type):
else:
self.is_bitfield = False
- if 'enum' in self.attr and not self.is_bitfield:
+ maybe_enum = not self.is_bitfield and 'enum' in self.attr
+ if maybe_enum and self.family.consts[self.attr['enum']].enum_name:
self.type_name = f"enum {self.family.name}_{c_lower(self.attr['enum'])}"
else:
self.type_name = '__' + self.type
@@ -265,8 +300,10 @@ class TypeScalar(Type):
return f"NLA_POLICY_MIN({policy}, {self.checks['min']})"
elif 'enum' in self.attr:
enum = self.family.consts[self.attr['enum']]
- cnt = len(enum['entries'])
- return f"NLA_POLICY_MAX({policy}, {cnt - 1})"
+ low, high = enum.value_range()
+ if low == 0:
+ return f"NLA_POLICY_MAX({policy}, {high})"
+ return f"NLA_POLICY_RANGE({policy}, {low}, {high})"
return super()._attr_policy(policy)
def _attr_typol(self):
@@ -395,7 +432,7 @@ class TypeBinary(Type):
class TypeNest(Type):
def _complex_member_type(self, ri):
- return f"struct {self.nested_render_name}"
+ return self.nested_struct_type
def free(self, ri, var, ref):
ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name});')
@@ -411,7 +448,8 @@ class TypeNest(Type):
f"{self.enum_name}, &{var}->{self.c_name})")
def _attr_get(self, ri, var):
- get_lines = [f"{self.nested_render_name}_parse(&parg, attr);"]
+ get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
+ "return MNL_CB_ERROR;"]
init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
f"parg.data = &{var}->{self.c_name};"]
return get_lines, init_lines, None
@@ -424,15 +462,27 @@ class TypeNest(Type):
class TypeMultiAttr(Type):
+ def __init__(self, family, attr_set, attr, value, base_type):
+ super().__init__(family, attr_set, attr, value)
+
+ self.base_type = base_type
+
def is_multi_val(self):
return True
def presence_type(self):
return 'count'
+ def _mnl_type(self):
+ t = self.type
+ # mnl does not have a helper for signed types
+ if t[0] == 's':
+ t = 'u' + t[1:]
+ return t
+
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
- return f"struct {self.nested_render_name}"
+ return self.nested_struct_type
elif self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['type']
@@ -443,20 +493,42 @@ class TypeMultiAttr(Type):
return 'type' not in self.attr or self.attr['type'] == 'nest'
def free(self, ri, var, ref):
- if 'type' not in self.attr or self.attr['type'] == 'nest':
+ if self.attr['type'] in scalars:
+ ri.cw.p(f"free({var}->{ref}{self.c_name});")
+ elif 'type' not in self.attr or self.attr['type'] == 'nest':
ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)")
ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);')
+ ri.cw.p(f"free({var}->{ref}{self.c_name});")
+ else:
+ raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet")
+
+ def _attr_policy(self, policy):
+ return self.base_type._attr_policy(policy)
def _attr_typol(self):
- if 'type' not in self.attr or self.attr['type'] == 'nest':
- return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
- elif self.attr['type'] in scalars:
- return f".type = YNL_PT_U{self.attr['type'][1:]}, "
- else:
- raise Exception(f"Sub-type {self.attr['type']} not supported yet")
+ return self.base_type._attr_typol()
def _attr_get(self, ri, var):
- return f'{var}->n_{self.c_name}++;', None, None
+ return f'n_{self.c_name}++;', None, None
+
+ def attr_put(self, ri, var):
+ if self.attr['type'] in scalars:
+ put_type = self._mnl_type()
+ ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+ elif 'type' not in self.attr or self.attr['type'] == 'nest':
+ ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+ self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
+ f"{self.enum_name}, &{var}->{self.c_name}[i])")
+ else:
+ raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet")
+
+ def _setter_lines(self, ri, member, presence):
+ # For multi-attr we have a count, not a presence bit; hack up the presence string
+ presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
+ return [f"free({member});",
+ f"{member} = {self.c_name};",
+ f"{presence} = n_{self.c_name};"]
class TypeArrayNest(Type):
@@ -468,7 +540,7 @@ class TypeArrayNest(Type):
def _complex_member_type(self, ri):
if 'sub-type' not in self.attr or self.attr['sub-type'] == 'nest':
- return f"struct {self.nested_render_name}"
+ return self.nested_struct_type
elif self.attr['sub-type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['sub-type']
@@ -488,7 +560,7 @@ class TypeArrayNest(Type):
class TypeNestTypeValue(Type):
def _complex_member_type(self, ri):
- return f"struct {self.nested_render_name}"
+ return self.nested_struct_type
def _attr_typol(self):
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
@@ -531,6 +603,8 @@ class Struct:
else:
self.render_name = f"{family.name}_{c_lower(space_name)}"
self.struct_name = 'struct ' + self.render_name
+ if self.nested and space_name in family.consts:
+ self.struct_name += '_'
self.ptr_name = self.struct_name + ' *'
self.request = False
@@ -591,7 +665,14 @@ class EnumEntry(SpecEnumEntry):
class EnumSet(SpecEnumSet):
def __init__(self, family, yaml):
self.render_name = c_lower(family.name + '-' + yaml['name'])
- self.enum_name = 'enum ' + self.render_name
+
+ if 'enum-name' in yaml:
+ if yaml['enum-name']:
+ self.enum_name = 'enum ' + c_lower(yaml['enum-name'])
+ else:
+ self.enum_name = None
+ else:
+ self.enum_name = 'enum ' + self.render_name
self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-")
@@ -600,6 +681,15 @@ class EnumSet(SpecEnumSet):
def new_entry(self, entry, prev_entry, value_start):
return EnumEntry(self, entry, prev_entry, value_start)
+ def value_range(self):
+ low = min([x.value for x in self.entries.values()])
+ high = max([x.value for x in self.entries.values()])
+
+ if high - low + 1 != len(self.entries):
+ raise Exception("Can't get value range for a noncontiguous enum")
+
+ return low, high
+
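
The value_range() helper feeds the policy generation earlier in this file: a
contiguous enum starting at 0 becomes NLA_POLICY_MAX, any other contiguous
range becomes NLA_POLICY_RANGE. A sketch of the emitted kernel policy entries,
with hypothetical attribute and enum names:

	/* hypothetical attrs: "mode" uses an enum 0..2, "level" an enum 1..3 */
	[FAM_A_EXAMPLE_MODE]  = NLA_POLICY_MAX(NLA_U32, 2),
	[FAM_A_EXAMPLE_LEVEL] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
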
class AttrSet(SpecAttrSet):
def __init__(self, family, yaml):
@@ -630,42 +720,44 @@ class AttrSet(SpecAttrSet):
self.c_name = ''
def new_attr(self, elem, value):
- if 'multi-attr' in elem and elem['multi-attr']:
- return TypeMultiAttr(self.family, self, elem, value)
- elif elem['type'] in scalars:
- return TypeScalar(self.family, self, elem, value)
+ if elem['type'] in scalars:
+ t = TypeScalar(self.family, self, elem, value)
elif elem['type'] == 'unused':
- return TypeUnused(self.family, self, elem, value)
+ t = TypeUnused(self.family, self, elem, value)
elif elem['type'] == 'pad':
- return TypePad(self.family, self, elem, value)
+ t = TypePad(self.family, self, elem, value)
elif elem['type'] == 'flag':
- return TypeFlag(self.family, self, elem, value)
+ t = TypeFlag(self.family, self, elem, value)
elif elem['type'] == 'string':
- return TypeString(self.family, self, elem, value)
+ t = TypeString(self.family, self, elem, value)
elif elem['type'] == 'binary':
- return TypeBinary(self.family, self, elem, value)
+ t = TypeBinary(self.family, self, elem, value)
elif elem['type'] == 'nest':
- return TypeNest(self.family, self, elem, value)
+ t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'array-nest':
- return TypeArrayNest(self.family, self, elem, value)
+ t = TypeArrayNest(self.family, self, elem, value)
elif elem['type'] == 'nest-type-value':
- return TypeNestTypeValue(self.family, self, elem, value)
+ t = TypeNestTypeValue(self.family, self, elem, value)
else:
raise Exception(f"No typed class for type {elem['type']}")
+ if 'multi-attr' in elem and elem['multi-attr']:
+ t = TypeMultiAttr(self.family, self, elem, value, t)
+
+ return t
+
class Operation(SpecOperation):
def __init__(self, family, yaml, req_value, rsp_value):
super().__init__(family, yaml, req_value, rsp_value)
- if req_value != rsp_value:
- raise Exception("Directional messages not supported by codegen")
-
self.render_name = family.name + '_' + c_lower(self.name)
self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \
('dump' in yaml and 'request' in yaml['dump'])
+ self.has_ntf = False
+
# Added by resolve:
self.enum_name = None
delattr(self, "enum_name")
@@ -678,16 +770,12 @@ class Operation(SpecOperation):
else:
self.enum_name = self.family.async_op_prefix + c_upper(self.name)
- def add_notification(self, op):
- if 'notify' not in self.yaml:
- self.yaml['notify'] = dict()
- self.yaml['notify']['reply'] = self.yaml['do']['reply']
- self.yaml['notify']['cmds'] = []
- self.yaml['notify']['cmds'].append(op)
+ def mark_has_ntf(self):
+ self.has_ntf = True
class Family(SpecFamily):
- def __init__(self, file_name):
+ def __init__(self, file_name, exclude_ops):
# Added by resolve:
self.c_name = None
delattr(self, "c_name")
@@ -702,7 +790,7 @@ class Family(SpecFamily):
self.hooks = None
delattr(self, "hooks")
- super().__init__(file_name)
+ super().__init__(file_name, exclude_ops=exclude_ops)
self.fam_key = c_upper(self.yaml.get('c-family-name', self.yaml["name"] + '_FAMILY_NAME'))
self.ver_key = c_upper(self.yaml.get('c-version-name', self.yaml["name"] + '_FAMILY_VERSION'))
@@ -745,14 +833,12 @@ class Family(SpecFamily):
self.root_sets = dict()
# dict space-name -> set('request', 'reply')
self.pure_nested_structs = dict()
- self.all_notify = dict()
+ self._mark_notify()
self._mock_up_events()
- self._dictify()
self._load_root_sets()
self._load_nested_sets()
- self._load_all_notify()
self._load_hooks()
self.kernel_policy = self.yaml.get('kernel-policy', 'split')
@@ -768,6 +854,11 @@ class Family(SpecFamily):
def new_operation(self, elem, req_value, rsp_value):
return Operation(self, elem, req_value, rsp_value)
+ def _mark_notify(self):
+ for op in self.msgs.values():
+ if 'notify' in op:
+ self.ops[op['notify']].mark_has_ntf()
+
# Fake a 'do' equivalent of all events, so that we can render their response parsing
def _mock_up_events(self):
for op in self.yaml['operations']['list']:
@@ -778,16 +869,8 @@ class Family(SpecFamily):
}
}
- def _dictify(self):
- ntf = []
- for msg in self.msgs.values():
- if 'notify' in msg:
- ntf.append(msg)
- for n in ntf:
- self.ops[n['notify']].add_notification(n)
-
def _load_root_sets(self):
- for op_name, op in self.ops.items():
+ for op_name, op in self.msgs.items():
if 'attribute-set' not in op:
continue
@@ -798,6 +881,8 @@ class Family(SpecFamily):
req_attrs.update(set(op[op_mode]['request']['attributes']))
if op_mode in op and 'reply' in op[op_mode]:
rsp_attrs.update(set(op[op_mode]['reply']['attributes']))
+ if 'event' in op:
+ rsp_attrs.update(set(op['event']['attributes']))
if op['attribute-set'] not in self.root_sets:
self.root_sets[op['attribute-set']] = {'request': req_attrs, 'reply': rsp_attrs}
@@ -806,33 +891,73 @@ class Family(SpecFamily):
self.root_sets[op['attribute-set']]['reply'].update(rsp_attrs)
def _load_nested_sets(self):
+ attr_set_queue = list(self.root_sets.keys())
+ attr_set_seen = set(self.root_sets.keys())
+
+ while len(attr_set_queue):
+ a_set = attr_set_queue.pop(0)
+ for attr, spec in self.attr_sets[a_set].items():
+ if 'nested-attributes' not in spec:
+ continue
+
+ nested = spec['nested-attributes']
+ if nested not in attr_set_seen:
+ attr_set_queue.append(nested)
+ attr_set_seen.add(nested)
+
+ inherit = set()
+ if nested not in self.root_sets:
+ if nested not in self.pure_nested_structs:
+ self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
+ else:
+ raise Exception(f'Using attr set as root and nested not supported - {nested}')
+
+ if 'type-value' in spec:
+ if nested in self.root_sets:
+ raise Exception("Inheriting members to a space used as root not supported")
+ inherit.update(set(spec['type-value']))
+ elif spec['type'] == 'array-nest':
+ inherit.add('idx')
+ self.pure_nested_structs[nested].set_inherited(inherit)
+
for root_set, rs_members in self.root_sets.items():
for attr, spec in self.attr_sets[root_set].items():
if 'nested-attributes' in spec:
- inherit = set()
nested = spec['nested-attributes']
- if nested not in self.root_sets:
- self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
if attr in rs_members['request']:
self.pure_nested_structs[nested].request = True
if attr in rs_members['reply']:
self.pure_nested_structs[nested].reply = True
- if 'type-value' in spec:
- if nested in self.root_sets:
- raise Exception("Inheriting members to a space used as root not supported")
- inherit.update(set(spec['type-value']))
- elif spec['type'] == 'array-nest':
- inherit.add('idx')
- self.pure_nested_structs[nested].set_inherited(inherit)
-
- def _load_all_notify(self):
- for op_name, op in self.ops.items():
- if not op:
- continue
-
- if 'notify' in op:
- self.all_notify[op_name] = op['notify']['cmds']
+ # Try to reorder according to dependencies
+ pns_key_list = list(self.pure_nested_structs.keys())
+ pns_key_seen = set()
+ rounds = len(pns_key_list)**2 # it's basically bubble sort
+ for _ in range(rounds):
+ if len(pns_key_list) == 0:
+ break
+ name = pns_key_list.pop(0)
+ finished = True
+ for _, spec in self.attr_sets[name].items():
+ if 'nested-attributes' in spec:
+ if spec['nested-attributes'] not in pns_key_seen:
+ # Dicts preserve insertion order; re-inserting moves this struct last
+ struct = self.pure_nested_structs.pop(name)
+ self.pure_nested_structs[name] = struct
+ finished = False
+ break
+ if finished:
+ pns_key_seen.add(name)
+ else:
+ pns_key_list.append(name)
+ # Propagate the request / reply
+ for attr_set, struct in reversed(self.pure_nested_structs.items()):
+ for _, spec in self.attr_sets[attr_set].items():
+ if 'nested-attributes' in spec:
+ child = self.pure_nested_structs.get(spec['nested-attributes'])
+ if child:
+ child.request |= struct.request
+ child.reply |= struct.reply
def _load_global_policy(self):
global_set = set()
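
The reordering pass matters because the pure nested structs are emitted in
dict order, and C requires the complete definition of a struct before another
struct can embed it by value. A minimal sketch with hypothetical names:

	/* hypothetical generated types: inner must be sorted before outer */
	struct fam_inner {
		__u32 val;
	};

	struct fam_outer {
		struct fam_inner inner;	/* needs the full definition above */
	};
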
@@ -874,33 +999,38 @@ class Family(SpecFamily):
class RenderInfo:
- def __init__(self, cw, family, ku_space, op, op_name, op_mode, attr_set=None):
+ def __init__(self, cw, family, ku_space, op, op_mode, attr_set=None):
self.family = family
self.nl = cw.nlib
self.ku_space = ku_space
- self.op = op
- self.op_name = op_name
self.op_mode = op_mode
+ self.op = op
# 'do' and 'dump' response parsing is identical
- if op_mode != 'do' and 'dump' in op and 'do' in op and 'reply' in op['do'] and \
- op["do"]["reply"] == op["dump"]["reply"]:
- self.type_consistent = True
- else:
- self.type_consistent = op_mode == 'event'
+ self.type_consistent = True
+ if op_mode != 'do' and 'dump' in op and 'do' in op:
+ if ('reply' in op['do']) != ('reply' in op["dump"]):
+ self.type_consistent = False
+ elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
+ self.type_consistent = False
self.attr_set = attr_set
if not self.attr_set:
self.attr_set = op['attribute-set']
+ self.type_name_conflict = False
if op:
- self.type_name = c_lower(op_name)
+ self.type_name = c_lower(op.name)
else:
self.type_name = c_lower(attr_set)
+ if attr_set in family.consts:
+ self.type_name_conflict = True
self.cw = cw
self.struct = dict()
+ if op_mode == 'notify':
+ op_mode = 'do'
for op_dir in ['request', 'reply']:
if op and op_dir in op[op_mode]:
self.struct[op_dir] = Struct(family, self.attr_set,
@@ -914,6 +1044,7 @@ class CodeWriter:
self.nlib = nlib
self._nl = False
+ self._block_end = False
self._silent_block = False
self._ind = 0
self._out = out_file
@@ -923,9 +1054,17 @@ class CodeWriter:
return line.startswith('if') or line.startswith('while') or line.startswith('for')
def p(self, line, add_ind=0):
+ if self._block_end:
+ self._block_end = False
+ if line.startswith('else'):
+ line = '} ' + line
+ else:
+ self._out.write('\t' * self._ind + '}\n')
+
if self._nl:
self._out.write('\n')
self._nl = False
+
ind = self._ind
if line[-1] == ':':
ind -= 1
@@ -949,7 +1088,14 @@ class CodeWriter:
if line and line[0] not in {';', ','}:
line = ' ' + line
self._ind -= 1
- self.p('}' + line)
+ self._nl = False
+ if not line:
+ # Delay printing closing bracket in case "else" comes next
+ if self._block_end:
+ self._out.write('\t' * (self._ind + 1) + '}\n')
+ self._block_end = True
+ else:
+ self.p('}' + line)
def write_doc_line(self, doc, indent=True):
words = doc.split()
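
The delayed closing bracket exists purely so block_end() can fuse the brace
with a following else, letting the generated C read in kernel style (the calls
below are hypothetical):

	/* emitted with the delayed '}': */
	if (err) {
		do_fail();
	} else {
		do_ok();
	}
	/* rather than the '}' and the 'else' landing on separate lines */
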
@@ -1068,7 +1214,40 @@ op_mode_to_wrapper = {
}
_C_KW = {
- 'do'
+ 'auto',
+ 'bool',
+ 'break',
+ 'case',
+ 'char',
+ 'const',
+ 'continue',
+ 'default',
+ 'do',
+ 'double',
+ 'else',
+ 'enum',
+ 'extern',
+ 'float',
+ 'for',
+ 'goto',
+ 'if',
+ 'inline',
+ 'int',
+ 'long',
+ 'register',
+ 'return',
+ 'short',
+ 'signed',
+ 'sizeof',
+ 'static',
+ 'struct',
+ 'switch',
+ 'typedef',
+ 'union',
+ 'unsigned',
+ 'void',
+ 'volatile',
+ 'while'
}
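
This list backs the c_name mangling near the top of the file, where a trailing
'_' is appended on collision, so a spec attribute that happens to be named
after a C keyword still renders as a legal struct member. A hypothetical
example:

	/* spec attributes named "do" and "inline" become: */
	struct fam_op_req {
		__u32 do_;
		__u32 inline_;
	};
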
@@ -1131,10 +1310,6 @@ def print_dump_prototype(ri):
print_prototype(ri, "request")
-def put_typol_fwd(cw, struct):
- cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;')
-
-
def put_typol(cw, struct):
type_max = struct.attr_set.max_name
cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =')
@@ -1152,6 +1327,58 @@ def put_typol(cw, struct):
cw.nl()
+def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None):
+ args = [f'int {arg_name}']
+ if enum and not ('enum-name' in enum and not enum['enum-name']):
+ args = [f'enum {render_name} {arg_name}']
+ cw.write_func_prot('const char *', f'{render_name}_str', args)
+ cw.block_start()
+ if enum and enum.type == 'flags':
+ cw.p(f'{arg_name} = ffs({arg_name}) - 1;')
+ cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)MNL_ARRAY_SIZE({map_name}))')
+ cw.p('return NULL;')
+ cw.p(f'return {map_name}[{arg_name}];')
+ cw.block_end()
+ cw.nl()
+
+
+def put_op_name_fwd(family, cw):
+ cw.write_func_prot('const char *', f'{family.name}_op_str', ['int op'], suffix=';')
+
+
+def put_op_name(family, cw):
+ map_name = f'{family.name}_op_strmap'
+ cw.block_start(line=f"static const char * const {map_name}[] =")
+ for op_name, op in family.msgs.items():
+ if op.rsp_value:
+ if op.req_value == op.rsp_value:
+ cw.p(f'[{op.enum_name}] = "{op_name}",')
+ else:
+ cw.p(f'[{op.rsp_value}] = "{op_name}",')
+ cw.block_end(line=';')
+ cw.nl()
+
+ _put_enum_to_str_helper(cw, family.name + '_op', map_name, 'op')
+
+
+def put_enum_to_str_fwd(family, cw, enum):
+ args = [f'enum {enum.render_name} value']
+ if 'enum-name' in enum and not enum['enum-name']:
+ args = ['int value']
+ cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';')
+
+
+def put_enum_to_str(family, cw, enum):
+ map_name = f'{enum.render_name}_strmap'
+ cw.block_start(line=f"static const char * const {map_name}[] =")
+ for entry in enum.entries.values():
+ cw.p(f'[{entry.value}] = "{entry.name}",')
+ cw.block_end(line=';')
+ cw.nl()
+
+ _put_enum_to_str_helper(cw, enum.render_name, map_name, 'value', enum=enum)
+
+
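
put_op_name() and put_enum_to_str() emit reverse string maps plus lookup
helpers; this is what backs netdev_op_str() and netdev_xdp_act_str() in the
netdev sample earlier. A sketch of the output for the netdev family (entries
abridged, exact content depends on the spec):

	static const char * const netdev_op_strmap[] = {
		[NETDEV_CMD_DEV_GET] = "dev-get",
		/* ... */
	};

	const char *netdev_op_str(int op)
	{
		if (op < 0 || op >= (int)MNL_ARRAY_SIZE(netdev_op_strmap))
			return NULL;
		return netdev_op_strmap[op];
	}
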
def put_req_nested(ri, struct):
func_args = ['struct nlmsghdr *nlh',
'unsigned int attr_type',
@@ -1196,6 +1423,11 @@ def _multi_parse(ri, struct, init_lines, local_vars):
local_vars.append('struct ynl_parse_arg parg;')
init_lines.append('parg.ys = yarg->ys;')
+ all_multi = array_nests | multi_attrs
+
+ for anest in sorted(all_multi):
+ local_vars.append(f"unsigned int n_{struct[anest].c_name} = 0;")
+
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
@@ -1206,13 +1438,21 @@ def _multi_parse(ri, struct, init_lines, local_vars):
for arg in struct.inherited:
ri.cw.p(f'dst->{arg} = {arg};')
+ for anest in sorted(all_multi):
+ aspec = struct[anest]
+ ri.cw.p(f"if (dst->{aspec.c_name})")
+ ri.cw.p(f'return ynl_error_parse(yarg, "attribute already present ({struct.attr_set.name}.{aspec.name})");')
+
ri.cw.nl()
ri.cw.block_start(line=iter_line)
+ ri.cw.p('unsigned int type = mnl_attr_get_type(attr);')
+ ri.cw.nl()
first = True
for _, arg in struct.member_list():
- arg.attr_get(ri, 'dst', first=first)
- first = False
+ good = arg.attr_get(ri, 'dst', first=first)
+ # First may be 'unused' or 'pad', ignore those
+ first &= not good
ri.cw.block_end()
ri.cw.nl()
@@ -1220,8 +1460,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
for anest in sorted(array_nests):
aspec = struct[anest]
- ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})")
- ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+ ri.cw.block_start(line=f"if (n_{aspec.c_name})")
+ ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+ ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})")
@@ -1235,8 +1476,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
for anest in sorted(multi_attrs):
aspec = struct[anest]
- ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})")
- ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+ ri.cw.block_start(line=f"if (n_{aspec.c_name})")
+ ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
+ ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1304,13 +1546,13 @@ def print_req(ri):
ret_err = '-1'
direction = "request"
local_vars = ['struct nlmsghdr *nlh;',
- 'int len, err;']
+ 'int err;']
if 'reply' in ri.op[ri.op_mode]:
ret_ok = 'rsp'
ret_err = 'NULL'
local_vars += [f'{type_name(ri, rdir(direction))} *rsp;',
- 'struct ynl_parse_arg yarg = { .ys = ys, };']
+ 'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };']
print_prototype(ri, direction, terminate=False)
ri.cw.block_start()
@@ -1320,41 +1562,39 @@ def print_req(ri):
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
if 'reply' in ri.op[ri.op_mode]:
- ri.cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
+ ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
for _, attr in ri.struct["request"].member_list():
attr.attr_put(ri, "req")
ri.cw.nl()
- ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);')
- ri.cw.p('if (err < 0)')
- ri.cw.p(f"return {ret_err};")
- ri.cw.nl()
- ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);')
- ri.cw.p('if (len < 0)')
- ri.cw.p(f"return {ret_err};")
- ri.cw.nl()
-
+ parse_arg = "NULL"
if 'reply' in ri.op[ri.op_mode]:
ri.cw.p('rsp = calloc(1, sizeof(*rsp));')
- ri.cw.p('yarg.data = rsp;')
+ ri.cw.p('yrs.yarg.data = rsp;')
+ ri.cw.p(f"yrs.cb = {op_prefix(ri, 'reply')}_parse;")
+ if ri.op.value is not None:
+ ri.cw.p(f'yrs.rsp_cmd = {ri.op.enum_name};')
+ else:
+ ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};')
ri.cw.nl()
- ri.cw.p(f"err = {ri.nl.parse_cb_run(op_prefix(ri, 'reply') + '_parse', '&yarg', False)};")
- ri.cw.p('if (err < 0)')
+ parse_arg = '&yrs'
+ ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});")
+ ri.cw.p('if (err < 0)')
+ if 'reply' in ri.op[ri.op_mode]:
ri.cw.p('goto err_free;')
- ri.cw.nl()
-
- ri.cw.p('err = ynl_recv_ack(ys, err);')
- ri.cw.p('if (err)')
- ri.cw.p('goto err_free;')
+ else:
+ ri.cw.p('return -1;')
ri.cw.nl()
+
ri.cw.p(f"return {ret_ok};")
ri.cw.nl()
- ri.cw.p('err_free:')
if 'reply' in ri.op[ri.op_mode]:
+ ri.cw.p('err_free:')
ri.cw.p(f"{call_free(ri, rdir(direction), 'rsp')}")
- ri.cw.p(f"return {ret_err};")
+ ri.cw.p(f"return {ret_err};")
+
ri.cw.block_end()
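
With this change the generated do-request no longer open-codes the
sendto/recvfrom/ack sequence; everything funnels through ynl_exec(). A
simplified sketch of the emitted body for an op with a reply, with
hypothetical type, parser, and command names:

	struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
	struct fam_op_rsp *rsp;		/* hypothetical reply type */
	struct nlmsghdr *nlh;
	int err;

	/* elided: message init and attribute puts */

	rsp = calloc(1, sizeof(*rsp));
	yrs.yarg.data = rsp;
	yrs.cb = fam_op_rsp_parse;	/* hypothetical parser */
	yrs.rsp_cmd = FAM_CMD_OP;	/* hypothetical command */

	err = ynl_exec(ys, nlh, &yrs);
	if (err < 0)
		goto err_free;
	return rsp;

	err_free:
	fam_op_rsp_free(rsp);
	return NULL;
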
@@ -1364,7 +1604,7 @@ def print_dump(ri):
ri.cw.block_start()
local_vars = ['struct ynl_dump_state yds = {};',
'struct nlmsghdr *nlh;',
- 'int len, err;']
+ 'int err;']
for var in local_vars:
ri.cw.p(f'{var}')
@@ -1373,6 +1613,10 @@ def print_dump(ri):
ri.cw.p('yds.ys = ys;')
ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});")
ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;")
+ if ri.op.value is not None:
+ ri.cw.p(f'yds.rsp_cmd = {ri.op.enum_name};')
+ else:
+ ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
@@ -1384,20 +1628,9 @@ def print_dump(ri):
attr.attr_put(ri, "req")
ri.cw.nl()
- ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);')
- ri.cw.p('if (err < 0)')
- ri.cw.p('return NULL;')
- ri.cw.nl()
-
- ri.cw.block_start(line='do')
- ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);')
- ri.cw.p('if (len < 0)')
- ri.cw.p('goto free_list;')
- ri.cw.nl()
- ri.cw.p(f"err = {ri.nl.parse_cb_run('ynl_dump_trampoline', '&yds', False, indent=2)};")
+ ri.cw.p('err = ynl_exec_dump(ys, nlh, &yds);')
ri.cw.p('if (err < 0)')
ri.cw.p('goto free_list;')
- ri.cw.block_end(line='while (err > 0);')
ri.cw.nl()
ri.cw.p('return yds.first;')
@@ -1418,14 +1651,27 @@ def free_arg_name(direction):
return 'obj'
+def print_alloc_wrapper(ri, direction):
+ name = op_prefix(ri, direction)
+ ri.cw.write_func_prot(f'static inline struct {name} *', f"{name}_alloc", [f"void"])
+ ri.cw.block_start()
+ ri.cw.p(f'return calloc(1, sizeof(struct {name}));')
+ ri.cw.block_end()
+
+
def print_free_prototype(ri, direction, suffix=';'):
name = op_prefix(ri, direction)
+ struct_name = name
+ if ri.type_name_conflict:
+ struct_name += '_'
arg = free_arg_name(direction)
- ri.cw.write_func_prot('void', f"{name}_free", [f"struct {name} *{arg}"], suffix=suffix)
+ ri.cw.write_func_prot('void', f"{name}_free", [f"struct {struct_name} *{arg}"], suffix=suffix)
def _print_type(ri, direction, struct):
suffix = f'_{ri.type_name}{direction_to_suffix[direction]}'
+ if not direction and ri.type_name_conflict:
+ suffix += '_'
if ri.op_mode == 'dump':
suffix += '_dump'
@@ -1465,6 +1711,7 @@ def print_type_full(ri, struct):
def print_type_helpers(ri, direction, deref=False):
print_free_prototype(ri, direction)
+ ri.cw.nl()
if ri.ku_space == 'user' and direction == 'request':
for _, attr in ri.struct[direction].member_list():
@@ -1473,6 +1720,7 @@ def print_type_helpers(ri, direction, deref=False):
def print_req_type_helpers(ri):
+ print_alloc_wrapper(ri, "request")
print_type_helpers(ri, "request")
@@ -1496,6 +1744,12 @@ def print_req_type(ri):
print_type(ri, "request")
+def print_req_free(ri):
+ if 'request' not in ri.op[ri.op_mode]:
+ return
+ _free_type(ri, 'request', ri.struct['request'])
+
+
def print_rsp_type(ri):
if (ri.op_mode == 'do' or ri.op_mode == 'dump') and 'reply' in ri.op[ri.op_mode]:
direction = 'reply'
@@ -1513,6 +1767,7 @@ def print_wrapped_type(ri):
elif ri.op_mode == 'notify' or ri.op_mode == 'event':
ri.cw.p('__u16 family;')
ri.cw.p('__u8 cmd;')
+ ri.cw.p('struct ynl_ntf_base_type *next;')
ri.cw.p(f"void (*free)({type_name(ri, 'reply')} *ntf);")
ri.cw.p(f"{type_name(ri, 'reply', deref=True)} obj __attribute__ ((aligned (8)));")
ri.cw.block_end(line=';')
@@ -1564,7 +1819,7 @@ def print_dump_type_free(ri):
ri.cw.block_start()
ri.cw.p(f"{sub_type} *next = rsp;")
ri.cw.nl()
- ri.cw.block_start(line='while (next)')
+ ri.cw.block_start(line='while ((void *)next != YNL_LIST_END)')
_free_type_members_iter(ri, ri.struct['reply'])
ri.cw.p('rsp = next;')
ri.cw.p('next = rsp->next;')
@@ -1587,70 +1842,6 @@ def print_ntf_type_free(ri):
ri.cw.nl()
-def print_ntf_parse_prototype(family, cw, suffix=';'):
- cw.write_func_prot('struct ynl_ntf_base_type *', f"{family['name']}_ntf_parse",
- ['struct ynl_sock *ys'], suffix=suffix)
-
-
-def print_ntf_type_parse(family, cw, ku_mode):
- print_ntf_parse_prototype(family, cw, suffix='')
- cw.block_start()
- cw.write_func_lvar(['struct genlmsghdr *genlh;',
- 'struct nlmsghdr *nlh;',
- 'struct ynl_parse_arg yarg = { .ys = ys, };',
- 'struct ynl_ntf_base_type *rsp;',
- 'int len, err;',
- 'mnl_cb_t parse;'])
- cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);')
- cw.p('if (len < (ssize_t)(sizeof(*nlh) + sizeof(*genlh)))')
- cw.p('return NULL;')
- cw.nl()
- cw.p('nlh = (struct nlmsghdr *)ys->rx_buf;')
- cw.p('genlh = mnl_nlmsg_get_payload(nlh);')
- cw.nl()
- cw.block_start(line='switch (genlh->cmd)')
- for ntf_op in sorted(family.all_notify.keys()):
- op = family.ops[ntf_op]
- ri = RenderInfo(cw, family, ku_mode, op, ntf_op, "notify")
- for ntf in op['notify']['cmds']:
- cw.p(f"case {ntf.enum_name}:")
- cw.p(f"rsp = calloc(1, sizeof({type_name(ri, 'notify')}));")
- cw.p(f"parse = {op_prefix(ri, 'reply', deref=True)}_parse;")
- cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
- cw.p(f"rsp->free = (void *){op_prefix(ri, 'notify')}_free;")
- cw.p('break;')
- for op_name, op in family.ops.items():
- if 'event' not in op:
- continue
- ri = RenderInfo(cw, family, ku_mode, op, op_name, "event")
- cw.p(f"case {op.enum_name}:")
- cw.p(f"rsp = calloc(1, sizeof({type_name(ri, 'event')}));")
- cw.p(f"parse = {op_prefix(ri, 'reply', deref=True)}_parse;")
- cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
- cw.p(f"rsp->free = (void *){op_prefix(ri, 'notify')}_free;")
- cw.p('break;')
- cw.p('default:')
- cw.p('ynl_error_unknown_notification(ys, genlh->cmd);')
- cw.p('return NULL;')
- cw.block_end()
- cw.nl()
- cw.p('yarg.data = rsp->data;')
- cw.nl()
- cw.p(f"err = {cw.nlib.parse_cb_run('parse', '&yarg', True)};")
- cw.p('if (err < 0)')
- cw.p('goto err_free;')
- cw.nl()
- cw.p('rsp->family = nlh->nlmsg_type;')
- cw.p('rsp->cmd = genlh->cmd;')
- cw.p('return rsp;')
- cw.nl()
- cw.p('err_free:')
- cw.p('free(rsp);')
- cw.p('return NULL;')
- cw.block_end()
- cw.nl()
-
-
def print_req_policy_fwd(cw, struct, ri=None, terminate=True):
if terminate and ri and kernel_can_gen_family_struct(struct.family):
return
@@ -2035,6 +2226,48 @@ def render_uapi(family, cw):
cw.p(f'#endif /* {hdr_prot} */')
+def _render_user_ntf_entry(ri, op):
+ ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),")
+ ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,")
+ ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,")
+ ri.cw.p(f".free\t\t= (void *){op_prefix(ri, 'notify')}_free,")
+ ri.cw.block_end(line=',')
+
+
+def render_user_family(family, cw, prototype):
+ symbol = f'const struct ynl_family ynl_{family.c_name}_family'
+ if prototype:
+ cw.p(f'extern {symbol};')
+ return
+
+ if family.ntfs:
+ cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+ for ntf_op_name, ntf_op in family.ntfs.items():
+ if 'notify' in ntf_op:
+ op = family.ops[ntf_op['notify']]
+ ri = RenderInfo(cw, family, "user", op, "notify")
+ elif 'event' in ntf_op:
+ ri = RenderInfo(cw, family, "user", ntf_op, "event")
+ else:
+ raise Exception('Invalid notification ' + ntf_op_name)
+ _render_user_ntf_entry(ri, ntf_op)
+ for op_name, op in family.ops.items():
+ if 'event' not in op:
+ continue
+ ri = RenderInfo(cw, family, "user", op, "event")
+ _render_user_ntf_entry(ri, op)
+ cw.block_end(line=";")
+ cw.nl()
+
+ cw.block_start(f'{symbol} = ')
+ cw.p(f'.name\t\t= "{family.name}",')
+ if family.ntfs:
+ cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
+ cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.block_end(line=';')
+
+
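
render_user_family() ties it together: per-family notification metadata moves
into a table indexed by command, and the exported family object points at it.
A sketch of the output for the netdev family, following the generator strings
above; the entry and type names are abridged and not exact:

	static const struct ynl_ntf_info netdev_ntf_info[] = {
		[NETDEV_CMD_DEV_ADD_NTF] = {
			.alloc_sz	= sizeof(struct netdev_dev_get_ntf),
			.cb		= netdev_dev_get_rsp_parse,
			.policy		= &netdev_dev_nest,
			.free		= (void *)netdev_dev_get_ntf_free,
		},
		/* ... */
	};

	const struct ynl_family ynl_netdev_family = {
		.name		= "netdev",
		.ntf_info	= netdev_ntf_info,
		.ntf_info_size	= MNL_ARRAY_SIZE(netdev_ntf_info),
	};
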
def find_kernel_root(full_path):
sub_path = ''
while True:
@@ -2052,6 +2285,7 @@ def main():
parser.add_argument('--header', dest='header', action='store_true', default=None)
parser.add_argument('--source', dest='header', action='store_false')
parser.add_argument('--user-header', nargs='+', default=[])
+ parser.add_argument('--exclude-op', action='append', default=[])
parser.add_argument('-o', dest='out_file', type=str)
args = parser.parse_args()
@@ -2060,8 +2294,10 @@ def main():
if args.header is None:
parser.error("--header or --source is required")
+ exclude_ops = [re.compile(expr) for expr in args.exclude_op]
+
try:
- parsed = Family(args.spec)
+ parsed = Family(args.spec, exclude_ops)
if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)':
print('Spec license:', parsed.license)
print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
@@ -2071,6 +2307,13 @@ def main():
os.sys.exit(1)
return
+ supported_models = ['unified']
+ if args.mode == 'user':
+ supported_models += ['directional']
+ if parsed.msg_id_model not in supported_models:
+ print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation')
+ os.sys.exit(1)
+
cw = CodeWriter(BaseNlLib(), out_file)
_, spec_kernel = find_kernel_root(args.spec)
@@ -2081,6 +2324,11 @@ def main():
cw.p("/* Do not edit directly, auto-generated from: */")
cw.p(f"/*\t{spec_kernel} */")
cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")
+ if args.exclude_op or args.user_header:
+ line = ''
+ line += ' --user-header '.join([''] + args.user_header)
+ line += ' --exclude-op '.join([''] + args.exclude_op)
+ cw.p(f'/* YNL-ARG{line} */')
cw.nl()
if args.mode == 'uapi':
@@ -2101,7 +2349,16 @@ def main():
if args.out_file:
cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
cw.nl()
- headers = [parsed.uapi_header]
+ headers = ['uapi/' + parsed.uapi_header]
+ else:
+ cw.p('#include <stdlib.h>')
+ cw.p('#include <string.h>')
+ if args.header:
+ cw.p('#include <linux/types.h>')
+ else:
+ cw.p(f'#include "{parsed.name}-user.h"')
+ cw.p('#include "ynl.h"')
+ headers = [parsed.uapi_header]
for definition in parsed['definitions']:
if 'header' in definition:
headers.append(definition['header'])
@@ -2111,9 +2368,6 @@ def main():
if args.mode == "user":
if not args.header:
- cw.p("#include <stdlib.h>")
- cw.p("#include <stdio.h>")
- cw.p("#include <string.h>")
cw.p("#include <libmnl/libmnl.h>")
cw.p("#include <linux/genetlink.h>")
cw.nl()
@@ -2121,6 +2375,8 @@ def main():
cw.p(f'#include "{one}"')
else:
cw.p('struct ynl_sock;')
+ cw.nl()
+ render_user_family(parsed, cw, True)
cw.nl()
if args.mode == "kernel":
@@ -2144,7 +2400,7 @@ def main():
if parsed.kernel_policy in {'per-op', 'split'}:
for op_name, op in parsed.ops.items():
if 'do' in op and 'event' not in op:
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do")
+ ri = RenderInfo(cw, parsed, args.mode, op, "do")
print_req_policy_fwd(cw, ri.struct['request'], ri=ri)
cw.nl()
@@ -2173,7 +2429,7 @@ def main():
for op_mode in ['do', 'dump']:
if op_mode in op and 'request' in op[op_mode]:
cw.p(f"/* {op.enum_name} - {op_mode} */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, op_mode)
+ ri = RenderInfo(cw, parsed, args.mode, op, op_mode)
print_req_policy(cw, ri.struct['request'], ri=ri)
cw.nl()
@@ -2182,11 +2438,18 @@ def main():
print_kernel_family_struct_src(parsed, cw)
if args.mode == "user":
- has_ntf = False
if args.header:
+ cw.p('/* Enums */')
+ put_op_name_fwd(parsed, cw)
+
+ for name, const in parsed.consts.items():
+ if isinstance(const, EnumSet):
+ put_enum_to_str_fwd(parsed, cw, const)
+ cw.nl()
+
cw.p('/* Common nested types */')
- for attr_set, struct in sorted(parsed.pure_nested_structs.items()):
- ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set)
+ for attr_set, struct in parsed.pure_nested_structs.items():
+ ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set)
print_type_full(ri, struct)
for op_name, op in parsed.ops.items():
@@ -2194,7 +2457,7 @@ def main():
if 'do' in op and 'event' not in op:
cw.p(f"/* {op.enum_name} - do */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do")
+ ri = RenderInfo(cw, parsed, args.mode, op, "do")
print_req_type(ri)
print_req_type_helpers(ri)
cw.nl()
@@ -2206,7 +2469,7 @@ def main():
if 'dump' in op:
cw.p(f"/* {op.enum_name} - dump */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'dump')
+ ri = RenderInfo(cw, parsed, args.mode, op, 'dump')
if 'request' in op['dump']:
print_req_type(ri)
print_req_type_helpers(ri)
@@ -2216,39 +2479,41 @@ def main():
print_dump_prototype(ri)
cw.nl()
- if 'notify' in op:
+ if op.has_ntf:
cw.p(f"/* {op.enum_name} - notify */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify')
- has_ntf = True
+ ri = RenderInfo(cw, parsed, args.mode, op, 'notify')
if not ri.type_consistent:
- raise Exception('Only notifications with consistent types supported')
+ raise Exception(f'Only notifications with consistent types supported ({op.name})')
print_wrapped_type(ri)
+ for op_name, op in parsed.ntfs.items():
if 'event' in op:
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'event')
+ ri = RenderInfo(cw, parsed, args.mode, op, 'event')
cw.p(f"/* {op.enum_name} - event */")
print_rsp_type(ri)
cw.nl()
print_wrapped_type(ri)
-
- if has_ntf:
- cw.p('/* --------------- Common notification parsing --------------- */')
- print_ntf_parse_prototype(parsed, cw)
cw.nl()
else:
- cw.p('/* Policies */')
- for name, _ in parsed.attr_sets.items():
- struct = Struct(parsed, name)
- put_typol_fwd(cw, struct)
+ cw.p('/* Enums */')
+ put_op_name(parsed, cw)
+
+ for name, const in parsed.consts.items():
+ if isinstance(const, EnumSet):
+ put_enum_to_str(parsed, cw, const)
cw.nl()
- for name, _ in parsed.attr_sets.items():
+ cw.p('/* Policies */')
+ for name in parsed.pure_nested_structs:
+ struct = Struct(parsed, name)
+ put_typol(cw, struct)
+ for name in parsed.root_sets:
struct = Struct(parsed, name)
put_typol(cw, struct)
cw.p('/* Common nested types */')
- for attr_set, struct in sorted(parsed.pure_nested_structs.items()):
- ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set)
+ for attr_set, struct in parsed.pure_nested_structs.items():
+ ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set)
free_rsp_nested(ri, struct)
if struct.request:
@@ -2260,7 +2525,8 @@ def main():
cw.p(f"/* ============== {op.enum_name} ============== */")
if 'do' in op and 'event' not in op:
cw.p(f"/* {op.enum_name} - do */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do")
+ ri = RenderInfo(cw, parsed, args.mode, op, "do")
+ print_req_free(ri)
print_rsp_free(ri)
parse_rsp_msg(ri)
print_req(ri)
@@ -2268,34 +2534,31 @@ def main():
if 'dump' in op:
cw.p(f"/* {op.enum_name} - dump */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, "dump")
+ ri = RenderInfo(cw, parsed, args.mode, op, "dump")
if not ri.type_consistent:
parse_rsp_msg(ri, deref=True)
print_dump_type_free(ri)
print_dump(ri)
cw.nl()
- if 'notify' in op:
+ if op.has_ntf:
cw.p(f"/* {op.enum_name} - notify */")
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify')
- has_ntf = True
+ ri = RenderInfo(cw, parsed, args.mode, op, 'notify')
if not ri.type_consistent:
- raise Exception('Only notifications with consistent types supported')
+ raise Exception(f'Only notifications with consistent types supported ({op.name})')
print_ntf_type_free(ri)
+ for op_name, op in parsed.ntfs.items():
if 'event' in op:
cw.p(f"/* {op.enum_name} - event */")
- has_ntf = True
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do")
+ ri = RenderInfo(cw, parsed, args.mode, op, "do")
parse_rsp_msg(ri)
- ri = RenderInfo(cw, parsed, args.mode, op, op_name, "event")
+ ri = RenderInfo(cw, parsed, args.mode, op, "event")
print_ntf_type_free(ri)
-
- if has_ntf:
- cw.p('/* --------------- Common notification parsing --------------- */')
- print_ntf_type_parse(parsed, cw, args.mode)
+ cw.nl()
+ render_user_family(parsed, cw, False)
if args.header:
cw.p(f'#endif /* {hdr_prot} */')
diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh
index 74f5de1c2399..8d4ca6a50582 100755
--- a/tools/net/ynl/ynl-regen.sh
+++ b/tools/net/ynl/ynl-regen.sh
@@ -14,11 +14,12 @@ done
KDIR=$(dirname $(dirname $(dirname $(dirname $(realpath $0)))))
-files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\)')
+files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\|user\)')
for f in $files; do
# params: 0 1 2 3
# $YAML YNL-GEN kernel $mode
params=( $(git grep -B1 -h '/\* YNL-GEN' $f | sed 's@/\*\(.*\)\*/@\1@') )
+ args=$(sed -n 's@/\* YNL-ARG \(.*\) \*/@\1@p' $f)
if [ $f -nt ${params[0]} -a -z "$force" ]; then
echo -e "\tSKIP $f"
@@ -26,5 +27,6 @@ for f in $files; do
fi
echo -e "\tGEN ${params[2]}\t$f"
- $TOOL --mode ${params[2]} --${params[3]} --spec $KDIR/${params[0]} -o $f
+ $TOOL --mode ${params[2]} --${params[3]} --spec $KDIR/${params[0]} \
+ $args -o $f
done
diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt
index 744db4218e7a..fe39c2a8ef0d 100644
--- a/tools/objtool/Documentation/objtool.txt
+++ b/tools/objtool/Documentation/objtool.txt
@@ -244,6 +244,11 @@ To achieve the validation, objtool enforces the following rules:
Objtool warnings
----------------
+NOTE: When requesting help with an objtool warning, please recreate with
+OBJTOOL_VERBOSE=1 (e.g., "make OBJTOOL_VERBOSE=1") and send the full
+output, including any disassembly or backtrace below the warning, to the
+objtool maintainers.
+
For asm files, if you're getting an error which doesn't make sense,
first make sure that the affected code follows the above rules.
@@ -298,6 +303,11 @@ the objtool maintainers.
If it's not actually in a callable function (e.g. kernel entry code),
change ENDPROC to END.
+3. file.o: warning: objtool: foo+0x48c: bar() is missing a __noreturn annotation
+
+ The call from foo() to bar() doesn't return, but bar() is missing the
+ __noreturn annotation. NOTE: In addition to annotating the function
+ with __noreturn, please also add it to tools/objtool/noreturns.h.
4. file.o: warning: objtool: func(): can't find starting instruction
or
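
Concretely, clearing warning 3 takes both halves; the NORETURN() entry format
comes from the noreturns.h change to check.c later in this patch (the function
name here is hypothetical):

	/* kernel source: annotate the callee */
	void __noreturn bar(void);

	/* tools/objtool/noreturns.h: add a matching entry */
	NORETURN(bar)
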
diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h
index 73f9ae172fe5..66814fa28024 100644
--- a/tools/objtool/arch/powerpc/include/arch/elf.h
+++ b/tools/objtool/arch/powerpc/include/arch/elf.h
@@ -1,10 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
-
#ifndef _OBJTOOL_ARCH_ELF
#define _OBJTOOL_ARCH_ELF
-#define R_NONE R_PPC_NONE
-#define R_ABS64 R_PPC64_ADDR64
-#define R_ABS32 R_PPC_ADDR32
+#define R_NONE R_PPC_NONE
+#define R_ABS64 R_PPC64_ADDR64
+#define R_ABS32 R_PPC_ADDR32
+#define R_DATA32 R_PPC_REL32
+#define R_DATA64 R_PPC64_REL64
+#define R_TEXT32 R_PPC_REL32
+#define R_TEXT64 R_PPC64_REL32
#endif /* _OBJTOOL_ARCH_ELF */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 9ef024fd648c..c0f25d00181e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -84,7 +84,7 @@ bool arch_pc_relative_reloc(struct reloc *reloc)
* All relocation types where P (the address of the target)
* is included in the computation.
*/
- switch (reloc->type) {
+ switch (reloc_type(reloc)) {
case R_X86_64_PC8:
case R_X86_64_PC16:
case R_X86_64_PC32:
@@ -623,11 +623,11 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
if (!immr || strcmp(immr->sym->name, "pv_ops"))
break;
- idx = (immr->addend + 8) / sizeof(void *);
+ idx = (reloc_addend(immr) + 8) / sizeof(void *);
func = disp->sym;
if (disp->sym->type == STT_SECTION)
- func = find_symbol_by_offset(disp->sym->sec, disp->addend);
+ func = find_symbol_by_offset(disp->sym->sec, reloc_addend(disp));
if (!func) {
WARN("no func for pv_ops[]");
return -1;
@@ -826,3 +826,9 @@ bool arch_is_rethunk(struct symbol *sym)
{
return !strcmp(sym->name, "__x86_return_thunk");
}
+
+bool arch_is_embedded_insn(struct symbol *sym)
+{
+ return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_safe_ret");
+}
diff --git a/tools/objtool/arch/x86/include/arch/elf.h b/tools/objtool/arch/x86/include/arch/elf.h
index ac14987cf687..7131f7f51a4e 100644
--- a/tools/objtool/arch/x86/include/arch/elf.h
+++ b/tools/objtool/arch/x86/include/arch/elf.h
@@ -1,8 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _OBJTOOL_ARCH_ELF
#define _OBJTOOL_ARCH_ELF
-#define R_NONE R_X86_64_NONE
-#define R_ABS64 R_X86_64_64
-#define R_ABS32 R_X86_64_32
+#define R_NONE R_X86_64_NONE
+#define R_ABS32 R_X86_64_32
+#define R_ABS64 R_X86_64_64
+#define R_DATA32 R_X86_64_PC32
+#define R_DATA64 R_X86_64_PC32
+#define R_TEXT32 R_X86_64_PC32
+#define R_TEXT64 R_X86_64_PC32
#endif /* _OBJTOOL_ARCH_ELF */
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 7c97b7391279..29e949579ede 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -42,13 +42,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
struct instruction *insn,
struct reloc *reloc)
{
- /*
- * The x86 alternatives code adjusts the offsets only when it
- * encounters a branch instruction at the very beginning of the
- * replacement group.
- */
- return insn->offset == special_alt->new_off &&
- (insn->type == INSN_CALL || is_jump(insn));
+ return true;
}
/*
@@ -105,10 +99,10 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
!text_reloc->sym->sec->rodata)
return NULL;
- table_offset = text_reloc->addend;
+ table_offset = reloc_addend(text_reloc);
table_sec = text_reloc->sym->sec;
- if (text_reloc->type == R_X86_64_PC32)
+ if (reloc_type(text_reloc) == R_X86_64_PC32)
table_offset += 4;
/*
@@ -138,7 +132,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
* indicates a rare GCC quirk/bug which can leave dead
* code behind.
*/
- if (text_reloc->type == R_X86_64_PC32)
+ if (reloc_type(text_reloc) == R_X86_64_PC32)
file->ignore_unreachables = true;
return rodata_reloc;
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 7c175198d09f..5e21cfb7661d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -93,6 +93,7 @@ static const struct option check_options[] = {
OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+ OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
OPT_END(),
};
@@ -118,6 +119,10 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[])
parse_options(envc, envv, check_options, env_usage, 0);
}
+ env = getenv("OBJTOOL_VERBOSE");
+ if (env && !strcmp(env, "1"))
+ opts.verbose = true;
+
argc = parse_options(argc, argv, check_options, usage, 0);
if (argc != 1)
usage_with_options(usage, check_options);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0fcf99c91400..1384090530db 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -8,7 +8,6 @@
#include <inttypes.h>
#include <sys/mman.h>
-#include <arch/elf.h>
#include <objtool/builtin.h>
#include <objtool/cfi.h>
#include <objtool/arch.h>
@@ -33,6 +32,7 @@ static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
static struct cfi_init_state initial_func_cfi;
static struct cfi_state init_cfi;
static struct cfi_state func_cfi;
+static struct cfi_state force_undefined_cfi;
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset)
@@ -192,51 +192,11 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
struct instruction *insn;
bool empty = true;
- /*
- * Unfortunately these have to be hard coded because the noreturn
- * attribute isn't provided in ELF data. Keep 'em sorted.
- */
+#define NORETURN(func) __stringify(func),
static const char * const global_noreturns[] = {
- "__invalid_creds",
- "__module_put_and_kthread_exit",
- "__reiserfs_panic",
- "__stack_chk_fail",
- "__ubsan_handle_builtin_unreachable",
- "arch_call_rest_init",
- "arch_cpu_idle_dead",
- "btrfs_assertfail",
- "cpu_bringup_and_idle",
- "cpu_startup_entry",
- "do_exit",
- "do_group_exit",
- "do_task_dead",
- "ex_handler_msr_mce",
- "fortify_panic",
- "hlt_play_dead",
- "hv_ghcb_terminate",
- "kthread_complete_and_exit",
- "kthread_exit",
- "kunit_try_catch_throw",
- "lbug_with_loc",
- "machine_real_restart",
- "make_task_dead",
- "mpt_halt_firmware",
- "nmi_panic_self_stop",
- "panic",
- "panic_smp_self_stop",
- "rest_init",
- "resume_play_dead",
- "rewind_stack_and_make_dead",
- "sev_es_terminate",
- "snp_abort",
- "start_kernel",
- "stop_this_cpu",
- "usercopy_abort",
- "x86_64_start_kernel",
- "x86_64_start_reservations",
- "xen_cpu_bringup_again",
- "xen_start_kernel",
+#include "noreturns.h"
};
+#undef NORETURN
if (!func)
return false;
@@ -429,7 +389,7 @@ static int decode_instructions(struct objtool_file *file)
if (!strcmp(sec->name, ".noinstr.text") ||
!strcmp(sec->name, ".entry.text") ||
!strcmp(sec->name, ".cpuidle.text") ||
- !strncmp(sec->name, ".text.__x86.", 12))
+ !strncmp(sec->name, ".text..__x86.", 13))
sec->noinstr = true;
/*
@@ -495,7 +455,7 @@ static int decode_instructions(struct objtool_file *file)
return -1;
}
- if (func->return_thunk || func->alias != func)
+ if (func->embedded_insn || func->alias != func)
continue;
if (!find_insn(file, sec, func->offset)) {
@@ -533,7 +493,7 @@ static int add_pv_ops(struct objtool_file *file, const char *symname)
{
struct symbol *sym, *func;
unsigned long off, end;
- struct reloc *rel;
+ struct reloc *reloc;
int idx;
sym = find_symbol_by_name(file->elf, symname);
@@ -543,19 +503,20 @@ static int add_pv_ops(struct objtool_file *file, const char *symname)
off = sym->offset;
end = off + sym->len;
for (;;) {
- rel = find_reloc_by_dest_range(file->elf, sym->sec, off, end - off);
- if (!rel)
+ reloc = find_reloc_by_dest_range(file->elf, sym->sec, off, end - off);
+ if (!reloc)
break;
- func = rel->sym;
+ func = reloc->sym;
if (func->type == STT_SECTION)
- func = find_symbol_by_offset(rel->sym->sec, rel->addend);
+ func = find_symbol_by_offset(reloc->sym->sec,
+ reloc_addend(reloc));
- idx = (rel->offset - sym->offset) / sizeof(unsigned long);
+ idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);
objtool_pv_add(file, idx, func);
- off = rel->offset + 1;
+ off = reloc_offset(reloc) + 1;
if (off > end)
break;
}
@@ -620,35 +581,40 @@ static struct instruction *find_last_insn(struct objtool_file *file,
*/
static int add_dead_ends(struct objtool_file *file)
{
- struct section *sec;
+ struct section *rsec;
struct reloc *reloc;
struct instruction *insn;
+ s64 addend;
/*
* Check for manually annotated dead ends.
*/
- sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.unreachable");
+ if (!rsec)
goto reachable;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
+
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+
+ addend = reloc_addend(reloc);
+
+ insn = find_insn(file, reloc->sym->sec, addend);
if (insn)
insn = prev_insn_same_sec(file, insn);
- else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
+ else if (addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
WARN("can't find unreachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, reloc->addend);
+ reloc->sym->sec->name, addend);
return -1;
}
} else {
WARN("can't find unreachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, reloc->addend);
+ reloc->sym->sec->name, addend);
return -1;
}
@@ -662,28 +628,32 @@ reachable:
* GCC doesn't know the "ud2" is fatal, so it generates code as if it's
* not a dead end.
*/
- sec = find_section_by_name(file->elf, ".rela.discard.reachable");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.reachable");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
+
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+
+ addend = reloc_addend(reloc);
+
+ insn = find_insn(file, reloc->sym->sec, addend);
if (insn)
insn = prev_insn_same_sec(file, insn);
- else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
+ else if (addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
WARN("can't find reachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, reloc->addend);
+ reloc->sym->sec->name, addend);
return -1;
}
} else {
WARN("can't find reachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, reloc->addend);
+ reloc->sym->sec->name, addend);
return -1;
}
@@ -695,8 +665,8 @@ reachable:
static int create_static_call_sections(struct objtool_file *file)
{
- struct section *sec;
struct static_call_site *site;
+ struct section *sec;
struct instruction *insn;
struct symbol *key_sym;
char *key_name, *tmp;
@@ -716,22 +686,21 @@ static int create_static_call_sections(struct objtool_file *file)
list_for_each_entry(insn, &file->static_call_list, call_node)
idx++;
- sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
- sizeof(struct static_call_site), idx);
+ sec = elf_create_section_pair(file->elf, ".static_call_sites",
+ sizeof(*site), idx, idx * 2);
if (!sec)
return -1;
+ /* Allow modules to modify the low bits of static_call_site::key */
+ sec->sh.sh_flags |= SHF_WRITE;
+
idx = 0;
list_for_each_entry(insn, &file->static_call_list, call_node) {
- site = (struct static_call_site *)sec->data->d_buf + idx;
- memset(site, 0, sizeof(struct static_call_site));
-
/* populate reloc for 'addr' */
- if (elf_add_reloc_to_insn(file->elf, sec,
- idx * sizeof(struct static_call_site),
- R_X86_64_PC32,
- insn->sec, insn->offset))
+ if (!elf_init_reloc_text_sym(file->elf, sec,
+ idx * sizeof(*site), idx * 2,
+ insn->sec, insn->offset))
return -1;
/* find key symbol */
@@ -771,10 +740,10 @@ static int create_static_call_sections(struct objtool_file *file)
free(key_name);
/* populate reloc for 'key' */
- if (elf_add_reloc(file->elf, sec,
- idx * sizeof(struct static_call_site) + 4,
- R_X86_64_PC32, key_sym,
- is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
+ if (!elf_init_reloc_data_sym(file->elf, sec,
+ idx * sizeof(*site) + 4,
+ (idx * 2) + 1, key_sym,
+ is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
return -1;
idx++;
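
Each .static_call_sites entry needs two relocations, which is why the section pair is created with idx * 2 reloc slots: slot idx * 2 for 'addr' and slot (idx * 2) + 1 for 'key'. The entry layout, sketched from its use here (the authoritative definition is in the kernel's static_call headers):

	struct static_call_site {
		s32 addr;	/* reloc idx * 2: PC-relative call site */
		s32 key;	/* reloc (idx * 2) + 1: PC-relative key;
				 * low bits carry flags such as
				 * STATIC_CALL_SITE_TAIL */
	};

The '+ 4' offset used for the key reloc corresponds to sizeof(s32).
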
@@ -802,26 +771,18 @@ static int create_retpoline_sites_sections(struct objtool_file *file)
if (!idx)
return 0;
- sec = elf_create_section(file->elf, ".retpoline_sites", 0,
- sizeof(int), idx);
- if (!sec) {
- WARN("elf_create_section: .retpoline_sites");
+ sec = elf_create_section_pair(file->elf, ".retpoline_sites",
+ sizeof(int), idx, idx);
+ if (!sec)
return -1;
- }
idx = 0;
list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
- int *site = (int *)sec->data->d_buf + idx;
- *site = 0;
-
- if (elf_add_reloc_to_insn(file->elf, sec,
- idx * sizeof(int),
- R_X86_64_PC32,
- insn->sec, insn->offset)) {
- WARN("elf_add_reloc_to_insn: .retpoline_sites");
+ if (!elf_init_reloc_text_sym(file->elf, sec,
+ idx * sizeof(int), idx,
+ insn->sec, insn->offset))
return -1;
- }
idx++;
}
@@ -848,26 +809,18 @@ static int create_return_sites_sections(struct objtool_file *file)
if (!idx)
return 0;
- sec = elf_create_section(file->elf, ".return_sites", 0,
- sizeof(int), idx);
- if (!sec) {
- WARN("elf_create_section: .return_sites");
+ sec = elf_create_section_pair(file->elf, ".return_sites",
+ sizeof(int), idx, idx);
+ if (!sec)
return -1;
- }
idx = 0;
list_for_each_entry(insn, &file->return_thunk_list, call_node) {
- int *site = (int *)sec->data->d_buf + idx;
- *site = 0;
-
- if (elf_add_reloc_to_insn(file->elf, sec,
- idx * sizeof(int),
- R_X86_64_PC32,
- insn->sec, insn->offset)) {
- WARN("elf_add_reloc_to_insn: .return_sites");
+ if (!elf_init_reloc_text_sym(file->elf, sec,
+ idx * sizeof(int), idx,
+ insn->sec, insn->offset))
return -1;
- }
idx++;
}
@@ -900,12 +853,10 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
if (!idx)
return 0;
- sec = elf_create_section(file->elf, ".ibt_endbr_seal", 0,
- sizeof(int), idx);
- if (!sec) {
- WARN("elf_create_section: .ibt_endbr_seal");
+ sec = elf_create_section_pair(file->elf, ".ibt_endbr_seal",
+ sizeof(int), idx, idx);
+ if (!sec)
return -1;
- }
idx = 0;
list_for_each_entry(insn, &file->endbr_list, call_node) {
@@ -920,13 +871,10 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
!strcmp(sym->name, "cleanup_module")))
WARN("%s(): not an indirect call target", sym->name);
- if (elf_add_reloc_to_insn(file->elf, sec,
- idx * sizeof(int),
- R_X86_64_PC32,
- insn->sec, insn->offset)) {
- WARN("elf_add_reloc_to_insn: .ibt_endbr_seal");
+ if (!elf_init_reloc_text_sym(file->elf, sec,
+ idx * sizeof(int), idx,
+ insn->sec, insn->offset))
return -1;
- }
idx++;
}
@@ -938,7 +886,6 @@ static int create_cfi_sections(struct objtool_file *file)
{
struct section *sec;
struct symbol *sym;
- unsigned int *loc;
int idx;
sec = find_section_by_name(file->elf, ".cfi_sites");
@@ -959,7 +906,8 @@ static int create_cfi_sections(struct objtool_file *file)
idx++;
}
- sec = elf_create_section(file->elf, ".cfi_sites", 0, sizeof(unsigned int), idx);
+ sec = elf_create_section_pair(file->elf, ".cfi_sites",
+ sizeof(unsigned int), idx, idx);
if (!sec)
return -1;
@@ -971,13 +919,9 @@ static int create_cfi_sections(struct objtool_file *file)
if (strncmp(sym->name, "__cfi_", 6))
continue;
- loc = (unsigned int *)sec->data->d_buf + idx;
- memset(loc, 0, sizeof(unsigned int));
-
- if (elf_add_reloc_to_insn(file->elf, sec,
- idx * sizeof(unsigned int),
- R_X86_64_PC32,
- sym->sec, sym->offset))
+ if (!elf_init_reloc_text_sym(file->elf, sec,
+ idx * sizeof(unsigned int), idx,
+ sym->sec, sym->offset))
return -1;
idx++;
@@ -988,7 +932,7 @@ static int create_cfi_sections(struct objtool_file *file)
static int create_mcount_loc_sections(struct objtool_file *file)
{
- int addrsize = elf_class_addrsize(file->elf);
+ size_t addr_size = elf_addr_size(file->elf);
struct instruction *insn;
struct section *sec;
int idx;
@@ -1007,25 +951,26 @@ static int create_mcount_loc_sections(struct objtool_file *file)
list_for_each_entry(insn, &file->mcount_loc_list, call_node)
idx++;
- sec = elf_create_section(file->elf, "__mcount_loc", 0, addrsize, idx);
+ sec = elf_create_section_pair(file->elf, "__mcount_loc", addr_size,
+ idx, idx);
if (!sec)
return -1;
- sec->sh.sh_addralign = addrsize;
+ sec->sh.sh_addralign = addr_size;
idx = 0;
list_for_each_entry(insn, &file->mcount_loc_list, call_node) {
- void *loc;
- loc = sec->data->d_buf + idx;
- memset(loc, 0, addrsize);
+ struct reloc *reloc;
- if (elf_add_reloc_to_insn(file->elf, sec, idx,
- addrsize == sizeof(u64) ? R_ABS64 : R_ABS32,
- insn->sec, insn->offset))
+ reloc = elf_init_reloc_text_sym(file->elf, sec, idx * addr_size, idx,
+ insn->sec, insn->offset);
+ if (!reloc)
return -1;
- idx += addrsize;
+ set_reloc_type(file->elf, reloc, addr_size == 8 ? R_ABS64 : R_ABS32);
+
+ idx++;
}
return 0;
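
The two-step pattern above is worth noting: elf_init_reloc_text_sym() initializes a reloc with the default PC-relative text relocation type, and set_reloc_type() then overrides it, because __mcount_loc stores absolute addresses. The generic shape:

	/* create with the default text rela type, then override it */
	reloc = elf_init_reloc_text_sym(elf, sec, offset, idx, insn_sec, insn_off);
	if (!reloc)
		return -1;

	set_reloc_type(elf, reloc, R_ABS64);	/* or any non-default type */
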
@@ -1035,7 +980,6 @@ static int create_direct_call_sections(struct objtool_file *file)
{
struct instruction *insn;
struct section *sec;
- unsigned int *loc;
int idx;
sec = find_section_by_name(file->elf, ".call_sites");
@@ -1052,20 +996,17 @@ static int create_direct_call_sections(struct objtool_file *file)
list_for_each_entry(insn, &file->call_list, call_node)
idx++;
- sec = elf_create_section(file->elf, ".call_sites", 0, sizeof(unsigned int), idx);
+ sec = elf_create_section_pair(file->elf, ".call_sites",
+ sizeof(unsigned int), idx, idx);
if (!sec)
return -1;
idx = 0;
list_for_each_entry(insn, &file->call_list, call_node) {
- loc = (unsigned int *)sec->data->d_buf + idx;
- memset(loc, 0, sizeof(unsigned int));
-
- if (elf_add_reloc_to_insn(file->elf, sec,
- idx * sizeof(unsigned int),
- R_X86_64_PC32,
- insn->sec, insn->offset))
+ if (!elf_init_reloc_text_sym(file->elf, sec,
+ idx * sizeof(unsigned int), idx,
+ insn->sec, insn->offset))
return -1;
idx++;
@@ -1080,28 +1021,29 @@ static int create_direct_call_sections(struct objtool_file *file)
static void add_ignores(struct objtool_file *file)
{
struct instruction *insn;
- struct section *sec;
+ struct section *rsec;
struct symbol *func;
struct reloc *reloc;
- sec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
+ if (!rsec)
return;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
switch (reloc->sym->type) {
case STT_FUNC:
func = reloc->sym;
break;
case STT_SECTION:
- func = find_func_by_offset(reloc->sym->sec, reloc->addend);
+ func = find_func_by_offset(reloc->sym->sec, reloc_addend(reloc));
if (!func)
continue;
break;
default:
- WARN("unexpected relocation symbol type in %s: %d", sec->name, reloc->sym->type);
+ WARN("unexpected relocation symbol type in %s: %d",
+ rsec->name, reloc->sym->type);
continue;
}
@@ -1320,21 +1262,21 @@ static void add_uaccess_safe(struct objtool_file *file)
*/
static int add_ignore_alternatives(struct objtool_file *file)
{
- struct section *sec;
+ struct section *rsec;
struct reloc *reloc;
struct instruction *insn;
- sec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.ignore_alts entry");
return -1;
@@ -1346,16 +1288,33 @@ static int add_ignore_alternatives(struct objtool_file *file)
return 0;
}
+/*
+ * Symbols that replace INSN_CALL_DYNAMIC; every (tail) call to such a
+ * symbol is added to the .retpoline_sites section.
+ */
__weak bool arch_is_retpoline(struct symbol *sym)
{
return false;
}
+/*
+ * Symbols that replace INSN_RETURN; every (tail) call to such a symbol
+ * is added to the .return_sites section.
+ */
__weak bool arch_is_rethunk(struct symbol *sym)
{
return false;
}
+/*
+ * Symbols that are embedded inside other instructions, because sometimes crazy
+ * code exists. These are mostly ignored for validation purposes.
+ */
+__weak bool arch_is_embedded_insn(struct symbol *sym)
+{
+ return false;
+}
+
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
{
struct reloc *reloc;
@@ -1421,10 +1380,8 @@ static void annotate_call_site(struct objtool_file *file,
* noinstr text.
*/
if (opts.hack_noinstr && insn->sec->noinstr && sym->profiling_func) {
- if (reloc) {
- reloc->type = R_NONE;
- elf_write_reloc(file->elf, reloc);
- }
+ if (reloc)
+ set_reloc_type(file->elf, reloc, R_NONE);
elf_write_insn(file->elf, insn->sec,
insn->offset, insn->len,
@@ -1450,10 +1407,8 @@ static void annotate_call_site(struct objtool_file *file,
if (sibling)
WARN_INSN(insn, "tail call to __fentry__ !?!?");
if (opts.mnop) {
- if (reloc) {
- reloc->type = R_NONE;
- elf_write_reloc(file->elf, reloc);
- }
+ if (reloc)
+ set_reloc_type(file->elf, reloc, R_NONE);
elf_write_insn(file->elf, insn->sec,
insn->offset, insn->len,
@@ -1610,7 +1565,7 @@ static int add_jump_destinations(struct objtool_file *file)
dest_off = arch_jump_destination(insn);
} else if (reloc->sym->type == STT_SECTION) {
dest_sec = reloc->sym->sec;
- dest_off = arch_dest_reloc_offset(reloc->addend);
+ dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
} else if (reloc->sym->retpoline_thunk) {
add_retpoline_call(file, insn);
continue;
@@ -1627,7 +1582,7 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->sec->idx) {
dest_sec = reloc->sym->sec;
dest_off = reloc->sym->sym.st_value +
- arch_dest_reloc_offset(reloc->addend);
+ arch_dest_reloc_offset(reloc_addend(reloc));
} else {
/* non-func asm code jumping to another file */
continue;
@@ -1638,14 +1593,14 @@ static int add_jump_destinations(struct objtool_file *file)
struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
/*
- * This is a special case for zen_untrain_ret().
+ * This is a special case for retbleed_untrain_ret().
* It jumps to __x86_return_thunk(), but objtool
* can't find the thunk's starting RET
* instruction, because the RET is also in the
* middle of another instruction. Objtool only
* knows about the outer instruction.
*/
- if (sym && sym->return_thunk) {
+ if (sym && sym->embedded_insn) {
add_return_call(file, insn, false);
continue;
}
@@ -1744,7 +1699,7 @@ static int add_call_destinations(struct objtool_file *file)
}
} else if (reloc->sym->type == STT_SECTION) {
- dest_off = arch_dest_reloc_offset(reloc->addend);
+ dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
dest = find_call_destination(reloc->sym->sec, dest_off);
if (!dest) {
WARN_INSN(insn, "can't find call dest symbol at %s+0x%lx",
@@ -1932,10 +1887,8 @@ static int handle_jump_alt(struct objtool_file *file,
if (opts.hack_jump_label && special_alt->key_addend & 2) {
struct reloc *reloc = insn_reloc(file, orig_insn);
- if (reloc) {
- reloc->type = R_NONE;
- elf_write_reloc(file->elf, reloc);
- }
+ if (reloc)
+ set_reloc_type(file->elf, reloc, R_NONE);
elf_write_insn(file->elf, orig_insn->sec,
orig_insn->offset, orig_insn->len,
arch_nop_insn(orig_insn->len));
@@ -2047,34 +2000,35 @@ out:
}
static int add_jump_table(struct objtool_file *file, struct instruction *insn,
- struct reloc *table)
+ struct reloc *next_table)
{
- struct reloc *reloc = table;
- struct instruction *dest_insn;
- struct alternative *alt;
struct symbol *pfunc = insn_func(insn)->pfunc;
+ struct reloc *table = insn_jump_table(insn);
+ struct instruction *dest_insn;
unsigned int prev_offset = 0;
+ struct reloc *reloc = table;
+ struct alternative *alt;
/*
* Each @reloc is a switch table relocation which points to the target
* instruction.
*/
- list_for_each_entry_from(reloc, &table->sec->reloc_list, list) {
+ for_each_reloc_from(table->sec, reloc) {
/* Check for the end of the table: */
- if (reloc != table && reloc->jump_table_start)
+ if (reloc != table && reloc == next_table)
break;
/* Make sure the table entries are consecutive: */
- if (prev_offset && reloc->offset != prev_offset + 8)
+ if (prev_offset && reloc_offset(reloc) != prev_offset + 8)
break;
/* Detect function pointers from contiguous objects: */
if (reloc->sym->sec == pfunc->sec &&
- reloc->addend == pfunc->offset)
+ reloc_addend(reloc) == pfunc->offset)
break;
- dest_insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!dest_insn)
break;
@@ -2091,7 +2045,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
alt->insn = dest_insn;
alt->next = insn->alts;
insn->alts = alt;
- prev_offset = reloc->offset;
+ prev_offset = reloc_offset(reloc);
}
if (!prev_offset) {
@@ -2135,7 +2089,7 @@ static struct reloc *find_jump_table(struct objtool_file *file,
table_reloc = arch_find_switch_table(file, insn);
if (!table_reloc)
continue;
- dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
+ dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc));
if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
continue;
@@ -2177,29 +2131,39 @@ static void mark_func_jump_tables(struct objtool_file *file,
continue;
reloc = find_jump_table(file, func, insn);
- if (reloc) {
- reloc->jump_table_start = true;
+ if (reloc)
insn->_jump_table = reloc;
- }
}
}
static int add_func_jump_tables(struct objtool_file *file,
struct symbol *func)
{
- struct instruction *insn;
- int ret;
+ struct instruction *insn, *insn_t1 = NULL, *insn_t2;
+ int ret = 0;
func_for_each_insn(file, func, insn) {
if (!insn_jump_table(insn))
continue;
- ret = add_jump_table(file, insn, insn_jump_table(insn));
+ if (!insn_t1) {
+ insn_t1 = insn;
+ continue;
+ }
+
+ insn_t2 = insn;
+
+ ret = add_jump_table(file, insn_t1, insn_jump_table(insn_t2));
if (ret)
return ret;
+
+ insn_t1 = insn_t2;
}
- return 0;
+ if (insn_t1)
+ ret = add_jump_table(file, insn_t1, NULL);
+
+ return ret;
}
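
The pairing logic above replaces the removed jump_table_start flag: each table is now bounded by the start reloc of the function's next table. A worked example for a function with two tables, A then B:

	/*
	 * func_for_each_insn() encounters insn_A (table A), then insn_B (table B):
	 *
	 *   add_jump_table(file, insn_A, insn_jump_table(insn_B));
	 *           table A's reloc walk stops at table B's first reloc
	 *
	 *   add_jump_table(file, insn_B, NULL);
	 *           the final table relies on the remaining stop conditions
	 *           (non-consecutive offsets, or a reloc back into pfunc)
	 */
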
/*
@@ -2240,7 +2204,7 @@ static void set_func_state(struct cfi_state *state)
static int read_unwind_hints(struct objtool_file *file)
{
struct cfi_state cfi = init_cfi;
- struct section *sec, *relocsec;
+ struct section *sec;
struct unwind_hint *hint;
struct instruction *insn;
struct reloc *reloc;
@@ -2250,8 +2214,7 @@ static int read_unwind_hints(struct objtool_file *file)
if (!sec)
return 0;
- relocsec = sec->reloc;
- if (!relocsec) {
+ if (!sec->rsec) {
WARN("missing .rela.discard.unwind_hints section");
return -1;
}
@@ -2272,7 +2235,7 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("can't find insn for unwind_hints[%d]", i);
return -1;
@@ -2280,6 +2243,11 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
+ if (hint->type == UNWIND_HINT_TYPE_UNDEFINED) {
+ insn->cfi = &force_undefined_cfi;
+ continue;
+ }
+
if (hint->type == UNWIND_HINT_TYPE_SAVE) {
insn->hint = false;
insn->save = true;
@@ -2326,16 +2294,17 @@ static int read_unwind_hints(struct objtool_file *file)
static int read_noendbr_hints(struct objtool_file *file)
{
- struct section *sec;
struct instruction *insn;
+ struct section *rsec;
struct reloc *reloc;
- sec = find_section_by_name(file->elf, ".rela.discard.noendbr");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.noendbr");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
- insn = find_insn(file, reloc->sym->sec, reloc->sym->offset + reloc->addend);
+ for_each_reloc(rsec, reloc) {
+ insn = find_insn(file, reloc->sym->sec,
+ reloc->sym->offset + reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.noendbr entry");
return -1;
@@ -2349,21 +2318,21 @@ static int read_noendbr_hints(struct objtool_file *file)
static int read_retpoline_hints(struct objtool_file *file)
{
- struct section *sec;
+ struct section *rsec;
struct instruction *insn;
struct reloc *reloc;
- sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.retpoline_safe entry");
return -1;
@@ -2385,21 +2354,21 @@ static int read_retpoline_hints(struct objtool_file *file)
static int read_instr_hints(struct objtool_file *file)
{
- struct section *sec;
+ struct section *rsec;
struct instruction *insn;
struct reloc *reloc;
- sec = find_section_by_name(file->elf, ".rela.discard.instr_end");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.instr_end");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.instr_end entry");
return -1;
@@ -2408,17 +2377,17 @@ static int read_instr_hints(struct objtool_file *file)
insn->instr--;
}
- sec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.instr_begin entry");
return -1;
@@ -2432,21 +2401,21 @@ static int read_instr_hints(struct objtool_file *file)
static int read_validate_unret_hints(struct objtool_file *file)
{
- struct section *sec;
+ struct section *rsec;
struct instruction *insn;
struct reloc *reloc;
- sec = find_section_by_name(file->elf, ".rela.discard.validate_unret");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", sec->name);
+ WARN("unexpected relocation symbol type in %s", rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.instr_end entry");
return -1;
@@ -2461,23 +2430,23 @@ static int read_validate_unret_hints(struct objtool_file *file)
static int read_intra_function_calls(struct objtool_file *file)
{
struct instruction *insn;
- struct section *sec;
+ struct section *rsec;
struct reloc *reloc;
- sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
- if (!sec)
+ rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
+ if (!rsec)
return 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
+ for_each_reloc(rsec, reloc) {
unsigned long dest_off;
if (reloc->sym->type != STT_SECTION) {
WARN("unexpected relocation symbol type in %s",
- sec->name);
+ rsec->name);
return -1;
}
- insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
WARN("bad .discard.intra_function_call entry");
return -1;
@@ -2550,6 +2519,9 @@ static int classify_symbols(struct objtool_file *file)
if (arch_is_rethunk(func))
func->return_thunk = true;
+ if (arch_is_embedded_insn(func))
+ func->embedded_insn = true;
+
if (arch_ftrace_match(func->name))
func->fentry = true;
@@ -2678,12 +2650,17 @@ static int decode_sections(struct objtool_file *file)
return 0;
}
-static bool is_fentry_call(struct instruction *insn)
+static bool is_special_call(struct instruction *insn)
{
- if (insn->type == INSN_CALL &&
- insn_call_dest(insn) &&
- insn_call_dest(insn)->fentry)
- return true;
+ if (insn->type == INSN_CALL) {
+ struct symbol *dest = insn_call_dest(insn);
+
+ if (!dest)
+ return false;
+
+ if (dest->fentry || dest->embedded_insn)
+ return true;
+ }
return false;
}
@@ -2833,6 +2810,10 @@ static int update_cfi_state(struct instruction *insn,
struct cfi_reg *cfa = &cfi->cfa;
struct cfi_reg *regs = cfi->regs;
+ /* ignore UNWIND_HINT_UNDEFINED regions */
+ if (cfi->force_undefined)
+ return 0;
+
/* stack operations don't make sense with an undefined CFA */
if (cfa->base == CFI_UNDEFINED) {
if (insn_func(insn)) {
@@ -3369,15 +3350,15 @@ static inline bool func_uaccess_safe(struct symbol *func)
static inline const char *call_dest_name(struct instruction *insn)
{
static char pvname[19];
- struct reloc *rel;
+ struct reloc *reloc;
int idx;
if (insn_call_dest(insn))
return insn_call_dest(insn)->name;
- rel = insn_reloc(NULL, insn);
- if (rel && !strcmp(rel->sym->name, "pv_ops")) {
- idx = (rel->addend / sizeof(void *));
+ reloc = insn_reloc(NULL, insn);
+ if (reloc && !strcmp(reloc->sym->name, "pv_ops")) {
+ idx = (reloc_addend(reloc) / sizeof(void *));
snprintf(pvname, sizeof(pvname), "pv_ops[%d]", idx);
return pvname;
}
@@ -3388,14 +3369,14 @@ static inline const char *call_dest_name(struct instruction *insn)
static bool pv_call_dest(struct objtool_file *file, struct instruction *insn)
{
struct symbol *target;
- struct reloc *rel;
+ struct reloc *reloc;
int idx;
- rel = insn_reloc(file, insn);
- if (!rel || strcmp(rel->sym->name, "pv_ops"))
+ reloc = insn_reloc(file, insn);
+ if (!reloc || strcmp(reloc->sym->name, "pv_ops"))
return false;
- idx = (arch_dest_reloc_offset(rel->addend) / sizeof(void *));
+ idx = (arch_dest_reloc_offset(reloc_addend(reloc)) / sizeof(void *));
if (file->pv_ops[idx].clean)
return true;
@@ -3657,8 +3638,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
ret = validate_branch(file, func, alt->insn, state);
if (ret) {
- if (opts.backtrace)
- BT_FUNC("(alt)", insn);
+ BT_INSN(insn, "(alt)");
return ret;
}
}
@@ -3681,7 +3661,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (ret)
return ret;
- if (opts.stackval && func && !is_fentry_call(insn) &&
+ if (opts.stackval && func && !is_special_call(insn) &&
!has_valid_stack_frame(&state)) {
WARN_INSN(insn, "call without frame pointer save/setup");
return 1;
@@ -3703,8 +3683,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
ret = validate_branch(file, func,
insn->jump_dest, state);
if (ret) {
- if (opts.backtrace)
- BT_FUNC("(branch)", insn);
+ BT_INSN(insn, "(branch)");
return ret;
}
}
@@ -3802,8 +3781,8 @@ static int validate_unwind_hint(struct objtool_file *file,
{
if (insn->hint && !insn->visited && !insn->ignore) {
int ret = validate_branch(file, insn_func(insn), insn, *state);
- if (ret && opts.backtrace)
- BT_FUNC("<=== (hint)", insn);
+ if (ret)
+ BT_INSN(insn, "<=== (hint)");
return ret;
}
@@ -3841,7 +3820,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
static int validate_unret(struct objtool_file *file, struct instruction *insn)
{
struct instruction *next, *dest;
- int ret, warnings = 0;
+ int ret;
for (;;) {
next = next_insn_to_validate(file, insn);
@@ -3861,8 +3840,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
ret = validate_unret(file, alt->insn);
if (ret) {
- if (opts.backtrace)
- BT_FUNC("(alt)", insn);
+ BT_INSN(insn, "(alt)");
return ret;
}
}
@@ -3888,10 +3866,8 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
}
ret = validate_unret(file, insn->jump_dest);
if (ret) {
- if (opts.backtrace) {
- BT_FUNC("(branch%s)", insn,
- insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
- }
+ BT_INSN(insn, "(branch%s)",
+ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
return ret;
}
@@ -3913,8 +3889,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
ret = validate_unret(file, dest);
if (ret) {
- if (opts.backtrace)
- BT_FUNC("(call)", insn);
+ BT_INSN(insn, "(call)");
return ret;
}
/*
@@ -3943,7 +3918,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
insn = next;
}
- return warnings;
+ return 0;
}
/*
@@ -4178,7 +4153,6 @@ static int add_prefix_symbols(struct objtool_file *file)
{
struct section *sec;
struct symbol *func;
- int warnings = 0;
for_each_sec(file, sec) {
if (!(sec->sh.sh_flags & SHF_EXECINSTR))
@@ -4192,7 +4166,7 @@ static int add_prefix_symbols(struct objtool_file *file)
}
}
- return warnings;
+ return 0;
}
static int validate_symbol(struct objtool_file *file, struct section *sec,
@@ -4216,8 +4190,8 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
state->uaccess = sym->uaccess_safe;
ret = validate_branch(file, insn_func(insn), insn, *state);
- if (ret && opts.backtrace)
- BT_FUNC("<=== (sym)", insn);
+ if (ret)
+ BT_INSN(insn, "<=== (sym)");
return ret;
}
@@ -4333,8 +4307,8 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
for (reloc = insn_reloc(file, insn);
reloc;
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- reloc->offset + 1,
- (insn->offset + insn->len) - (reloc->offset + 1))) {
+ reloc_offset(reloc) + 1,
+ (insn->offset + insn->len) - (reloc_offset(reloc) + 1))) {
/*
* static_call_update() references the trampoline, which
@@ -4344,10 +4318,11 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
continue;
off = reloc->sym->offset;
- if (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32)
- off += arch_dest_reloc_offset(reloc->addend);
+ if (reloc_type(reloc) == R_X86_64_PC32 ||
+ reloc_type(reloc) == R_X86_64_PLT32)
+ off += arch_dest_reloc_offset(reloc_addend(reloc));
else
- off += reloc->addend;
+ off += reloc_addend(reloc);
dest = find_insn(file, reloc->sym->sec, off);
if (!dest)
@@ -4404,7 +4379,7 @@ static int validate_ibt_data_reloc(struct objtool_file *file,
struct instruction *dest;
dest = find_insn(file, reloc->sym->sec,
- reloc->sym->offset + reloc->addend);
+ reloc->sym->offset + reloc_addend(reloc));
if (!dest)
return 0;
@@ -4417,7 +4392,7 @@ static int validate_ibt_data_reloc(struct objtool_file *file,
return 0;
WARN_FUNC("data relocation to !ENDBR: %s",
- reloc->sec->base, reloc->offset,
+ reloc->sec->base, reloc_offset(reloc),
offstr(dest->sec, dest->offset));
return 1;
@@ -4444,7 +4419,7 @@ static int validate_ibt(struct objtool_file *file)
if (sec->sh.sh_flags & SHF_EXECINSTR)
continue;
- if (!sec->reloc)
+ if (!sec->rsec)
continue;
/*
@@ -4471,7 +4446,7 @@ static int validate_ibt(struct objtool_file *file)
strstr(sec->name, "__patchable_function_entries"))
continue;
- list_for_each_entry(reloc, &sec->reloc->reloc_list, list)
+ for_each_reloc(sec->rsec, reloc)
warnings += validate_ibt_data_reloc(file, reloc);
}
@@ -4511,9 +4486,40 @@ static int validate_sls(struct objtool_file *file)
return warnings;
}
+static bool ignore_noreturn_call(struct instruction *insn)
+{
+ struct symbol *call_dest = insn_call_dest(insn);
+
+ /*
+ * FIXME: hack, we need a real noreturn solution
+ *
+ * Problem is, exc_double_fault() may or may not return, depending on
+ * whether CONFIG_X86_ESPFIX64 is set. But objtool has no visibility
+ * to the kernel config.
+ *
+ * Other potential ways to fix it:
+ *
+ * - have the compiler communicate __noreturn functions somehow
+ * - remove CONFIG_X86_ESPFIX64
+ * - read the .config file
+ * - add a cmdline option
+ * - create a generic objtool annotation format (vs a bunch of custom
+ * formats) and annotate it
+ */
+ if (!strcmp(call_dest->name, "exc_double_fault")) {
+ /* prevent further unreachable warnings for the caller */
+ insn->sym->warned = 1;
+ return true;
+ }
+
+ return false;
+}
+
static int validate_reachable_instructions(struct objtool_file *file)
{
- struct instruction *insn;
+ struct instruction *insn, *prev_insn;
+ struct symbol *call_dest;
+ int warnings = 0;
if (file->ignore_unreachables)
return 0;
@@ -4522,13 +4528,127 @@ static int validate_reachable_instructions(struct objtool_file *file)
if (insn->visited || ignore_unreachable_insn(file, insn))
continue;
+ prev_insn = prev_insn_same_sec(file, insn);
+ if (prev_insn && prev_insn->dead_end) {
+ call_dest = insn_call_dest(prev_insn);
+ if (call_dest && !ignore_noreturn_call(prev_insn)) {
+ WARN_INSN(insn, "%s() is missing a __noreturn annotation",
+ call_dest->name);
+ warnings++;
+ continue;
+ }
+ }
+
WARN_INSN(insn, "unreachable instruction");
- return 1;
+ warnings++;
+ }
+
+ return warnings;
+}
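
Sketched effect of the new check, with a hypothetical function name: when objtool already considers a call a dead end but the compiler kept generating code after it, the report now names the offender instead of emitting a bare "unreachable instruction" warning:

	extern void my_fatal_handler(int code);	/* declaration lacks __noreturn */

	void caller(void)
	{
		my_fatal_handler(1);
		/*
		 * The compiler emits code here; since objtool knows the call
		 * never returns, it now reports:
		 *   "my_fatal_handler() is missing a __noreturn annotation"
		 */
	}
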
+
+/* 'funcs' is a space-separated list of function names */
+static int disas_funcs(const char *funcs)
+{
+ const char *objdump_str, *cross_compile;
+ int size, ret;
+ char *cmd;
+
+ cross_compile = getenv("CROSS_COMPILE");
+ if (!cross_compile)
+ cross_compile = ""; /* getenv() may return NULL; don't pass NULL to snprintf("%s") */
+
+ objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
+ "BEGIN { split(_funcs, funcs); }"
+ "/^$/ { func_match = 0; }"
+ "/<.*>:/ { "
+ "f = gensub(/.*<(.*)>:/, \"\\\\1\", 1);"
+ "for (i in funcs) {"
+ "if (funcs[i] == f) {"
+ "func_match = 1;"
+ "base = strtonum(\"0x\" $1);"
+ "break;"
+ "}"
+ "}"
+ "}"
+ "{"
+ "if (func_match) {"
+ "addr = strtonum(\"0x\" $1);"
+ "printf(\"%%04x \", addr - base);"
+ "print;"
+ "}"
+ "}' 1>&2";
+
+ /* fake snprintf() to calculate the size */
+ size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
+ if (size <= 0) {
+ WARN("objdump string size calculation failed");
+ return -1;
+ }
+
+ cmd = malloc(size);
+ if (!cmd) {
+ perror("malloc");
+ return -1;
+ }
+
+ /* real snprintf() */
+ snprintf(cmd, size, objdump_str, cross_compile, objname, funcs);
+ ret = system(cmd);
+ if (ret) {
+ WARN("disassembly failed: %d", ret);
+ return -1;
}
return 0;
}
+static int disas_warned_funcs(struct objtool_file *file)
+{
+ struct symbol *sym;
+ char *funcs = NULL, *tmp;
+
+ for_each_sym(file, sym) {
+ if (sym->warned) {
+ if (!funcs) {
+ funcs = malloc(strlen(sym->name) + 1);
+ strcpy(funcs, sym->name);
+ } else {
+ tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
+ sprintf(tmp, "%s %s", funcs, sym->name);
+ free(funcs);
+ funcs = tmp;
+ }
+ }
+ }
+
+ if (funcs)
+ disas_funcs(funcs);
+
+ return 0;
+}
+
+struct insn_chunk {
+ void *addr;
+ struct insn_chunk *next;
+};
+
+/*
+ * Reduce peak RSS usage by freeing insns memory before writing the ELF file,
+ * which can trigger more allocations for .debug_* sections whose data hasn't
+ * been read yet.
+ */
+static void free_insns(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct insn_chunk *chunks = NULL, *chunk;
+
+ for_each_insn(file, insn) {
+ if (!insn->idx) {
+ chunk = malloc(sizeof(*chunk));
+ chunk->addr = insn;
+ chunk->next = chunks;
+ chunks = chunk;
+ }
+ }
+
+ for (chunk = chunks; chunk; chunk = chunk->next)
+ free(chunk->addr);
+}
+
int check(struct objtool_file *file)
{
int ret, warnings = 0;
@@ -4537,6 +4657,8 @@ int check(struct objtool_file *file)
init_cfi_state(&init_cfi);
init_cfi_state(&func_cfi);
set_func_state(&func_cfi);
+ init_cfi_state(&force_undefined_cfi);
+ force_undefined_cfi.force_undefined = true;
if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3)))
goto out;
@@ -4673,6 +4795,10 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ free_insns(file);
+
+ if (opts.verbose)
+ disas_warned_funcs(file);
if (opts.stats) {
printf("nr_insns_visited: %ld\n", nr_insns_visited);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 500e92979a31..081befa4674b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -32,16 +32,52 @@ static inline u32 str_hash(const char *str)
#define __elf_table(name) (elf->name##_hash)
#define __elf_bits(name) (elf->name##_bits)
-#define elf_hash_add(name, node, key) \
- hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))])
+#define __elf_table_entry(name, key) \
+ __elf_table(name)[hash_min(key, __elf_bits(name))]
+
+#define elf_hash_add(name, node, key) \
+({ \
+ struct elf_hash_node *__node = node; \
+ __node->next = __elf_table_entry(name, key); \
+ __elf_table_entry(name, key) = __node; \
+})
+
+static inline void __elf_hash_del(struct elf_hash_node *node,
+ struct elf_hash_node **head)
+{
+ struct elf_hash_node *cur, *prev;
+
+ if (node == *head) {
+ *head = node->next;
+ return;
+ }
+
+ for (prev = NULL, cur = *head; cur; prev = cur, cur = cur->next) {
+ if (cur == node) {
+ prev->next = cur->next;
+ break;
+ }
+ }
+}
-#define elf_hash_for_each_possible(name, obj, member, key) \
- hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member)
+#define elf_hash_del(name, node, key) \
+ __elf_hash_del(node, &__elf_table_entry(name, key))
+
+#define elf_list_entry(ptr, type, member) \
+({ \
+ typeof(ptr) __ptr = (ptr); \
+ __ptr ? container_of(__ptr, type, member) : NULL; \
+})
+
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ for (obj = elf_list_entry(__elf_table_entry(name, key), typeof(*obj), member); \
+ obj; \
+ obj = elf_list_entry(obj->member.next, typeof(*(obj)), member))
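
The hlist machinery is replaced by a minimal open-coded singly-linked hash: elf_hash_add() pushes the node at the head of its bucket, and elf_hash_for_each_possible() walks the chain, recovering the enclosing object via container_of() (wrapped in elf_list_entry() to tolerate a NULL chain end). A standalone sketch of the pattern:

	struct elf_hash_node { struct elf_hash_node *next; };

	struct symbol {
		struct elf_hash_node hash;	/* embedded bucket link */
		/* ... */
	};

	/* walking one bucket by hand: */
	struct elf_hash_node *n;
	struct symbol *sym;

	for (n = bucket_head; n; n = n->next) {
		sym = container_of(n, struct symbol, hash);
		/* ... inspect sym ... */
	}
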
#define elf_alloc_hash(name, size) \
({ \
__elf_bits(name) = max(10, ilog2(size)); \
- __elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \
+ __elf_table(name) = mmap(NULL, sizeof(struct elf_hash_node *) << __elf_bits(name), \
PROT_READ|PROT_WRITE, \
MAP_PRIVATE|MAP_ANON, -1, 0); \
if (__elf_table(name) == (void *)-1L) { \
@@ -233,21 +269,22 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se
unsigned long offset, unsigned int len)
{
struct reloc *reloc, *r = NULL;
+ struct section *rsec;
unsigned long o;
- if (!sec->reloc)
+ rsec = sec->rsec;
+ if (!rsec)
return NULL;
- sec = sec->reloc;
-
for_offset_range(o, offset, offset + len) {
elf_hash_for_each_possible(reloc, reloc, hash,
- sec_offset_hash(sec, o)) {
- if (reloc->sec != sec)
+ sec_offset_hash(rsec, o)) {
+ if (reloc->sec != rsec)
continue;
- if (reloc->offset >= offset && reloc->offset < offset + len) {
- if (!r || reloc->offset < r->offset)
+ if (reloc_offset(reloc) >= offset &&
+ reloc_offset(reloc) < offset + len) {
+ if (!r || reloc_offset(reloc) < reloc_offset(r))
r = reloc;
}
}
@@ -263,6 +300,11 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
return find_reloc_by_dest_range(elf, sec, offset, 1);
}
+static bool is_dwarf_section(struct section *sec)
+{
+ return !strncmp(sec->name, ".debug_", 7);
+}
+
static int read_sections(struct elf *elf)
{
Elf_Scn *s = NULL;
@@ -293,7 +335,6 @@ static int read_sections(struct elf *elf)
sec = &elf->section_data[i];
INIT_LIST_HEAD(&sec->symbol_list);
- INIT_LIST_HEAD(&sec->reloc_list);
s = elf_getscn(elf->elf, i);
if (!s) {
@@ -314,7 +355,7 @@ static int read_sections(struct elf *elf)
return -1;
}
- if (sec->sh.sh_size != 0) {
+ if (sec->sh.sh_size != 0 && !is_dwarf_section(sec)) {
sec->data = elf_getdata(s, NULL);
if (!sec->data) {
WARN_ELF("elf_getdata");
@@ -328,12 +369,12 @@ static int read_sections(struct elf *elf)
}
}
- if (sec->sh.sh_flags & SHF_EXECINSTR)
- elf->text_size += sec->sh.sh_size;
-
list_add_tail(&sec->list, &elf->sections);
elf_hash_add(section, &sec->hash, sec->idx);
elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
+
+ if (is_reloc_sec(sec))
+ elf->num_relocs += sec_num_entries(sec);
}
if (opts.stats) {
@@ -356,7 +397,6 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
struct rb_node *pnode;
struct symbol *iter;
- INIT_LIST_HEAD(&sym->reloc_list);
INIT_LIST_HEAD(&sym->pv_target);
sym->alias = sym;
@@ -407,7 +447,7 @@ static int read_symbols(struct elf *elf)
if (symtab_shndx)
shndx_data = symtab_shndx->data;
- symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ symbols_nr = sec_num_entries(symtab);
} else {
/*
* A missing symbol table is actually possible if it's an empty
@@ -533,52 +573,17 @@ err:
return -1;
}
-static struct section *elf_create_reloc_section(struct elf *elf,
- struct section *base,
- int reltype);
-
-int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, s64 addend)
-{
- struct reloc *reloc;
-
- if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
- return -1;
-
- reloc = malloc(sizeof(*reloc));
- if (!reloc) {
- perror("malloc");
- return -1;
- }
- memset(reloc, 0, sizeof(*reloc));
-
- reloc->sec = sec->reloc;
- reloc->offset = offset;
- reloc->type = type;
- reloc->sym = sym;
- reloc->addend = addend;
-
- list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list);
- list_add_tail(&reloc->list, &sec->reloc->reloc_list);
- elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
-
- sec->reloc->sh.sh_size += sec->reloc->sh.sh_entsize;
- sec->reloc->changed = true;
-
- return 0;
-}
-
/*
- * Ensure that any reloc section containing references to @sym is marked
- * changed such that it will get re-generated in elf_rebuild_reloc_sections()
- * with the new symbol index.
+ * @sym's idx has changed. Update the relocs which reference it.
*/
-static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
+static int elf_update_sym_relocs(struct elf *elf, struct symbol *sym)
{
struct reloc *reloc;
- list_for_each_entry(reloc, &sym->reloc_list, sym_reloc_entry)
- reloc->sec->changed = true;
+ for (reloc = sym->relocs; reloc; reloc = reloc->sym_next_reloc)
+ set_reloc_sym(elf, reloc, reloc->sym->idx);
+
+ return 0;
}
/*
@@ -655,7 +660,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
symtab_data->d_align = 1;
symtab_data->d_type = ELF_T_SYM;
- symtab->changed = true;
+ mark_sec_changed(elf, symtab, true);
symtab->truncate = true;
if (t) {
@@ -670,7 +675,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
shndx_data->d_align = sizeof(Elf32_Word);
shndx_data->d_type = ELF_T_WORD;
- symtab_shndx->changed = true;
+ mark_sec_changed(elf, symtab_shndx, true);
symtab_shndx->truncate = true;
}
@@ -734,7 +739,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
return NULL;
}
- new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
+ new_idx = sec_num_entries(symtab);
if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL)
goto non_local;
@@ -746,18 +751,19 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
first_non_local = symtab->sh.sh_info;
old = find_symbol_by_index(elf, first_non_local);
if (old) {
- old->idx = new_idx;
- hlist_del(&old->hash);
- elf_hash_add(symbol, &old->hash, old->idx);
-
- elf_dirty_reloc_sym(elf, old);
+ elf_hash_del(symbol, &old->hash, old->idx);
+ elf_hash_add(symbol, &old->hash, new_idx);
+ old->idx = new_idx;
if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
WARN("elf_update_symbol move");
return NULL;
}
+ if (elf_update_sym_relocs(elf, old))
+ return NULL;
+
new_idx = first_non_local;
}
@@ -774,11 +780,11 @@ non_local:
}
symtab->sh.sh_size += symtab->sh.sh_entsize;
- symtab->changed = true;
+ mark_sec_changed(elf, symtab, true);
if (symtab_shndx) {
symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
- symtab_shndx->changed = true;
+ mark_sec_changed(elf, symtab_shndx, true);
}
return sym;
@@ -841,13 +847,57 @@ elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
return sym;
}
-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int type,
- struct section *insn_sec, unsigned long insn_off)
+static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+ unsigned int reloc_idx,
+ unsigned long offset, struct symbol *sym,
+ s64 addend, unsigned int type)
+{
+ struct reloc *reloc, empty = { 0 };
+
+ if (reloc_idx >= sec_num_entries(rsec)) {
+ WARN("%s: bad reloc_idx %u for %s with %d relocs",
+ __func__, reloc_idx, rsec->name, sec_num_entries(rsec));
+ return NULL;
+ }
+
+ reloc = &rsec->relocs[reloc_idx];
+
+ if (memcmp(reloc, &empty, sizeof(empty))) {
+ WARN("%s: %s: reloc %d already initialized!",
+ __func__, rsec->name, reloc_idx);
+ return NULL;
+ }
+
+ reloc->sec = rsec;
+ reloc->sym = sym;
+
+ set_reloc_offset(elf, reloc, offset);
+ set_reloc_sym(elf, reloc, sym->idx);
+ set_reloc_type(elf, reloc, type);
+ set_reloc_addend(elf, reloc, addend);
+
+ elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
+ reloc->sym_next_reloc = sym->relocs;
+ sym->relocs = reloc;
+
+ return reloc;
+}
+
+struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
+ unsigned long offset,
+ unsigned int reloc_idx,
+ struct section *insn_sec,
+ unsigned long insn_off)
{
struct symbol *sym = insn_sec->sym;
int addend = insn_off;
+ if (!(insn_sec->sh.sh_flags & SHF_EXECINSTR)) {
+ /* insn_sec->sym may still be NULL here, so name the section */
+ WARN("bad call to %s() for data section %s",
+ __func__, insn_sec->name);
+ return NULL;
+ }
+
if (!sym) {
/*
* Due to how weak functions work, we must use section based
@@ -857,108 +907,86 @@ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
*/
sym = elf_create_section_symbol(elf, insn_sec);
if (!sym)
- return -1;
+ return NULL;
insn_sec->sym = sym;
}
- return elf_add_reloc(elf, sec, offset, type, sym, addend);
+ return elf_init_reloc(elf, sec->rsec, reloc_idx, offset, sym, addend,
+ elf_text_rela_type(elf));
}
-static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
+ unsigned long offset,
+ unsigned int reloc_idx,
+ struct symbol *sym,
+ s64 addend)
{
- if (!gelf_getrel(sec->data, i, &reloc->rel)) {
- WARN_ELF("gelf_getrel");
- return -1;
+ if (sym->sec && (sec->sh.sh_flags & SHF_EXECINSTR)) {
+ WARN("bad call to %s() for text symbol %s",
+ __func__, sym->name);
+ return NULL;
}
- reloc->type = GELF_R_TYPE(reloc->rel.r_info);
- reloc->addend = 0;
- reloc->offset = reloc->rel.r_offset;
- *symndx = GELF_R_SYM(reloc->rel.r_info);
- return 0;
-}
-static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
-{
- if (!gelf_getrela(sec->data, i, &reloc->rela)) {
- WARN_ELF("gelf_getrela");
- return -1;
- }
- reloc->type = GELF_R_TYPE(reloc->rela.r_info);
- reloc->addend = reloc->rela.r_addend;
- reloc->offset = reloc->rela.r_offset;
- *symndx = GELF_R_SYM(reloc->rela.r_info);
- return 0;
+ return elf_init_reloc(elf, sec->rsec, reloc_idx, offset, sym, addend,
+ elf_data_rela_type(elf));
}
static int read_relocs(struct elf *elf)
{
- unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
- struct section *sec;
+ unsigned long nr_reloc, max_reloc = 0;
+ struct section *rsec;
struct reloc *reloc;
unsigned int symndx;
struct symbol *sym;
int i;
- if (!elf_alloc_hash(reloc, elf->text_size / 16))
+ if (!elf_alloc_hash(reloc, elf->num_relocs))
return -1;
- list_for_each_entry(sec, &elf->sections, list) {
- if ((sec->sh.sh_type != SHT_RELA) &&
- (sec->sh.sh_type != SHT_REL))
+ list_for_each_entry(rsec, &elf->sections, list) {
+ if (!is_reloc_sec(rsec))
continue;
- sec->base = find_section_by_index(elf, sec->sh.sh_info);
- if (!sec->base) {
+ rsec->base = find_section_by_index(elf, rsec->sh.sh_info);
+ if (!rsec->base) {
WARN("can't find base section for reloc section %s",
- sec->name);
+ rsec->name);
return -1;
}
- sec->base->reloc = sec;
+ rsec->base->rsec = rsec;
nr_reloc = 0;
- sec->reloc_data = calloc(sec->sh.sh_size / sec->sh.sh_entsize, sizeof(*reloc));
- if (!sec->reloc_data) {
+ rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc));
+ if (!rsec->relocs) {
perror("calloc");
return -1;
}
- for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
- reloc = &sec->reloc_data[i];
- switch (sec->sh.sh_type) {
- case SHT_REL:
- if (read_rel_reloc(sec, i, reloc, &symndx))
- return -1;
- break;
- case SHT_RELA:
- if (read_rela_reloc(sec, i, reloc, &symndx))
- return -1;
- break;
- default: return -1;
- }
+ for (i = 0; i < sec_num_entries(rsec); i++) {
+ reloc = &rsec->relocs[i];
- reloc->sec = sec;
- reloc->idx = i;
+ reloc->sec = rsec;
+ symndx = reloc_sym(reloc);
reloc->sym = sym = find_symbol_by_index(elf, symndx);
if (!reloc->sym) {
WARN("can't find reloc entry symbol %d for %s",
- symndx, sec->name);
+ symndx, rsec->name);
return -1;
}
- list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list);
- list_add_tail(&reloc->list, &sec->reloc_list);
elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
+ reloc->sym_next_reloc = sym->relocs;
+ sym->relocs = reloc;
nr_reloc++;
}
max_reloc = max(max_reloc, nr_reloc);
- tot_reloc += nr_reloc;
}
if (opts.stats) {
printf("max_reloc: %lu\n", max_reloc);
- printf("tot_reloc: %lu\n", tot_reloc);
+ printf("num_relocs: %lu\n", elf->num_relocs);
printf("reloc_bits: %d\n", elf->reloc_bits);
}
@@ -977,7 +1005,7 @@ struct elf *elf_open_read(const char *name, int flags)
perror("malloc");
return NULL;
}
- memset(elf, 0, offsetof(struct elf, sections));
+ memset(elf, 0, sizeof(*elf));
INIT_LIST_HEAD(&elf->sections);
@@ -1053,13 +1081,14 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
len = strtab->sh.sh_size;
strtab->sh.sh_size += data->d_size;
- strtab->changed = true;
+
+ mark_sec_changed(elf, strtab, true);
return len;
}
struct section *elf_create_section(struct elf *elf, const char *name,
- unsigned int sh_flags, size_t entsize, int nr)
+ size_t entsize, unsigned int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
@@ -1073,7 +1102,6 @@ struct section *elf_create_section(struct elf *elf, const char *name,
memset(sec, 0, sizeof(*sec));
INIT_LIST_HEAD(&sec->symbol_list);
- INIT_LIST_HEAD(&sec->reloc_list);
s = elf_newscn(elf->elf);
if (!s) {
@@ -1088,7 +1116,6 @@ struct section *elf_create_section(struct elf *elf, const char *name,
}
sec->idx = elf_ndxscn(s);
- sec->changed = true;
sec->data = elf_newdata(s);
if (!sec->data) {
@@ -1117,7 +1144,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_entsize = entsize;
sec->sh.sh_type = SHT_PROGBITS;
sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC | sh_flags;
+ sec->sh.sh_flags = SHF_ALLOC;
/* Add section name to .shstrtab (or .strtab for Clang) */
shstrtab = find_section_by_name(elf, ".shstrtab");
@@ -1135,158 +1162,66 @@ struct section *elf_create_section(struct elf *elf, const char *name,
elf_hash_add(section, &sec->hash, sec->idx);
elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
- elf->changed = true;
+ mark_sec_changed(elf, sec, true);
return sec;
}
-static struct section *elf_create_rel_reloc_section(struct elf *elf, struct section *base)
+static struct section *elf_create_rela_section(struct elf *elf,
+ struct section *sec,
+ unsigned int reloc_nr)
{
- char *relocname;
- struct section *sec;
+ struct section *rsec;
+ char *rsec_name;
- relocname = malloc(strlen(base->name) + strlen(".rel") + 1);
- if (!relocname) {
+ rsec_name = malloc(strlen(sec->name) + strlen(".rela") + 1);
+ if (!rsec_name) {
perror("malloc");
return NULL;
}
- strcpy(relocname, ".rel");
- strcat(relocname, base->name);
+ strcpy(rsec_name, ".rela");
+ strcat(rsec_name, sec->name);
- sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
- free(relocname);
- if (!sec)
+ rsec = elf_create_section(elf, rsec_name, elf_rela_size(elf), reloc_nr);
+ free(rsec_name);
+ if (!rsec)
return NULL;
- base->reloc = sec;
- sec->base = base;
+ rsec->data->d_type = ELF_T_RELA;
+ rsec->sh.sh_type = SHT_RELA;
+ rsec->sh.sh_addralign = elf_addr_size(elf);
+ rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+ rsec->sh.sh_info = sec->idx;
+ rsec->sh.sh_flags = SHF_INFO_LINK;
+
+ rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc));
+ if (!rsec->relocs) {
+ perror("calloc");
+ return NULL;
+ }
- sec->sh.sh_type = SHT_REL;
- sec->sh.sh_addralign = 8;
- sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
- sec->sh.sh_info = base->idx;
- sec->sh.sh_flags = SHF_INFO_LINK;
+ sec->rsec = rsec;
+ rsec->base = sec;
- return sec;
+ return rsec;
}
-static struct section *elf_create_rela_reloc_section(struct elf *elf, struct section *base)
+struct section *elf_create_section_pair(struct elf *elf, const char *name,
+ size_t entsize, unsigned int nr,
+ unsigned int reloc_nr)
{
- char *relocname;
struct section *sec;
- int addrsize = elf_class_addrsize(elf);
-
- relocname = malloc(strlen(base->name) + strlen(".rela") + 1);
- if (!relocname) {
- perror("malloc");
- return NULL;
- }
- strcpy(relocname, ".rela");
- strcat(relocname, base->name);
- if (addrsize == sizeof(u32))
- sec = elf_create_section(elf, relocname, 0, sizeof(Elf32_Rela), 0);
- else
- sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
- free(relocname);
+ sec = elf_create_section(elf, name, entsize, nr);
if (!sec)
return NULL;
- base->reloc = sec;
- sec->base = base;
-
- sec->sh.sh_type = SHT_RELA;
- sec->sh.sh_addralign = addrsize;
- sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
- sec->sh.sh_info = base->idx;
- sec->sh.sh_flags = SHF_INFO_LINK;
+ if (!elf_create_rela_section(elf, sec, reloc_nr))
+ return NULL;
return sec;
}
-static struct section *elf_create_reloc_section(struct elf *elf,
- struct section *base,
- int reltype)
-{
- switch (reltype) {
- case SHT_REL: return elf_create_rel_reloc_section(elf, base);
- case SHT_RELA: return elf_create_rela_reloc_section(elf, base);
- default: return NULL;
- }
-}
-
-static int elf_rebuild_rel_reloc_section(struct section *sec)
-{
- struct reloc *reloc;
- int idx = 0;
- void *buf;
-
- /* Allocate a buffer for relocations */
- buf = malloc(sec->sh.sh_size);
- if (!buf) {
- perror("malloc");
- return -1;
- }
-
- sec->data->d_buf = buf;
- sec->data->d_size = sec->sh.sh_size;
- sec->data->d_type = ELF_T_REL;
-
- idx = 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
- reloc->rel.r_offset = reloc->offset;
- reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
- if (!gelf_update_rel(sec->data, idx, &reloc->rel)) {
- WARN_ELF("gelf_update_rel");
- return -1;
- }
- idx++;
- }
-
- return 0;
-}
-
-static int elf_rebuild_rela_reloc_section(struct section *sec)
-{
- struct reloc *reloc;
- int idx = 0;
- void *buf;
-
- /* Allocate a buffer for relocations with addends */
- buf = malloc(sec->sh.sh_size);
- if (!buf) {
- perror("malloc");
- return -1;
- }
-
- sec->data->d_buf = buf;
- sec->data->d_size = sec->sh.sh_size;
- sec->data->d_type = ELF_T_RELA;
-
- idx = 0;
- list_for_each_entry(reloc, &sec->reloc_list, list) {
- reloc->rela.r_offset = reloc->offset;
- reloc->rela.r_addend = reloc->addend;
- reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
- if (!gelf_update_rela(sec->data, idx, &reloc->rela)) {
- WARN_ELF("gelf_update_rela");
- return -1;
- }
- idx++;
- }
-
- return 0;
-}
-
-static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
-{
- switch (sec->sh.sh_type) {
- case SHT_REL: return elf_rebuild_rel_reloc_section(sec);
- case SHT_RELA: return elf_rebuild_rela_reloc_section(sec);
- default: return -1;
- }
-}
-
int elf_write_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len,
const char *insn)
@@ -1299,37 +1234,8 @@ int elf_write_insn(struct elf *elf, struct section *sec,
}
memcpy(data->d_buf + offset, insn, len);
- elf_flagdata(data, ELF_C_SET, ELF_F_DIRTY);
- elf->changed = true;
-
- return 0;
-}
-
-int elf_write_reloc(struct elf *elf, struct reloc *reloc)
-{
- struct section *sec = reloc->sec;
-
- if (sec->sh.sh_type == SHT_REL) {
- reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
- reloc->rel.r_offset = reloc->offset;
-
- if (!gelf_update_rel(sec->data, reloc->idx, &reloc->rel)) {
- WARN_ELF("gelf_update_rel");
- return -1;
- }
- } else {
- reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
- reloc->rela.r_addend = reloc->addend;
- reloc->rela.r_offset = reloc->offset;
-
- if (!gelf_update_rela(sec->data, reloc->idx, &reloc->rela)) {
- WARN_ELF("gelf_update_rela");
- return -1;
- }
- }
-
- elf->changed = true;
+ mark_sec_changed(elf, sec, true);
return 0;
}
@@ -1401,25 +1307,20 @@ int elf_write(struct elf *elf)
if (sec->truncate)
elf_truncate_section(elf, sec);
- if (sec->changed) {
+ if (sec_changed(sec)) {
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
WARN_ELF("elf_getscn");
return -1;
}
+
+ /* Note this also flags the section dirty */
if (!gelf_update_shdr(s, &sec->sh)) {
WARN_ELF("gelf_update_shdr");
return -1;
}
- if (sec->base &&
- elf_rebuild_reloc_section(elf, sec)) {
- WARN("elf_rebuild_reloc_section");
- return -1;
- }
-
- sec->changed = false;
- elf->changed = true;
+ mark_sec_changed(elf, sec, false);
}
}
@@ -1439,30 +1340,14 @@ int elf_write(struct elf *elf)
void elf_close(struct elf *elf)
{
- struct section *sec, *tmpsec;
- struct symbol *sym, *tmpsym;
- struct reloc *reloc, *tmpreloc;
-
if (elf->elf)
elf_end(elf->elf);
if (elf->fd > 0)
close(elf->fd);
- list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
- list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
- list_del(&sym->list);
- hash_del(&sym->hash);
- }
- list_for_each_entry_safe(reloc, tmpreloc, &sec->reloc_list, list) {
- list_del(&reloc->list);
- hash_del(&reloc->hash);
- }
- list_del(&sec->list);
- free(sec->reloc_data);
- }
-
- free(elf->symbol_data);
- free(elf->section_data);
- free(elf);
+ /*
+ * NOTE: All remaining allocations are leaked on purpose. Objtool is
+ * about to exit anyway.
+ */
}
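
To summarize the API shift for callers: struct reloc no longer caches offset/type/addend; the accessors read and write the relocation entry in the mapped section data directly (their definitions live in elf.h, outside this hunk). The migration pattern seen throughout check.c:

	/* before */
	addend = reloc->addend;
	reloc->type = R_NONE;
	elf_write_reloc(file->elf, reloc);

	/* after: operate on the underlying rela entry in place */
	addend = reloc_addend(reloc);
	set_reloc_type(file->elf, reloc, R_NONE);
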
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 2b6d2ce4f9a5..0b303eba660e 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
bool arch_is_rethunk(struct symbol *sym);
+bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 2a108e648b7a..fcca6662c8b4 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -37,6 +37,7 @@ struct opts {
bool no_unreachable;
bool sec_address;
bool stats;
+ bool verbose;
};
extern struct opts opts;
diff --git a/tools/objtool/include/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h
index b1258e79a1b7..c8a6bec4f6b9 100644
--- a/tools/objtool/include/objtool/cfi.h
+++ b/tools/objtool/include/objtool/cfi.h
@@ -36,6 +36,7 @@ struct cfi_state {
bool drap;
bool signal;
bool end;
+ bool force_undefined;
};
#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index e1ca588eb69d..9f71e988eca4 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -12,6 +12,7 @@
#include <linux/hashtable.h>
#include <linux/rbtree.h>
#include <linux/jhash.h>
+#include <arch/elf.h>
#ifdef LIBELF_USE_DEPRECATED
# define elf_getshdrnum elf_getshnum
@@ -25,28 +26,31 @@
#define ELF_C_READ_MMAP ELF_C_READ
#endif
+struct elf_hash_node {
+ struct elf_hash_node *next;
+};
+
struct section {
struct list_head list;
- struct hlist_node hash;
- struct hlist_node name_hash;
+ struct elf_hash_node hash;
+ struct elf_hash_node name_hash;
GElf_Shdr sh;
struct rb_root_cached symbol_tree;
struct list_head symbol_list;
- struct list_head reloc_list;
- struct section *base, *reloc;
+ struct section *base, *rsec;
struct symbol *sym;
Elf_Data *data;
char *name;
int idx;
- bool changed, text, rodata, noinstr, init, truncate;
- struct reloc *reloc_data;
+ bool _changed, text, rodata, noinstr, init, truncate;
+ struct reloc *relocs;
};
struct symbol {
struct list_head list;
struct rb_node node;
- struct hlist_node hash;
- struct hlist_node name_hash;
+ struct elf_hash_node hash;
+ struct elf_hash_node name_hash;
GElf_Sym sym;
struct section *sec;
char *name;
@@ -61,37 +65,28 @@ struct symbol {
u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
+ u8 warned : 1;
+ u8 embedded_insn : 1;
struct list_head pv_target;
- struct list_head reloc_list;
+ struct reloc *relocs;
};
struct reloc {
- struct list_head list;
- struct hlist_node hash;
- union {
- GElf_Rela rela;
- GElf_Rel rel;
- };
+ struct elf_hash_node hash;
struct section *sec;
struct symbol *sym;
- struct list_head sym_reloc_entry;
- unsigned long offset;
- unsigned int type;
- s64 addend;
- int idx;
- bool jump_table_start;
+ struct reloc *sym_next_reloc;
};
-#define ELF_HASH_BITS 20
-
struct elf {
Elf *elf;
GElf_Ehdr ehdr;
int fd;
bool changed;
char *name;
- unsigned int text_size, num_files;
+ unsigned int num_files;
struct list_head sections;
+ unsigned long num_relocs;
int symbol_bits;
int symbol_name_bits;
@@ -99,44 +94,54 @@ struct elf {
int section_name_bits;
int reloc_bits;
- struct hlist_head *symbol_hash;
- struct hlist_head *symbol_name_hash;
- struct hlist_head *section_hash;
- struct hlist_head *section_name_hash;
- struct hlist_head *reloc_hash;
+ struct elf_hash_node **symbol_hash;
+ struct elf_hash_node **symbol_name_hash;
+ struct elf_hash_node **section_hash;
+ struct elf_hash_node **section_name_hash;
+ struct elf_hash_node **reloc_hash;
struct section *section_data;
struct symbol *symbol_data;
};
-#define OFFSET_STRIDE_BITS 4
-#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS)
-#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1))
-
-#define for_offset_range(_offset, _start, _end) \
- for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
- _offset >= ((_start) & OFFSET_STRIDE_MASK) && \
- _offset <= ((_end) & OFFSET_STRIDE_MASK); \
- _offset += OFFSET_STRIDE)
+struct elf *elf_open_read(const char *name, int flags);
-static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
-{
- u32 ol, oh, idx = sec->idx;
+struct section *elf_create_section(struct elf *elf, const char *name,
+ size_t entsize, unsigned int nr);
+struct section *elf_create_section_pair(struct elf *elf, const char *name,
+ size_t entsize, unsigned int nr,
+ unsigned int reloc_nr);
- offset &= OFFSET_STRIDE_MASK;
+struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size);
- ol = offset;
- oh = (offset >> 16) >> 16;
+struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
+ unsigned long offset,
+ unsigned int reloc_idx,
+ struct section *insn_sec,
+ unsigned long insn_off);
- __jhash_mix(ol, oh, idx);
+struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
+ unsigned long offset,
+ unsigned int reloc_idx,
+ struct symbol *sym,
+ s64 addend);
- return ol;
-}
+int elf_write_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len,
+ const char *insn);
+int elf_write(struct elf *elf);
+void elf_close(struct elf *elf);
-static inline u32 reloc_hash(struct reloc *reloc)
-{
- return sec_offset_hash(reloc->sec, reloc->offset);
-}
+struct section *find_section_by_name(const struct elf *elf, const char *name);
+struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
+int find_symbol_hole_containing(const struct section *sec, unsigned long offset);
+struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
+struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len);
+struct symbol *find_func_containing(struct section *sec, unsigned long offset);
/*
* Try to see if it's a whole archive (vmlinux.o or module).
@@ -148,42 +153,147 @@ static inline bool has_multiple_files(struct elf *elf)
return elf->num_files > 1;
}
-static inline int elf_class_addrsize(struct elf *elf)
+static inline size_t elf_addr_size(struct elf *elf)
{
- if (elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32)
- return sizeof(u32);
- else
- return sizeof(u64);
+ return elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32 ? 4 : 8;
}
-struct elf *elf_open_read(const char *name, int flags);
-struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+static inline size_t elf_rela_size(struct elf *elf)
+{
+ return elf_addr_size(elf) == 4 ? sizeof(Elf32_Rela) : sizeof(Elf64_Rela);
+}
-struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size);
+static inline unsigned int elf_data_rela_type(struct elf *elf)
+{
+ return elf_addr_size(elf) == 4 ? R_DATA32 : R_DATA64;
+}
-int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, s64 addend);
-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int type,
- struct section *insn_sec, unsigned long insn_off);
+static inline unsigned int elf_text_rela_type(struct elf *elf)
+{
+ return elf_addr_size(elf) == 4 ? R_TEXT32 : R_TEXT64;
+}
-int elf_write_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int len,
- const char *insn);
-int elf_write_reloc(struct elf *elf, struct reloc *reloc);
-int elf_write(struct elf *elf);
-void elf_close(struct elf *elf);
+static inline bool is_reloc_sec(struct section *sec)
+{
+ return sec->sh.sh_type == SHT_RELA || sec->sh.sh_type == SHT_REL;
+}
-struct section *find_section_by_name(const struct elf *elf, const char *name);
-struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
-struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
-struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
-struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
-int find_symbol_hole_containing(const struct section *sec, unsigned long offset);
-struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
-struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int len);
-struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+static inline bool sec_changed(struct section *sec)
+{
+ return sec->_changed;
+}
+
+static inline void mark_sec_changed(struct elf *elf, struct section *sec,
+ bool changed)
+{
+ sec->_changed = changed;
+ elf->changed |= changed;
+}
+
+static inline unsigned int sec_num_entries(struct section *sec)
+{
+ return sec->sh.sh_size / sec->sh.sh_entsize;
+}
+
+static inline unsigned int reloc_idx(struct reloc *reloc)
+{
+ return reloc - reloc->sec->relocs;
+}
+
+static inline void *reloc_rel(struct reloc *reloc)
+{
+ struct section *rsec = reloc->sec;
+
+ return rsec->data->d_buf + (reloc_idx(reloc) * rsec->sh.sh_entsize);
+}
+
+static inline bool is_32bit_reloc(struct reloc *reloc)
+{
+ /*
+ * Elf32_Rel: 8 bytes
+ * Elf32_Rela: 12 bytes
+ * Elf64_Rel: 16 bytes
+ * Elf64_Rela: 24 bytes
+ */
+ return reloc->sec->sh.sh_entsize < 16;
+}
+
+#define __get_reloc_field(reloc, field) \
+({ \
+ is_32bit_reloc(reloc) ? \
+ ((Elf32_Rela *)reloc_rel(reloc))->field : \
+ ((Elf64_Rela *)reloc_rel(reloc))->field; \
+})
+
+#define __set_reloc_field(reloc, field, val) \
+({ \
+ if (is_32bit_reloc(reloc)) \
+ ((Elf32_Rela *)reloc_rel(reloc))->field = val; \
+ else \
+ ((Elf64_Rela *)reloc_rel(reloc))->field = val; \
+})
+
+static inline u64 reloc_offset(struct reloc *reloc)
+{
+ return __get_reloc_field(reloc, r_offset);
+}
+
+static inline void set_reloc_offset(struct elf *elf, struct reloc *reloc, u64 offset)
+{
+ __set_reloc_field(reloc, r_offset, offset);
+ mark_sec_changed(elf, reloc->sec, true);
+}
+
+static inline s64 reloc_addend(struct reloc *reloc)
+{
+ return __get_reloc_field(reloc, r_addend);
+}
+
+static inline void set_reloc_addend(struct elf *elf, struct reloc *reloc, s64 addend)
+{
+ __set_reloc_field(reloc, r_addend, addend);
+ mark_sec_changed(elf, reloc->sec, true);
+}
+
+
+static inline unsigned int reloc_sym(struct reloc *reloc)
+{
+ u64 info = __get_reloc_field(reloc, r_info);
+
+ return is_32bit_reloc(reloc) ?
+ ELF32_R_SYM(info) :
+ ELF64_R_SYM(info);
+}
+
+static inline unsigned int reloc_type(struct reloc *reloc)
+{
+ u64 info = __get_reloc_field(reloc, r_info);
+
+ return is_32bit_reloc(reloc) ?
+ ELF32_R_TYPE(info) :
+ ELF64_R_TYPE(info);
+}
+
+static inline void set_reloc_sym(struct elf *elf, struct reloc *reloc, unsigned int sym)
+{
+ u64 info = is_32bit_reloc(reloc) ?
+ ELF32_R_INFO(sym, reloc_type(reloc)) :
+ ELF64_R_INFO(sym, reloc_type(reloc));
+
+ __set_reloc_field(reloc, r_info, info);
+
+ mark_sec_changed(elf, reloc->sec, true);
+}
+static inline void set_reloc_type(struct elf *elf, struct reloc *reloc, unsigned int type)
+{
+ u64 info = is_32bit_reloc(reloc) ?
+ ELF32_R_INFO(reloc_sym(reloc), type) :
+ ELF64_R_INFO(reloc_sym(reloc), type);
+
+ __set_reloc_field(reloc, r_info, info);
+
+ mark_sec_changed(elf, reloc->sec, true);
+}
#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
@@ -197,4 +307,44 @@ struct symbol *find_func_containing(struct section *sec, unsigned long offset);
for_each_sec(file, __sec) \
sec_for_each_sym(__sec, sym)
+#define for_each_reloc(rsec, reloc) \
+ for (int __i = 0, __fake = 1; __fake; __fake = 0) \
+ for (reloc = rsec->relocs; \
+ __i < sec_num_entries(rsec); \
+ __i++, reloc++)
+
+#define for_each_reloc_from(rsec, reloc) \
+ for (int __i = reloc_idx(reloc); \
+ __i < sec_num_entries(rsec); \
+ __i++, reloc++)
+
+#define OFFSET_STRIDE_BITS 4
+#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS)
+#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1))
+
+#define for_offset_range(_offset, _start, _end) \
+ for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
+ _offset >= ((_start) & OFFSET_STRIDE_MASK) && \
+ _offset <= ((_end) & OFFSET_STRIDE_MASK); \
+ _offset += OFFSET_STRIDE)
+
+static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
+{
+ u32 ol, oh, idx = sec->idx;
+
+ offset &= OFFSET_STRIDE_MASK;
+
+ ol = offset;
+ oh = (offset >> 16) >> 16;
+
+ __jhash_mix(ol, oh, idx);
+
+ return ol;
+}
+
+static inline u32 reloc_hash(struct reloc *reloc)
+{
+ return sec_offset_hash(reloc->sec, reloc_offset(reloc));
+}
+
#endif /* _OBJTOOL_ELF_H */
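
The accessors above read relocation fields directly out of the raw section
data, picking the 32- or 64-bit struct overlay from sh_entsize alone. A
minimal standalone sketch of that technique (plain C, illustrative names,
not objtool code):

    #include <elf.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Elf32_Rela is 12 bytes, Elf64_Rel is 16 and Elf64_Rela is 24,
     * so any entry size below 16 must be a 32-bit layout.
     */
    static uint64_t rela_offset(const void *ent, size_t entsize)
    {
        if (entsize < 16)
            return ((const Elf32_Rela *)ent)->r_offset;
        return ((const Elf64_Rela *)ent)->r_offset;
    }

    int main(void)
    {
        Elf64_Rela r64 = { .r_offset = 0x1234 };
        Elf32_Rela r32 = { .r_offset = 0x5678 };

        printf("%#llx %#llx\n",
               (unsigned long long)rela_offset(&r64, sizeof(r64)),
               (unsigned long long)rela_offset(&r32, sizeof(r32)));
        return 0;
    }
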
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index b1c920dc9516..ac04d3fe4dd9 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -55,15 +55,22 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define WARN_INSN(insn, format, ...) \
({ \
- WARN_FUNC(format, insn->sec, insn->offset, ##__VA_ARGS__); \
+ struct instruction *_insn = (insn); \
+ if (!_insn->sym || !_insn->sym->warned) \
+ WARN_FUNC(format, _insn->sec, _insn->offset, \
+ ##__VA_ARGS__); \
+ if (_insn->sym) \
+ _insn->sym->warned = 1; \
})
-#define BT_FUNC(format, insn, ...) \
-({ \
- struct instruction *_insn = (insn); \
- char *_str = offstr(_insn->sec, _insn->offset); \
- WARN(" %s: " format, _str, ##__VA_ARGS__); \
- free(_str); \
+#define BT_INSN(insn, format, ...) \
+({ \
+ if (opts.verbose || opts.backtrace) { \
+ struct instruction *_insn = (insn); \
+ char *_str = offstr(_insn->sec, _insn->offset); \
+ WARN(" %s: " format, _str, ##__VA_ARGS__); \
+ free(_str); \
+ } \
})
#define WARN_ELF(format, ...)
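
The reworked WARN_INSN above suppresses repeat warnings per symbol via the
new 'warned' bit. A hedged, standalone sketch of that suppress-after-first
pattern (illustrative names; GNU statement expressions, as in the macros
themselves):

    #include <stdio.h>

    struct item {
        const char *name;
        unsigned warned : 1;    /* set once the first warning has fired */
    };

    #define WARN_ONCE_PER_ITEM(it, format, ...)              \
    ({                                                       \
        struct item *_it = (it);                             \
        if (!_it->warned)                                    \
            fprintf(stderr, "%s: " format "\n",              \
                    _it->name, ##__VA_ARGS__);               \
        _it->warned = 1;                                     \
    })

    int main(void)
    {
        struct item func = { .name = "some_func" };

        WARN_ONCE_PER_ITEM(&func, "undefined stack state");
        WARN_ONCE_PER_ITEM(&func, "undefined stack state"); /* suppressed */
        return 0;
    }
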
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
new file mode 100644
index 000000000000..e45c7cb1d5bc
--- /dev/null
+++ b/tools/objtool/noreturns.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This is a (sorted!) list of all known __noreturn functions in the kernel.
+ * It's needed for objtool to properly reverse-engineer the control flow graph.
+ *
+ * Yes, this is unfortunate. A better solution is in the works.
+ */
+NORETURN(__invalid_creds)
+NORETURN(__kunit_abort)
+NORETURN(__module_put_and_kthread_exit)
+NORETURN(__reiserfs_panic)
+NORETURN(__stack_chk_fail)
+NORETURN(__ubsan_handle_builtin_unreachable)
+NORETURN(arch_call_rest_init)
+NORETURN(arch_cpu_idle_dead)
+NORETURN(cpu_bringup_and_idle)
+NORETURN(cpu_startup_entry)
+NORETURN(do_exit)
+NORETURN(do_group_exit)
+NORETURN(do_task_dead)
+NORETURN(ex_handler_msr_mce)
+NORETURN(fortify_panic)
+NORETURN(hlt_play_dead)
+NORETURN(hv_ghcb_terminate)
+NORETURN(kthread_complete_and_exit)
+NORETURN(kthread_exit)
+NORETURN(kunit_try_catch_throw)
+NORETURN(machine_real_restart)
+NORETURN(make_task_dead)
+NORETURN(mpt_halt_firmware)
+NORETURN(nmi_panic_self_stop)
+NORETURN(panic)
+NORETURN(panic_smp_self_stop)
+NORETURN(rest_init)
+NORETURN(rewind_stack_and_make_dead)
+NORETURN(sev_es_terminate)
+NORETURN(snp_abort)
+NORETURN(start_kernel)
+NORETURN(stop_this_cpu)
+NORETURN(usercopy_abort)
+NORETURN(x86_64_start_kernel)
+NORETURN(x86_64_start_reservations)
+NORETURN(xen_cpu_bringup_again)
+NORETURN(xen_start_kernel)
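
noreturns.h is consumed X-macro style: the includer defines NORETURN() to
whatever expansion it needs before pulling in the header. The actual objtool
consumer is not part of this hunk, so the sketch below is illustrative only:

    #include <stdio.h>
    #include <string.h>

    /* Expand the list into a string table (hypothetical consumer). */
    static const char * const noreturn_names[] = {
    #define NORETURN(func) #func,
    #include "noreturns.h"
    #undef NORETURN
    };

    static int is_known_noreturn(const char *name)
    {
        for (size_t i = 0; i < sizeof(noreturn_names) / sizeof(noreturn_names[0]); i++)
            if (!strcmp(noreturn_names[i], name))
                return 1;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", is_known_noreturn("panic"));    /* prints 1 */
        return 0;
    }
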
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 48efd1e2f00d..bae343908867 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -118,8 +118,8 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec,
orc->bp_offset = bswap_if_needed(elf, orc->bp_offset);
/* populate reloc for ip */
- if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32,
- insn_sec, insn_off))
+ if (!elf_init_reloc_text_sym(elf, ip_sec, idx * sizeof(int), idx,
+ insn_sec, insn_off))
return -1;
return 0;
@@ -237,12 +237,12 @@ int orc_create(struct objtool_file *file)
WARN("file already has .orc_unwind section, skipping");
return -1;
}
- orc_sec = elf_create_section(file->elf, ".orc_unwind", 0,
+ orc_sec = elf_create_section(file->elf, ".orc_unwind",
sizeof(struct orc_entry), nr);
if (!orc_sec)
return -1;
- sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+ sec = elf_create_section_pair(file->elf, ".orc_unwind_ip", sizeof(int), nr, nr);
if (!sec)
return -1;
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index baa85c31526b..91b1950f5bd8 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -62,7 +62,7 @@ static void reloc_to_sec_off(struct reloc *reloc, struct section **sec,
unsigned long *off)
{
*sec = reloc->sym->sec;
- *off = reloc->sym->offset + reloc->addend;
+ *off = reloc->sym->offset + reloc_addend(reloc);
}
static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
@@ -126,7 +126,7 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
sec, offset + entry->key);
return -1;
}
- alt->key_addend = key_reloc->addend;
+ alt->key_addend = reloc_addend(key_reloc);
}
return 0;
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index ba5d942e4c6a..4407b106d977 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -250,11 +250,20 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
+# Generate date from either KBUILD_BUILD_TIMESTAMP or git log of
+# the doc input file
+PERF_DATE = $(strip \
+ $(if $(KBUILD_BUILD_TIMESTAMP), \
+ $(shell date -u -d '$(KBUILD_BUILD_TIMESTAMP)' +%Y-%m-%d), \
+ $(shell git log -1 --pretty="format:%cd" \
+ --date=short --no-show-signature $<)))
+
ifdef USE_ASCIIDOCTOR
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b manpage -d manpage \
- $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
+ -adocdate=$(PERF_DATE) -o $@+ $< && \
mv $@+ $@
endif
@@ -266,9 +275,7 @@ $(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b docbook -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
- -aperf_date=$(shell git log -1 --pretty="format:%cd" \
- --date=short --no-show-signature $<) \
- -o $@+ $< && \
+ -aperf_date=$(PERF_DATE) -o $@+ $< && \
mv $@+ $@
XSLT = docbook.xsl
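
In practice this makes man page dates reproducible: building with, say,
KBUILD_BUILD_TIMESTAMP=2023-06-01 stamps every page with that date, while
builds without it keep falling back to each source file's last git commit
date, as before.
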
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index e56ae54805a8..1478068ad5dd 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -669,7 +669,7 @@ llvm.*::
"$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
"-Wno-unused-value -Wno-pointer-sign " \
"-working-directory $WORKING_DIR " \
- "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
+ "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
llvm.clang-opt::
Options passed to clang.
diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt
index fb22e3b31dc5..8887cc20a809 100644
--- a/tools/perf/Documentation/perf-dlfilter.txt
+++ b/tools/perf/Documentation/perf-dlfilter.txt
@@ -64,6 +64,12 @@ internal filtering.
If implemented, 'filter_description' should return a one-line description
of the filter, and optionally a longer description.
+Do not assume the 'sample' argument is valid (dereferenceable)
+after 'filter_event' and 'filter_event_early' return.
+
+Do not assume data referenced by pointers in struct perf_dlfilter_sample
+is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.
+
The perf_dlfilter_sample structure
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -150,7 +156,8 @@ struct perf_dlfilter_fns {
const char *(*srcline)(void *ctx, __u32 *line_number);
struct perf_event_attr *(*attr)(void *ctx);
__s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
- void *(*reserved[120])(void *);
+ void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
+ void *(*reserved[119])(void *);
};
----
@@ -161,7 +168,8 @@ struct perf_dlfilter_fns {
'args' returns arguments from --dlarg options.
'resolve_address' provides information about 'address'. al->size must be set
-before calling. Returns 0 on success, -1 otherwise.
+before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present,
+see below) when 'al' data is no longer needed.
'insn' returns instruction bytes and length.
@@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise.
'object_code' reads object code and returns the number of bytes read.
+'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL)
+after resolve_address() to free any associated resources.
+
+Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable)
+after 'filter_event' and 'filter_event_early' return.
+
The perf_dlfilter_al structure
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -197,9 +211,13 @@ struct perf_dlfilter_al {
/* Below members are only populated by resolve_ip() */
__u8 filtered; /* true if this sample event will be filtered out */
const char *comm;
+ void *priv; /* Private data. Do not change */
};
----
+Do not assume data referenced by pointers in struct perf_dlfilter_al
+is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.
+
perf_dlfilter_sample flags
~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 6e5ba3cd2b72..30eea576721f 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -36,6 +36,9 @@ COMMON OPTIONS
--input=<file>::
Input file name. (default: perf.data unless stdin is a fifo)
+--output=<file>::
+	Output file name for 'perf lock contention' and 'perf lock report'.
+
-v::
--verbose::
Be more verbose (show symbol address, etc).
@@ -200,6 +203,11 @@ CONTENTION OPTIONS
Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock'
and 'irq_enter_rcu'.
+-x::
+--field-separator=<SEP>::
+ Show results using a CSV-style output to make it easy to import directly
+ into spreadsheets. Columns are separated by the string specified in SEP.
+
SEE ALSO
--------
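
As a usage sketch for the two new options: 'perf lock contention -x,' emits
comma-separated columns ready for a spreadsheet, and combining it with
--output=<file> writes that CSV-style report to a file instead of stdout.
Any separator string works, not just a single character.
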
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 777a0d8ba7d1..ff9a52e44688 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -130,7 +130,7 @@ OPTIONS
-F::
--fields::
Comma separated list of fields to print. Options are:
- comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth,
phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat,
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 29bdcfa93f04..8f789fa1242e 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -308,6 +308,14 @@ use --per-die in addition to -a. (system-wide). The output includes the
die number and the number of online processors on that die. This is
useful to gauge the amount of aggregation.
+--per-cache::
+Aggregate counts per cache instance for system-wide mode measurements. By
+default, the aggregation happens for the cache level at the highest index
+in the system. To specify a particular level, mention the cache level
+alongside the option in the format [Ll][1-9][0-9]*. For example:
+Using option "--per-cache=l3" or "--per-cache=L3" will aggregate the
+information at the boundary of the level 3 cache in the system.
+
--per-core::
Aggregate counts per physical processor for system-wide mode measurements. This
is a useful mode to detect imbalance between physical cores. To enable this mode,
@@ -353,6 +361,15 @@ small group that need not have multiplexing is lowered. This option
forbids the event merging logic from sharing events between groups and
may be used to increase accuracy in this case.
+--metric-no-threshold::
+Metric thresholds may increase the number of events necessary to
+compute whether a metric has exceeded its threshold expression. This
+may not be desirable, for example, as the events can introduce
+multiplexing. This option disables adding threshold expression
+events for a metric. However, if there are sufficient events to
+compute the threshold then the threshold is still computed and used to
+color the metric's computed value.
+
--quiet::
Don't print output, warnings or messages. This is useful with perf stat
record below to only write data to the perf.data file.
@@ -379,6 +396,14 @@ Aggregate counts per processor socket for system-wide mode measurements.
--per-die::
Aggregate counts per processor die for system-wide mode measurements.
+--per-cache::
+Aggregate counts per cache instance for system-wide mode measurements. By
+default, the aggregation happens for the cache level at the highest index
+in the system. To specify a particular level, mention the cache level
+alongside the option in the format [Ll][1-9][0-9]*. For example: Using
+option "--per-cache=l3" or "--per-cache=L3" will aggregate the
+information at the boundary of the level 3 cache in the system.
+
--per-core::
Aggregate counts per physical processor for system-wide mode measurements.
@@ -389,6 +414,12 @@ For a group all metrics from the group are added.
The events from the metrics are automatically measured.
See perf list output for the possible metrics and metricgroups.
+	When threshold information is available for a metric, the
+	color red is used to signify that the metric has exceeded its
+	threshold, while green shows it hasn't. The default color means that
+ no threshold information was available or the threshold
+ couldn't be computed.
+
-A::
--no-aggr::
Do not aggregate counts across all monitored CPUs.
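
A usage sketch for the new aggregation mode: 'perf stat --per-cache=L3 -a --
sleep 1' reports one aggregated count per L3 instance; leaving the level off
('--per-cache') picks the cache level at the highest index, per the default
described above.
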
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index a794d9eca93d..c5db0de49868 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -155,9 +155,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
-OPENCSDLIBS := -lopencsd_c_api
+OPENCSDLIBS := -lopencsd_c_api -lopencsd
ifeq ($(findstring -static,${LDFLAGS}),-static)
- OPENCSDLIBS += -lopencsd -lstdc++
+ OPENCSDLIBS += -lstdc++
endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
@@ -315,6 +315,9 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
+FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
+FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
+
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
@@ -344,8 +347,8 @@ ifneq ($(TCMALLOC),)
endif
ifeq ($(FEATURES_DUMP),)
-# We will display at the end of this Makefile.config, using $(call feature_display_entries),
-# as we may retry some feature detection here.
+# We will display at the end of this Makefile.config, using $(call feature_display_entries),
+# as we may retry some feature detection here; see the disassembler-four-args case, for instance.
FEATURE_DISPLAY_DEFERRED := 1
include $(srctree)/tools/build/Makefile.feature
else
@@ -680,6 +683,10 @@ ifdef BUILD_BPF_SKEL
CFLAGS += -DHAVE_BPF_SKEL
endif
+ifndef GEN_VMLINUX_H
+ VMLINUX_H=$(src-perf)/util/bpf_skel/vmlinux/vmlinux.h
+endif
+
dwarf-post-unwind := 1
dwarf-post-unwind-text := BUG
@@ -903,9 +910,13 @@ ifdef BUILD_NONDISTRO
ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -lopcodes -liberty
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
+ FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl
else
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -lopcodes -liberty -lz
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
+ FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl
endif
endif
$(call feature_check,disassembler-four-args)
@@ -1075,6 +1086,11 @@ ifndef NO_AUXTRACE
endif
endif
+ifdef EXTRA_TESTS
+ $(call detected,CONFIG_EXTRA_TESTS)
+ CFLAGS += -DHAVE_EXTRA_TESTS
+endif
+
ifndef NO_JVMTI
ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
@@ -1324,6 +1340,6 @@ endif
# re-generate FEATURE-DUMP as we may have called feature_check, found out
# extra libraries to add to LDFLAGS of some other test and then redo those
-# tests.
+# tests; see the libbfd / disassembler-four-args block, for instance.
$(shell rm -f $(FEATURE_DUMP_FILENAME))
$(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f48794816d82..f178b36c6940 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -128,6 +128,12 @@ include ../scripts/utilities.mak
#
# Define BUILD_NONDISTRO to enable building and linking against libbfd and
# libiberty, distribution-license-incompatible libraries.
+#
+# Define EXTRA_TESTS to enable building extra tests useful mainly to perf
+# developers, such as:
+# x86 instruction decoder - new instructions test
+#
+# Define GEN_VMLINUX_H to generate vmlinux.h from the BTF.
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -193,6 +199,7 @@ FLEX ?= flex
BISON ?= bison
STRIP = strip
AWK = awk
+READELF ?= readelf
# include Makefile.config by default and rule out
# non-config cases
@@ -374,7 +381,7 @@ ifndef NO_JVMTI
PROGRAMS += $(OUTPUT)$(LIBJVMTI)
endif
-DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so
+DLFILTERS := dlfilter-test-api-v0.so dlfilter-test-api-v2.so dlfilter-show-cycles.so
DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS))
# what 'all' will build and 'install' will install, in perfexecdir
@@ -1057,7 +1064,7 @@ $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_
ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
@@ -1080,8 +1087,44 @@ $(BPFTOOL): | $(SKEL_TMP_OUT)
$(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \
OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
-$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT)
- $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \
+# Paths to search for a kernel to generate vmlinux.h from.
+VMLINUX_BTF_ELF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+
+# Paths to BTF information.
+VMLINUX_BTF_BTF_PATHS ?= /sys/kernel/btf/vmlinux
+
+# Filter out kernels that don't exist or without a BTF section.
+VMLINUX_BTF_ELF_ABSPATHS ?= $(abspath $(wildcard $(VMLINUX_BTF_ELF_PATHS)))
+VMLINUX_BTF_PATHS ?= $(shell for file in $(VMLINUX_BTF_ELF_ABSPATHS); \
+ do \
+ if [ -f $$file ] && ($(READELF) -S "$$file" | grep -q .BTF); \
+ then \
+ echo "$$file"; \
+ fi; \
+ done) \
+ $(wildcard $(VMLINUX_BTF_BTF_PATHS))
+
+# Select the first as the source of vmlinux.h.
+VMLINUX_BTF ?= $(firstword $(VMLINUX_BTF_PATHS))
+
+ifeq ($(VMLINUX_H),)
+ ifeq ($(VMLINUX_BTF),)
+ $(error Missing bpftool input for generating vmlinux.h)
+ endif
+endif
+
+$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+ $(QUIET_GEN)$(BPFTOOL) btf dump file $< format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT)
+ $(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \
-c $(filter util/bpf_skel/%.bpf.c,$^) -o $@
$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
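
For reference, the generated header can be produced by hand with the same
command the new rule runs: bpftool btf dump file /sys/kernel/btf/vmlinux
format c > vmlinux.h. Defining GEN_VMLINUX_H selects that BTF-based
generation; otherwise the rule just copies the pre-built $(VMLINUX_H) named
in Makefile.config.
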
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
index 566fb6c0eae7..9bc304cb7762 100644
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_ARM_SP];
- map = maps__find(thread->maps, (u64)sp);
+ map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index adec6c9ee11d..3b8eca0ffb17 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -14,6 +14,7 @@
#include "../../../util/debug.h"
#include "../../../util/evlist.h"
#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "cs-etm.h"
#include "arm-spe.h"
#include "hisi-ptt.h"
@@ -40,7 +41,7 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
return NULL;
}
- arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
+ arm_spe_pmus[*nr_spes] = perf_pmus__find(arm_spe_pmu_name);
if (arm_spe_pmus[*nr_spes]) {
pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
__func__, __LINE__, *nr_spes,
@@ -87,7 +88,7 @@ static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err)
rewinddir(dir);
while ((dent = readdir(dir))) {
if (strstr(dent->d_name, HISI_PTT_PMU_NAME) && idx < *nr_ptts) {
- hisi_ptt_pmus[idx] = perf_pmu__find(dent->d_name);
+ hisi_ptt_pmus[idx] = perf_pmus__find(dent->d_name);
if (hisi_ptt_pmus[idx])
idx++;
}
@@ -131,7 +132,7 @@ struct auxtrace_record
if (!evlist)
return NULL;
- cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+ cs_etm_pmu = perf_pmus__find(CORESIGHT_ETM_PMU_NAME);
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err);
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 9ca040bfb1aa..7c51fa182b51 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -25,7 +25,7 @@
#include "../../../util/evsel.h"
#include "../../../util/perf_api_probe.h"
#include "../../../util/evsel_config.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/cs-etm.h"
#include <internal/lib.h> // page_size
#include "../../../util/session.h"
@@ -881,7 +881,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
struct perf_pmu *cs_etm_pmu;
struct cs_etm_recording *ptr;
- cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+ cs_etm_pmu = perf_pmus__find(CORESIGHT_ETM_PMU_NAME);
if (!cs_etm_pmu) {
*err = -EINVAL;
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 22cdf911dd9a..27d747c92d44 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -19,27 +19,28 @@ if ! test -r $input; then
exit 1
fi
-create_table_from_c()
+create_sc_table()
{
- local sc nr last_sc
+ local sc nr max_nr
while read sc nr; do
printf "%s\n" " [$nr] = \"$sc\","
- last_sc=$sc
+ max_nr=$nr
done
- printf "%s\n" "#define SYSCALLTBL_ARM64_MAX_ID __NR_$last_sc"
+ echo "#define SYSCALLTBL_ARM64_MAX_ID $max_nr"
}
create_table()
{
echo "#include \"$input\""
- echo "static const char *syscalltbl_arm64[] = {"
- create_table_from_c
+ echo "static const char *const syscalltbl_arm64[] = {"
+ create_sc_table
echo "};"
}
$gcc -E -dM -x c -I $incpath/include/uapi $input \
- |sed -ne 's/^#define __NR_//p' \
- |sort -t' ' -k2 -n \
+ |awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
+ sub("^#define __NR(3264)?_", "");
+ print | "sort -k2 -n"}' \
|create_table
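
The awk filter keeps the __NR_* definitions (skipping the __NR3264_ compat
aliases) and create_sc_table turns the sorted name/number pairs into a
sparse, number-indexed C array. The generated shape looks roughly like this
(entries illustrative):

    static const char *const syscalltbl_arm64[] = {
        [0] = "io_setup",
        [1] = "io_destroy",
        /* ... one entry per __NR_* definition ... */
        [451] = "cachestat",
    };
    #define SYSCALLTBL_ARM64_MAX_ID 451
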
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
index 90a7ef293ce7..b2603d0d3737 100644
--- a/tools/perf/arch/arm64/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_ARM64_SP];
- map = maps__find(thread->maps, (u64)sp);
+ map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index ef1ed645097c..512a8f13c4de 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -3,6 +3,7 @@
#include <internal/cpumap.h>
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include <api/fs/fs.h>
#include <math.h>
@@ -10,10 +11,7 @@ static struct perf_pmu *pmu__find_core_pmu(void)
{
struct perf_pmu *pmu = NULL;
- while ((pmu = perf_pmu__scan(pmu))) {
- if (!is_pmu_core(pmu->name))
- continue;
-
+ while ((pmu = perf_pmus__scan_core(pmu))) {
/*
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
@@ -56,10 +54,11 @@ double perf_pmu__cpu_slots_per_cycle(void)
perf_pmu__pathname_scnprintf(path, sizeof(path),
pmu->name, "caps/slots");
/*
- * The value of slots is not greater than 32 bits, but sysfs__read_int
- * can't read value with 0x prefix, so use sysfs__read_ull instead.
+ * The value of slots is not greater than 32 bits, but
+	 * filename__read_int can't read a value with a 0x prefix,
+ * so use filename__read_ull instead.
*/
- sysfs__read_ull(path, &slots);
+ filename__read_ull(path, &slots);
}
return slots ? (double)slots : NAN;
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index b951374bc49d..4908d54dd33b 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -43,6 +43,20 @@ const char *const powerpc_triplets[] = {
NULL
};
+const char *const riscv32_triplets[] = {
+ "riscv32-unknown-linux-gnu-",
+ "riscv32-linux-android-",
+ "riscv32-linux-gnu-",
+ NULL
+};
+
+const char *const riscv64_triplets[] = {
+ "riscv64-unknown-linux-gnu-",
+ "riscv64-linux-android-",
+ "riscv64-linux-gnu-",
+ NULL
+};
+
const char *const s390_triplets[] = {
"s390-ibm-linux-",
"s390x-linux-gnu-",
@@ -164,6 +178,10 @@ static int perf_env__lookup_binutils_path(struct perf_env *env,
path_list = arm64_triplets;
else if (!strcmp(arch, "powerpc"))
path_list = powerpc_triplets;
+ else if (!strcmp(arch, "riscv32"))
+ path_list = riscv32_triplets;
+ else if (!strcmp(arch, "riscv64"))
+ path_list = riscv64_triplets;
else if (!strcmp(arch, "sh"))
path_list = sh_triplets;
else if (!strcmp(arch, "s390"))
diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c
index ab21bf122135..98e19c5366ac 100644
--- a/tools/perf/arch/loongarch/annotate/instructions.c
+++ b/tools/perf/arch/loongarch/annotate/instructions.c
@@ -5,25 +5,115 @@
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
+static int loongarch_call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ char *c, *endptr, *tok, *name;
+ struct map *map = ms->map;
+ struct addr_map_symbol target = {
+ .ms = { .map = map, },
+ };
+
+ c = strchr(ops->raw, '#');
+ if (c++ == NULL)
+ return -1;
+
+ ops->target.addr = strtoull(c, &endptr, 16);
+
+ name = strchr(endptr, '<');
+ name++;
+
+ if (arch->objdump.skip_functions_char &&
+ strchr(name, arch->objdump.skip_functions_char))
+ return -1;
+
+ tok = strchr(name, '>');
+ if (tok == NULL)
+ return -1;
+
+ *tok = '\0';
+ ops->target.name = strdup(name);
+ *tok = '>';
+
+ if (ops->target.name == NULL)
+ return -1;
+
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+
+ if (maps__find_ams(ms->maps, &target) == 0 &&
+ map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.ms.sym;
+
+ return 0;
+}
+
+static struct ins_ops loongarch_call_ops = {
+ .parse = loongarch_call__parse,
+ .scnprintf = call__scnprintf,
+};
+
+static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ struct map *map = ms->map;
+ struct symbol *sym = ms->sym;
+ struct addr_map_symbol target = {
+ .ms = { .map = map, },
+ };
+ const char *c = strchr(ops->raw, '#');
+ u64 start, end;
+
+ ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+ ops->raw_func_start = strchr(ops->raw, '<');
+
+ if (ops->raw_func_start && c > ops->raw_func_start)
+ c = NULL;
+
+ if (c++ != NULL)
+ ops->target.addr = strtoull(c, NULL, 16);
+ else
+ ops->target.addr = strtoull(ops->raw, NULL, 16);
+
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+ start = map__unmap_ip(map, sym->start);
+ end = map__unmap_ip(map, sym->end);
+
+ ops->target.outside = target.addr < start || target.addr > end;
+
+ if (maps__find_ams(ms->maps, &target) == 0 &&
+ map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.ms.sym;
+
+ if (!ops->target.outside) {
+ ops->target.offset = target.addr - start;
+ ops->target.offset_avail = true;
+ } else {
+ ops->target.offset_avail = false;
+ }
+
+ return 0;
+}
+
+static struct ins_ops loongarch_jump_ops = {
+ .parse = loongarch_jump__parse,
+ .scnprintf = jump__scnprintf,
+};
+
static
struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name)
{
struct ins_ops *ops = NULL;
- if (!strncmp(name, "beqz", 4) ||
- !strncmp(name, "bnez", 4) ||
- !strncmp(name, "beq", 3) ||
- !strncmp(name, "bne", 3) ||
- !strncmp(name, "blt", 3) ||
- !strncmp(name, "bge", 3) ||
- !strncmp(name, "bltu", 4) ||
- !strncmp(name, "bgeu", 4) ||
- !strncmp(name, "bl", 2))
- ops = &call_ops;
- else if (!strncmp(name, "jirl", 4))
+ if (!strcmp(name, "bl"))
+ ops = &loongarch_call_ops;
+ else if (!strcmp(name, "jirl"))
ops = &ret_ops;
- else if (name[0] == 'b')
- ops = &jump_ops;
+ else if (!strcmp(name, "b") ||
+ !strncmp(name, "beq", 3) ||
+ !strncmp(name, "bne", 3) ||
+ !strncmp(name, "blt", 3) ||
+ !strncmp(name, "bge", 3) ||
+ !strncmp(name, "bltu", 4) ||
+ !strncmp(name, "bgeu", 4))
+ ops = &loongarch_jump_ops;
else
return NULL;
diff --git a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl
index c52156f7204d..c10ad3580aef 100755
--- a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl
@@ -18,44 +18,28 @@ if ! test -r $input; then
exit 1
fi
-create_table_from_c()
+create_sc_table()
{
- local sc nr last_sc
-
- create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
-
- {
-
- cat <<-_EoHEADER
- #include <stdio.h>
- #include "$input"
- int main(int argc, char *argv[])
- {
- _EoHEADER
+ local sc nr max_nr
while read sc nr; do
- printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", $nr);"
- last_sc=$nr
+ printf "%s\n" " [$nr] = \"$sc\","
+ max_nr=$nr
done
- printf "%s\n" " printf(\"#define SYSCALLTBL_LOONGARCH_MAX_ID %d\\n\", $last_sc);"
- printf "}\n"
-
- } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
-
- $create_table_exe
-
- rm -f $create_table_exe
+ echo "#define SYSCALLTBL_LOONGARCH_MAX_ID $max_nr"
}
create_table()
{
- echo "static const char *syscalltbl_loongarch[] = {"
- create_table_from_c
+ echo "#include \"$input\""
+ echo "static const char *const syscalltbl_loongarch[] = {"
+ create_sc_table
echo "};"
}
-$gcc -E -dM -x c -I $incpath/include/uapi $input \
- |sed -ne 's/^#define __NR_//p' \
- |sort -t' ' -k2 -n \
+$gcc -E -dM -x c -I $incpath/include/uapi $input \
+ |awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
+ sub("^#define __NR(3264)?_", "");
+ print | "sort -k2 -n"}' \
|create_table
diff --git a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl
index fb1f49451af6..c0d93f959c4e 100644
--- a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl
@@ -18,7 +18,7 @@ create_table()
{
local max_nr nr abi sc discard
- echo 'static const char *syscalltbl_mips_n64[] = {'
+ echo 'static const char *const syscalltbl_mips_n64[] = {'
while read nr abi sc discard; do
printf '\t[%d] = "%s",\n' $nr $sc
max_nr=$nr
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 3f1886ad9d80..cfda2511badf 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -365,3 +365,4 @@
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 n64 cachestat sys_cachestat
diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
index 6c58060aa03b..0eb316fe6dd1 100755
--- a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
@@ -23,7 +23,7 @@ create_table()
max_nr=-1
nr=0
- echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
+ echo "static const char *const syscalltbl_powerpc_${wordsize}[] = {"
while read nr abi sc discard; do
if [ "$max_nr" -lt "$nr" ]; then
printf '\t[%d] = "%s",\n' $nr $sc
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index a0be127475b1..8c0b08b7a80e 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -537,3 +537,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 32fffb593fbf..5ecf82893b84 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
- map = maps__find(thread->maps, (u64)sp);
+ map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
index ea1220d66b67..d9a0ac1cdf30 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -5,7 +5,7 @@
#include "util/debug.h"
#include "util/evsel.h"
#include "util/evlist.h"
-#include "util/pmu.h"
+#include "util/pmus.h"
#include "book3s_hv_exits.h"
#include "book3s_hcalls.h"
@@ -204,7 +204,7 @@ int kvm_add_default_arch_event(int *argc, const char **argv)
parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
if (!event) {
- if (pmu_have_event("trace_imc", "trace_cycles")) {
+ if (perf_pmus__have_event("trace_imc", "trace_cycles")) {
argv[j++] = strdup("-e");
argv[j++] = strdup("trace_imc/trace_cycles/");
*argc += 2;
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index b7223feec770..5f3edb3004d8 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -250,6 +250,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
if (!chain || chain->nr < 3)
return skip_slot;
+ addr_location__init(&al);
ip = chain->ips[1];
thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
@@ -259,6 +260,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
if (!dso) {
pr_debug("%" PRIx64 " dso is NULL\n", ip);
+ addr_location__exit(&al);
return skip_slot;
}
@@ -279,5 +281,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
*/
skip_slot = 3;
}
+
+ addr_location__exit(&al);
return skip_slot;
}
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index de925b0e35ce..da5aa3e1f04c 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -45,9 +45,6 @@ static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
return 0;
}
-static int call__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name);
-
static struct ins_ops s390_call_ops = {
.parse = s390_call__parse,
.scnprintf = call__scnprintf,
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
index 72ecbb676370..52eb88a77c94 100755
--- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
@@ -18,7 +18,7 @@ create_table()
{
local max_nr nr abi sc discard
- echo 'static const char *syscalltbl_s390_64[] = {'
+ echo 'static const char *const syscalltbl_s390_64[] = {'
while read nr abi sc discard; do
printf '\t[%d] = "%s",\n' $nr $sc
max_nr=$nr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b68f47541169..a6935af2235c 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat sys_cachestat
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 305872692bfd..5f4ac4fc7fcf 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -1,46 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86 instruction mnemonic table used to parse disasm lines for annotate.
+ * This table is searched twice - once for an exact match and again for a
+ * match without a size suffix (b, w, l, q) in case of AT&T syntax.
+ *
+ * So this table should not have entries with the suffix unless it is
+ * a completely different instruction from the one without the suffix.
+ */
static struct ins x86__instructions[] = {
{ .name = "adc", .ops = &mov_ops, },
- { .name = "adcb", .ops = &mov_ops, },
- { .name = "adcl", .ops = &mov_ops, },
{ .name = "add", .ops = &mov_ops, },
- { .name = "addl", .ops = &mov_ops, },
- { .name = "addq", .ops = &mov_ops, },
{ .name = "addsd", .ops = &mov_ops, },
- { .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
- { .name = "andb", .ops = &mov_ops, },
- { .name = "andl", .ops = &mov_ops, },
{ .name = "andpd", .ops = &mov_ops, },
{ .name = "andps", .ops = &mov_ops, },
- { .name = "andq", .ops = &mov_ops, },
- { .name = "andw", .ops = &mov_ops, },
{ .name = "bsr", .ops = &mov_ops, },
{ .name = "bt", .ops = &mov_ops, },
{ .name = "btr", .ops = &mov_ops, },
{ .name = "bts", .ops = &mov_ops, },
- { .name = "btsq", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
- { .name = "callq", .ops = &call_ops, },
{ .name = "cmovbe", .ops = &mov_ops, },
{ .name = "cmove", .ops = &mov_ops, },
{ .name = "cmovae", .ops = &mov_ops, },
{ .name = "cmp", .ops = &mov_ops, },
- { .name = "cmpb", .ops = &mov_ops, },
- { .name = "cmpl", .ops = &mov_ops, },
- { .name = "cmpq", .ops = &mov_ops, },
- { .name = "cmpw", .ops = &mov_ops, },
{ .name = "cmpxch", .ops = &mov_ops, },
{ .name = "cmpxchg", .ops = &mov_ops, },
{ .name = "cs", .ops = &mov_ops, },
{ .name = "dec", .ops = &dec_ops, },
- { .name = "decl", .ops = &dec_ops, },
{ .name = "divsd", .ops = &mov_ops, },
{ .name = "divss", .ops = &mov_ops, },
{ .name = "gs", .ops = &mov_ops, },
{ .name = "imul", .ops = &mov_ops, },
{ .name = "inc", .ops = &dec_ops, },
- { .name = "incl", .ops = &dec_ops, },
{ .name = "ja", .ops = &jump_ops, },
{ .name = "jae", .ops = &jump_ops, },
{ .name = "jb", .ops = &jump_ops, },
@@ -54,7 +45,6 @@ static struct ins x86__instructions[] = {
{ .name = "jl", .ops = &jump_ops, },
{ .name = "jle", .ops = &jump_ops, },
{ .name = "jmp", .ops = &jump_ops, },
- { .name = "jmpq", .ops = &jump_ops, },
{ .name = "jna", .ops = &jump_ops, },
{ .name = "jnae", .ops = &jump_ops, },
{ .name = "jnb", .ops = &jump_ops, },
@@ -81,48 +71,32 @@ static struct ins x86__instructions[] = {
{ .name = "mov", .ops = &mov_ops, },
{ .name = "movapd", .ops = &mov_ops, },
{ .name = "movaps", .ops = &mov_ops, },
- { .name = "movb", .ops = &mov_ops, },
{ .name = "movdqa", .ops = &mov_ops, },
{ .name = "movdqu", .ops = &mov_ops, },
- { .name = "movl", .ops = &mov_ops, },
- { .name = "movq", .ops = &mov_ops, },
{ .name = "movsd", .ops = &mov_ops, },
{ .name = "movslq", .ops = &mov_ops, },
{ .name = "movss", .ops = &mov_ops, },
{ .name = "movupd", .ops = &mov_ops, },
{ .name = "movups", .ops = &mov_ops, },
- { .name = "movw", .ops = &mov_ops, },
{ .name = "movzbl", .ops = &mov_ops, },
{ .name = "movzwl", .ops = &mov_ops, },
{ .name = "mulsd", .ops = &mov_ops, },
{ .name = "mulss", .ops = &mov_ops, },
{ .name = "nop", .ops = &nop_ops, },
- { .name = "nopl", .ops = &nop_ops, },
- { .name = "nopw", .ops = &nop_ops, },
{ .name = "or", .ops = &mov_ops, },
- { .name = "orb", .ops = &mov_ops, },
- { .name = "orl", .ops = &mov_ops, },
{ .name = "orps", .ops = &mov_ops, },
- { .name = "orq", .ops = &mov_ops, },
{ .name = "pand", .ops = &mov_ops, },
{ .name = "paddq", .ops = &mov_ops, },
{ .name = "pcmpeqb", .ops = &mov_ops, },
{ .name = "por", .ops = &mov_ops, },
- { .name = "rclb", .ops = &mov_ops, },
- { .name = "rcll", .ops = &mov_ops, },
+ { .name = "rcl", .ops = &mov_ops, },
{ .name = "ret", .ops = &ret_ops, },
- { .name = "retq", .ops = &ret_ops, },
{ .name = "sbb", .ops = &mov_ops, },
- { .name = "sbbl", .ops = &mov_ops, },
{ .name = "sete", .ops = &mov_ops, },
{ .name = "sub", .ops = &mov_ops, },
- { .name = "subl", .ops = &mov_ops, },
- { .name = "subq", .ops = &mov_ops, },
{ .name = "subsd", .ops = &mov_ops, },
- { .name = "subw", .ops = &mov_ops, },
{ .name = "test", .ops = &mov_ops, },
- { .name = "testb", .ops = &mov_ops, },
- { .name = "testl", .ops = &mov_ops, },
+ { .name = "tzcnt", .ops = &mov_ops, },
{ .name = "ucomisd", .ops = &mov_ops, },
{ .name = "ucomiss", .ops = &mov_ops, },
{ .name = "vaddsd", .ops = &mov_ops, },
@@ -135,11 +109,9 @@ static struct ins x86__instructions[] = {
{ .name = "vsubsd", .ops = &mov_ops, },
{ .name = "vucomisd", .ops = &mov_ops, },
{ .name = "xadd", .ops = &mov_ops, },
- { .name = "xbeginl", .ops = &jump_ops, },
- { .name = "xbeginq", .ops = &jump_ops, },
+ { .name = "xbegin", .ops = &jump_ops, },
{ .name = "xchg", .ops = &mov_ops, },
{ .name = "xor", .ops = &mov_ops, },
- { .name = "xorb", .ops = &mov_ops, },
{ .name = "xorpd", .ops = &mov_ops, },
{ .name = "xorps", .ops = &mov_ops, },
};
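
A hedged sketch of the two-pass lookup the table comment describes: exact
match first, then a retry with the trailing AT&T size suffix stripped. The
names below are illustrative; perf's actual lookup helpers are not shown in
this diff.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static const char *const table[] = { "add", "call", "jmp", "mov" };

    static int find_exact(const char *name)
    {
        for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
            if (!strcmp(table[i], name))
                return (int)i;
        return -1;
    }

    static int find_ins(const char *name)
    {
        int idx = find_exact(name);
        size_t len = strlen(name);

        /* second pass: drop a b/w/l/q suffix and retry */
        if (idx < 0 && len > 1 && strchr("bwlq", name[len - 1])) {
            char *base = strndup(name, len - 1);

            idx = find_exact(base);
            free(base);
        }
        return idx;
    }

    int main(void)
    {
        printf("%d %d\n", find_ins("movq"), find_ins("mov"));  /* both 3 */
        return 0;
    }
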
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index c84d12608cd2..227538b0ce80 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -372,6 +372,7 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
index 029a72c20b19..fa526a993845 100755
--- a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
@@ -18,7 +18,7 @@ emit() {
syscall_macro "$nr" "$entry"
}
-echo "static const char *syscalltbl_${arch}[] = {"
+echo "static const char *const syscalltbl_${arch}[] = {"
sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
grep '^[0-9]' "$in" | sort -n > $sorted_table
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 902e9ea9b99e..c0421a26b875 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -6,11 +6,15 @@ struct test_suite;
/* Tests */
int test__rdpmc(struct test_suite *test, int subtest);
+#ifdef HAVE_EXTRA_TESTS
int test__insn_x86(struct test_suite *test, int subtest);
+#endif
int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
+int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest);
+int test__hybrid(struct test_suite *test, int subtest);
extern struct test_suite *arch_tests[];
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 6f4e8636c3bf..b87f46e5feea 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,10 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += sample-parsing.o
-perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o
+perf-y += hybrid.o
+perf-$(CONFIG_AUXTRACE) += intel-pt-test.o
+ifeq ($(CONFIG_EXTRA_TESTS),y)
+perf-$(CONFIG_AUXTRACE) += insn-x86.o
+endif
perf-$(CONFIG_X86_64) += bp-modify.o
+perf-y += amd-ibs-via-core-pmu.o
diff --git a/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c b/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c
new file mode 100644
index 000000000000..78b1902f6f59
--- /dev/null
+++ b/tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arch-tests.h"
+#include "linux/perf_event.h"
+#include "tests/tests.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "../perf-sys.h"
+#include "debug.h"
+
+#define NR_SUB_TESTS 5
+
+static struct sub_tests {
+ int type;
+ unsigned long config;
+ bool valid;
+} sub_tests[NR_SUB_TESTS] = {
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, true },
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, false },
+ { PERF_TYPE_RAW, 0x076, true },
+ { PERF_TYPE_RAW, 0x0C1, true },
+ { PERF_TYPE_RAW, 0x012, false },
+};
+
+static int event_open(int type, unsigned long config)
+{
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.type = type;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = config;
+ attr.disabled = 1;
+ attr.precise_ip = 1;
+ attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr.sample_period = 100000;
+
+ return sys_perf_event_open(&attr, -1, 0, -1, 0);
+}
+
+int test__amd_ibs_via_core_pmu(struct test_suite *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ struct perf_pmu *ibs_pmu;
+ int ret = TEST_OK;
+ int fd, i;
+
+ ibs_pmu = perf_pmus__find("ibs_op");
+ if (!ibs_pmu)
+ return TEST_SKIP;
+
+ for (i = 0; i < NR_SUB_TESTS; i++) {
+ fd = event_open(sub_tests[i].type, sub_tests[i].config);
+ pr_debug("type: 0x%x, config: 0x%lx, fd: %d - ", sub_tests[i].type,
+ sub_tests[i].config, fd);
+ if ((sub_tests[i].valid && fd == -1) ||
+ (!sub_tests[i].valid && fd > 0)) {
+ pr_debug("Fail\n");
+ ret = TEST_FAIL;
+ } else {
+ pr_debug("Pass\n");
+ }
+
+ if (fd > 0)
+ close(fd);
+ }
+
+ return ret;
+}
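
Once built, the suite can be run on its own with something like
'perf test -v "AMD IBS"' (perf test matches suite descriptions by
substring); on machines without an ibs_op PMU it skips rather than fails,
per the TEST_SKIP early return above.
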
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index aae6ea0fe52b..a216a5d172ed 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -4,7 +4,9 @@
#include "arch-tests.h"
#ifdef HAVE_AUXTRACE_SUPPORT
+#ifdef HAVE_EXTRA_TESTS
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
+#endif
static struct test_case intel_pt_tests[] = {
TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder),
@@ -22,18 +24,32 @@ struct test_suite suite__intel_pt = {
DEFINE_SUITE("x86 bp modify", bp_modify);
#endif
DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
+DEFINE_SUITE("AMD IBS via core pmu", amd_ibs_via_core_pmu);
+static struct test_case hybrid_tests[] = {
+ TEST_CASE_REASON("x86 hybrid event parsing", hybrid, "not hybrid"),
+ { .name = NULL, }
+};
+
+struct test_suite suite__hybrid = {
+ .desc = "x86 hybrid",
+ .test_cases = hybrid_tests,
+};
struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
&suite__dwarf_unwind,
#endif
#ifdef HAVE_AUXTRACE_SUPPORT
+#ifdef HAVE_EXTRA_TESTS
&suite__insn_x86,
+#endif
&suite__intel_pt,
#endif
#if defined(__x86_64__)
&suite__bp_modify,
#endif
&suite__x86_sample_parsing,
+ &suite__amd_ibs_via_core_pmu,
+ &suite__hybrid,
NULL,
};
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 497593be80f2..5bfec3345d59 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_X86_SP];
- map = maps__find(thread->maps, (u64)sp);
+ map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c
new file mode 100644
index 000000000000..eb152770f148
--- /dev/null
+++ b/tools/perf/arch/x86/tests/hybrid.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "arch-tests.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "tests/tests.h"
+
+static bool test_config(const struct evsel *evsel, __u64 expected_config)
+{
+ return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == expected_config;
+}
+
+static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config)
+{
+ return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
+}
+
+static bool test_hybrid_type(const struct evsel *evsel, __u64 expected_config)
+{
+ return (evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT) == expected_config;
+}
+
+static int test__hybrid_hw_event_with_pmu(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ return TEST_OK;
+}
+
+static int test__hybrid_hw_group_event(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ return TEST_OK;
+}
+
+static int test__hybrid_sw_hw_group_event(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ return TEST_OK;
+}
+
+static int test__hybrid_hw_sw_group_event(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ return TEST_OK;
+}
+
+static int test__hybrid_group_modifier1(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ return TEST_OK;
+}
+
+static int test__hybrid_raw1(struct evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ perf_evlist__for_each_evsel(&evlist->core, evsel) {
+ struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->attr.type);
+
+ TEST_ASSERT_VAL("missing pmu", pmu);
+ TEST_ASSERT_VAL("unexpected pmu", !strncmp(pmu->name, "cpu_", 4));
+ TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a));
+ }
+ return TEST_OK;
+}
+
+static int test__hybrid_raw2(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0x1a));
+ return TEST_OK;
+}
+
+static int test__hybrid_cache_event(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff));
+ return TEST_OK;
+}
+
+static int test__checkevent_pmu(struct evlist *evlist)
+{
+
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 10 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1);
+ TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2);
+ TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
+ /*
+ * The period value gets configured within evlist__config,
+ * while this test only exercises the event parsing path.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->core.attr.sample_period);
+
+ return TEST_OK;
+}
+
+struct evlist_test {
+ const char *name;
+ bool (*valid)(void);
+ int (*check)(struct evlist *evlist);
+};
+
+static const struct evlist_test test__hybrid_events[] = {
+ {
+ .name = "cpu_core/cpu-cycles/",
+ .check = test__hybrid_hw_event_with_pmu,
+ /* 0 */
+ },
+ {
+ .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}",
+ .check = test__hybrid_hw_group_event,
+ /* 1 */
+ },
+ {
+ .name = "{cpu-clock,cpu_core/cpu-cycles/}",
+ .check = test__hybrid_sw_hw_group_event,
+ /* 2 */
+ },
+ {
+ .name = "{cpu_core/cpu-cycles/,cpu-clock}",
+ .check = test__hybrid_hw_sw_group_event,
+ /* 3 */
+ },
+ {
+ .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}",
+ .check = test__hybrid_group_modifier1,
+ /* 4 */
+ },
+ {
+ .name = "r1a",
+ .check = test__hybrid_raw1,
+ /* 5 */
+ },
+ {
+ .name = "cpu_core/r1a/",
+ .check = test__hybrid_raw2,
+ /* 6 */
+ },
+ {
+ .name = "cpu_core/config=10,config1,config2=3,period=1000/u",
+ .check = test__checkevent_pmu,
+ /* 7 */
+ },
+ {
+ .name = "cpu_core/LLC-loads/",
+ .check = test__hybrid_cache_event,
+ /* 8 */
+ },
+};
+
+static int test_event(const struct evlist_test *e)
+{
+ struct parse_events_error err;
+ struct evlist *evlist;
+ int ret;
+
+ if (e->valid && !e->valid()) {
+ pr_debug("... SKIP\n");
+ return TEST_OK;
+ }
+
+ evlist = evlist__new();
+ if (evlist == NULL) {
+ pr_err("Failed allocation");
+ return TEST_FAIL;
+ }
+ parse_events_error__init(&err);
+ ret = parse_events(evlist, e->name, &err);
+ if (ret) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ e->name, ret, err.str);
+ parse_events_error__print(&err, e->name);
+ ret = TEST_FAIL;
+ if (strstr(err.str, "can't access trace events"))
+ ret = TEST_SKIP;
+ } else {
+ ret = e->check(evlist);
+ }
+ parse_events_error__exit(&err);
+ evlist__delete(evlist);
+
+ return ret;
+}
+
+static int combine_test_results(int existing, int latest)
+{
+ if (existing == TEST_FAIL)
+ return TEST_FAIL;
+ if (existing == TEST_SKIP)
+ return latest == TEST_OK ? TEST_SKIP : latest;
+ return latest;
+}
+
+static int test_events(const struct evlist_test *events, int cnt)
+{
+ int ret = TEST_OK;
+
+ for (int i = 0; i < cnt; i++) {
+ const struct evlist_test *e = &events[i];
+ int test_ret;
+
+ pr_debug("running test %d '%s'\n", i, e->name);
+ test_ret = test_event(e);
+ if (test_ret != TEST_OK) {
+ pr_debug("Event test failure: test %d '%s'", i, e->name);
+ ret = combine_test_results(ret, test_ret);
+ }
+ }
+
+ return ret;
+}
+
+int test__hybrid(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ if (perf_pmus__num_core_pmus() == 1)
+ return TEST_SKIP;
+
+ return test_events(test__hybrid_events, ARRAY_SIZE(test__hybrid_events));
+}
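
The config checks in this test split attr.config into a low 32-bit hardware event id and a PMU type in the upper bits, matching the PERF_PMU_TYPE_SHIFT/PERF_HW_EVENT_MASK definitions in the perf_event uapi. A minimal sketch of composing such an extended-type hardware event for a hybrid system (the helper name is illustrative):

#include <linux/perf_event.h>

/* Target a specific core PMU with a generic hardware event on hybrid CPUs:
 * the PMU type rides in the upper 32 bits of attr.config. */
static void set_hybrid_hw_event(struct perf_event_attr *attr,
				__u32 pmu_type, __u64 hw_event)
{
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = ((__u64)pmu_type << 32) | (hw_event & 0xffffffffULL);
}
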
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index 735257d205b5..7b5eb8baf0f2 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -18,14 +18,14 @@ struct test_data {
const char *asm_rep;
};
-struct test_data test_data_32[] = {
+const struct test_data test_data_32[] = {
#include "insn-x86-dat-32.c"
{{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"},
{{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"},
{{0}, 0, 0, NULL, NULL, NULL},
};
-struct test_data test_data_64[] = {
+const struct test_data test_data_64[] = {
#include "insn-x86-dat-64.c"
{{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"},
{{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"},
@@ -97,7 +97,7 @@ static int get_branch(const char *branch_str)
return -1;
}
-static int test_data_item(struct test_data *dat, int x86_64)
+static int test_data_item(const struct test_data *dat, int x86_64)
{
struct intel_pt_insn intel_pt_insn;
int op, branch, ret;
@@ -147,9 +147,9 @@ static int test_data_item(struct test_data *dat, int x86_64)
return 0;
}
-static int test_data_set(struct test_data *dat_set, int x86_64)
+static int test_data_set(const struct test_data *dat_set, int x86_64)
{
- struct test_data *dat;
+ const struct test_data *dat;
int ret = 0;
for (dat = dat_set; dat->expected_length; dat++) {
diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
index 70b7f79396b1..09d61fa736e3 100644
--- a/tools/perf/arch/x86/tests/intel-pt-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-test.c
@@ -22,7 +22,7 @@
* @new_ctx: expected new packet context
* @ctx_unchanged: the packet context must not change
*/
-static struct test_data {
+static const struct test_data {
int len;
u8 bytes[INTEL_PT_PKT_MAX_SZ];
enum intel_pt_pkt_ctx ctx;
@@ -186,7 +186,7 @@ static struct test_data {
{0, {0}, 0, {0, 0, 0}, 0, 0 },
};
-static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len)
+static int dump_packet(const struct intel_pt_pkt *packet, const u8 *bytes, int len)
{
char desc[INTEL_PT_PKT_DESC_MAX];
int ret, i;
@@ -206,14 +206,14 @@ static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len)
return TEST_OK;
}
-static void decoding_failed(struct test_data *d)
+static void decoding_failed(const struct test_data *d)
{
pr_debug("Decoding failed!\n");
pr_debug("Decoding: ");
dump_packet(&d->packet, d->bytes, d->len);
}
-static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len,
+static int fail(const struct test_data *d, struct intel_pt_pkt *packet, int len,
enum intel_pt_pkt_ctx new_ctx)
{
decoding_failed(d);
@@ -242,7 +242,7 @@ static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len,
return TEST_FAIL;
}
-static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet,
+static int test_ctx_unchanged(const struct test_data *d, struct intel_pt_pkt *packet,
enum intel_pt_pkt_ctx ctx)
{
enum intel_pt_pkt_ctx old_ctx = ctx;
@@ -258,7 +258,7 @@ static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet,
return TEST_OK;
}
-static int test_one(struct test_data *d)
+static int test_one(const struct test_data *d)
{
struct intel_pt_pkt packet;
enum intel_pt_pkt_ctx ctx = d->ctx;
@@ -307,7 +307,7 @@ static int test_one(struct test_data *d)
*/
int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- struct test_data *d = data;
+ const struct test_data *d = data;
int ret;
for (d = data; d->len; d++) {
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 195ccfdef7aa..005907cb97d8 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -10,6 +10,7 @@ perf-y += evlist.o
perf-y += mem-events.o
perf-y += evsel.o
perf-y += iostat.o
+perf-y += env.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 330d03216b0e..354780ff1605 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -10,6 +10,7 @@
#include "../../../util/header.h"
#include "../../../util/debug.h"
#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/auxtrace.h"
#include "../../../util/intel-pt.h"
#include "../../../util/intel-bts.h"
@@ -25,8 +26,8 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
bool found_pt = false;
bool found_bts = false;
- intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
- intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
+ intel_bts_pmu = perf_pmus__find(INTEL_BTS_PMU_NAME);
evlist__for_each_entry(evlist, evsel) {
if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type)
diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c
new file mode 100644
index 000000000000..3e537ffb1353
--- /dev/null
+++ b/tools/perf/arch/x86/util/env.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "linux/string.h"
+#include "util/env.h"
+#include "env.h"
+
+bool x86__is_amd_cpu(void)
+{
+ struct perf_env env = { .total_mem = 0, };
+ static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */
+
+ if (is_amd)
+ goto ret;
+
+ perf_env__cpuid(&env);
+ is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1;
+ perf_env__exit(&env);
+ret:
+ return is_amd == 1;
+}
diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h
new file mode 100644
index 000000000000..d78f080b6b3f
--- /dev/null
+++ b/tools/perf/arch/x86/util/env.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_ENV_H
+#define _X86_ENV_H
+
+bool x86__is_amd_cpu(void);
+
+#endif /* _X86_ENV_H */
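
x86__is_amd_cpu() keys off the cpuid vendor string cached in struct perf_env. For illustration only, the same answer can be obtained directly with the CPUID instruction; the register order for the vendor string is ebx, edx, ecx:

#include <cpuid.h>
#include <stdbool.h>
#include <string.h>

/* Standalone sketch, not the perf implementation. */
static bool cpu_is_amd(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return false;
	memcpy(vendor, &ebx, 4);
	memcpy(vendor + 4, &edx, 4);
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';
	return strcmp(vendor, "AuthenticAMD") == 0;
}
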
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index d4193479a364..cbd582182932 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -1,38 +1,42 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/evlist.h"
#include "util/parse-events.h"
#include "util/event.h"
-#include "util/pmu-hybrid.h"
#include "topdown.h"
+#include "evsel.h"
static int ___evlist__add_default_attrs(struct evlist *evlist,
struct perf_event_attr *attrs,
size_t nr_attrs)
{
- struct perf_cpu_map *cpus;
- struct evsel *evsel, *n;
- struct perf_pmu *pmu;
LIST_HEAD(head);
size_t i = 0;
for (i = 0; i < nr_attrs; i++)
event_attr_init(attrs + i);
- if (!perf_pmu__has_hybrid())
+ if (perf_pmus__num_core_pmus() == 1)
return evlist__add_attrs(evlist, attrs, nr_attrs);
for (i = 0; i < nr_attrs; i++) {
+ struct perf_pmu *pmu = NULL;
+
if (attrs[i].type == PERF_TYPE_SOFTWARE) {
- evsel = evsel__new(attrs + i);
+ struct evsel *evsel = evsel__new(attrs + i);
+
if (evsel == NULL)
goto out_delete_partial_list;
list_add_tail(&evsel->core.node, &head);
continue;
}
- perf_pmu__for_each_hybrid_pmu(pmu) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ struct perf_cpu_map *cpus;
+ struct evsel *evsel;
+
evsel = evsel__new(attrs + i);
if (evsel == NULL)
goto out_delete_partial_list;
@@ -50,8 +54,12 @@ static int ___evlist__add_default_attrs(struct evlist *evlist,
return 0;
out_delete_partial_list:
- __evlist__for_each_entry_safe(&head, n, evsel)
- evsel__delete(evsel);
+ {
+ struct evsel *evsel, *n;
+
+ __evlist__for_each_entry_safe(&head, n, evsel)
+ evsel__delete(evsel);
+ }
return -1;
}
@@ -67,8 +75,7 @@ int arch_evlist__add_default_attrs(struct evlist *evlist,
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
{
- if (topdown_sys_has_perf_metrics() &&
- (!lhs->pmu_name || !strncmp(lhs->pmu_name, "cpu", 3))) {
+ if (topdown_sys_has_perf_metrics() && evsel__sys_has_perf_metrics(lhs)) {
/* Ensure the topdown slots event comes first. */
if (strcasestr(lhs->name, "slots"))
return -1;
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index ea3972d785d1..81d22657922a 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -4,9 +4,11 @@
#include "util/evsel.h"
#include "util/env.h"
#include "util/pmu.h"
+#include "util/pmus.h"
#include "linux/string.h"
#include "evsel.h"
#include "util/debug.h"
+#include "env.h"
#define IBS_FETCH_L3MISSONLY (1ULL << 59)
#define IBS_OP_L3MISSONLY (1ULL << 16)
@@ -16,26 +18,6 @@ void arch_evsel__set_sample_weight(struct evsel *evsel)
evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
}
-void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
-{
- struct perf_env env = { .total_mem = 0, } ;
-
- if (!perf_env__cpuid(&env))
- return;
-
- /*
- * On AMD, precise cycles event sampling internally uses IBS pmu.
- * But IBS does not have filtering capabilities and perf by default
- * sets exclude_guest = 1. This makes IBS pmu event init fail and
- * thus perf ends up doing non-precise sampling. Avoid it by clearing
- * exclude_guest.
- */
- if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
- attr->exclude_guest = 0;
-
- free(env.cpuid);
-}
-
/* Check whether the evsel's PMU supports the perf metrics */
bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
{
@@ -50,7 +32,7 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
* should be good enough to detect the perf metrics feature.
*/
if ((evsel->core.attr.type == PERF_TYPE_RAW) &&
- pmu_have_event(pmu_name, "slots"))
+ perf_pmus__have_event(pmu_name, "slots"))
return true;
return false;
@@ -97,29 +79,16 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
{
struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu;
static int warned_once;
- /* 0: Uninitialized, 1: Yes, -1: No */
- static int is_amd;
- if (warned_once || is_amd == -1)
+ if (warned_once || !x86__is_amd_cpu())
return;
- if (!is_amd) {
- struct perf_env *env = evsel__env(evsel);
-
- if (!perf_env__cpuid(env) || !env->cpuid ||
- !strstarts(env->cpuid, "AuthenticAMD")) {
- is_amd = -1;
- return;
- }
- is_amd = 1;
- }
-
evsel_pmu = evsel__find_pmu(evsel);
if (!evsel_pmu)
return;
- ibs_fetch_pmu = perf_pmu__find("ibs_fetch");
- ibs_op_pmu = perf_pmu__find("ibs_op");
+ ibs_fetch_pmu = perf_pmus__find("ibs_fetch");
+ ibs_op_pmu = perf_pmus__find("ibs_op");
if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) {
if (attr->config & IBS_FETCH_L3MISSONLY) {
@@ -133,3 +102,23 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
}
}
}
+
+int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
+{
+ if (!x86__is_amd_cpu())
+ return 0;
+
+ if (!evsel->core.attr.precise_ip &&
+ !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3)))
+ return 0;
+
+ /* More verbose IBS errors. */
+ if (evsel->core.attr.exclude_kernel || evsel->core.attr.exclude_user ||
+ evsel->core.attr.exclude_hv || evsel->core.attr.exclude_idle ||
+ evsel->core.attr.exclude_host || evsel->core.attr.exclude_guest) {
+ return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. Try "
+ "again without the privilege modifiers (like 'k') at the end.");
+ }
+
+ return 0;
+}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 439c2956f3e7..d2c8cac11470 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -17,7 +17,7 @@
#include "../../../util/evlist.h"
#include "../../../util/mmap.h"
#include "../../../util/session.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/debug.h"
#include "../../../util/record.h"
#include "../../../util/tsc.h"
@@ -416,7 +416,7 @@ out_err:
struct auxtrace_record *intel_bts_recording_init(int *err)
{
- struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ struct perf_pmu *intel_bts_pmu = perf_pmus__find(INTEL_BTS_PMU_NAME);
struct intel_bts_recording *btsr;
if (!intel_bts_pmu)
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 17336da08b58..74b70fd379df 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -23,7 +23,7 @@
#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
#include "../../../util/parse-events.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/perf_api_probe.h"
@@ -1185,7 +1185,7 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
struct auxtrace_record *intel_pt_recording_init(int *err)
{
- struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
struct intel_pt_recording *ptr;
if (!intel_pt_pmu)
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index f683ac702247..a8a782bcb121 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/env.h"
#include "map_symbol.h"
#include "mem-events.h"
#include "linux/string.h"
+#include "env.h"
static char mem_loads_name[100];
static bool mem_loads_name__init;
@@ -26,28 +28,12 @@ static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
E("mem-ldst", "ibs_op//", "ibs_op"),
};
-static int perf_mem_is_amd_cpu(void)
-{
- struct perf_env env = { .total_mem = 0, };
-
- perf_env__cpuid(&env);
- if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
- return 1;
- return -1;
-}
-
struct perf_mem_event *perf_mem_events__ptr(int i)
{
- /* 0: Uninitialized, 1: Yes, -1: No */
- static int is_amd;
-
if (i >= PERF_MEM_EVENTS__MAX)
return NULL;
- if (!is_amd)
- is_amd = perf_mem_is_amd_cpu();
-
- if (is_amd == 1)
+ if (x86__is_amd_cpu())
return &perf_mem_events_amd[i];
return &perf_mem_events_intel[i];
@@ -55,13 +41,13 @@ struct perf_mem_event *perf_mem_events__ptr(int i)
bool is_mem_loads_aux_event(struct evsel *leader)
{
- if (perf_pmu__find("cpu")) {
- if (!pmu_have_event("cpu", "mem-loads-aux"))
- return false;
- } else if (perf_pmu__find("cpu_core")) {
- if (!pmu_have_event("cpu_core", "mem-loads-aux"))
- return false;
- }
+ struct perf_pmu *pmu = perf_pmus__find("cpu");
+
+ if (!pmu)
+ pmu = perf_pmus__find("cpu_core");
+
+ if (pmu && !perf_pmu__have_event(pmu, "mem-loads-aux"))
+ return false;
return leader->core.attr.config == MEM_LOADS_AUX;
}
@@ -82,7 +68,7 @@ char *perf_mem_events__name(int i, char *pmu_name)
pmu_name = (char *)"cpu";
}
- if (pmu_have_event(pmu_name, "mem-loads-aux")) {
+ if (perf_pmus__have_event(pmu_name, "mem-loads-aux")) {
scnprintf(mem_loads_name, sizeof(mem_loads_name),
MEM_LOADS_AUX_NAME, pmu_name, pmu_name,
perf_mem_events__loads_ldlat);
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 0ed177991ad0..8ad4112ad10c 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -10,7 +10,7 @@
#include "../../../util/debug.h"
#include "../../../util/event.h"
#include "../../../util/pmu.h"
-#include "../../../util/pmu-hybrid.h"
+#include "../../../util/pmus.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@@ -286,20 +286,25 @@ uint64_t arch__intr_reg_mask(void)
.disabled = 1,
.exclude_kernel = 1,
};
- struct perf_pmu *pmu;
int fd;
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
- if (perf_pmu__has_hybrid()) {
+ if (perf_pmus__num_core_pmus() > 1) {
+ struct perf_pmu *pmu = NULL;
+ __u64 type = PERF_TYPE_RAW;
+
/*
* The same register set is supported among different hybrid PMUs.
* Only check the first available one.
*/
- pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
- attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ type = pmu->type;
+ break;
+ }
+ attr.config |= type << PERF_PMU_TYPE_SHIFT;
}
event_attr_init(&attr);
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 3c0de3370d7e..65d8cdff4d5f 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -14,6 +14,8 @@
#include "../../../util/intel-bts.h"
#include "../../../util/pmu.h"
#include "../../../util/fncache.h"
+#include "../../../util/pmus.h"
+#include "env.h"
struct pmu_alias {
char *name;
@@ -168,3 +170,13 @@ char *pmu_find_alias_name(const char *name)
return __pmu_find_alias_name(name);
}
+
+int perf_pmus__num_mem_pmus(void)
+{
+ /* AMD uses the IBS OP PMU, not a core PMU, for perf mem/c2c */
+ if (x86__is_amd_cpu())
+ return 1;
+
+ /* Intel uses core PMUs for perf mem/c2c */
+ return perf_pmus__num_core_pmus();
+}
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 9ad5e5c7bd27..3f9a267d4501 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -2,6 +2,7 @@
#include "api/fs/fs.h"
#include "util/evsel.h"
#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/topdown.h"
#include "topdown.h"
#include "evsel.h"
@@ -22,8 +23,8 @@ bool topdown_sys_has_perf_metrics(void)
* The slots event is only available when the core PMU
* supports the perf metrics feature.
*/
- pmu = perf_pmu__find_by_type(PERF_TYPE_RAW);
- if (pmu && pmu_have_event(pmu->name, "slots"))
+ pmu = perf_pmus__find_by_type(PERF_TYPE_RAW);
+ if (pmu && perf_pmu__have_event(pmu, "slots"))
has_perf_metrics = true;
cached = true;
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 521d1ff97b06..6bfffe83dde9 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -421,6 +421,11 @@ int bench_epoll_ctl(int argc, const char **argv)
print_summary();
close(epollfd);
+ perf_cpu_map__put(cpu);
+ for (i = 0; i < nthreads; i++)
+ free(worker[i].fdmap);
+
+ free(worker);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index c1cdf03c075d..cb5174b53940 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -549,6 +549,11 @@ int bench_epoll_wait(int argc, const char **argv)
print_summary();
close(epollfd);
+ perf_cpu_map__put(cpu);
+ for (i = 0; i < nthreads; i++)
+ free(worker[i].fdmap);
+
+ free(worker);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 2d0417949727..092cbd52db82 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -118,8 +118,7 @@ static void *workerfn(void *arg)
return NULL;
}
-static void create_threads(struct worker *w, pthread_attr_t thread_attr,
- struct perf_cpu_map *cpu)
+static void create_threads(struct worker *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
@@ -133,6 +132,9 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
size = CPU_ALLOC_SIZE(nrcpus);
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
worker[i].tid = i;
if (params.multi) {
@@ -154,6 +156,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -163,7 +166,6 @@ int bench_futex_lock_pi(int argc, const char **argv)
int ret = 0;
unsigned int i;
struct sigaction act;
- pthread_attr_t thread_attr;
struct perf_cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
@@ -203,11 +205,9 @@ int bench_futex_lock_pi(int argc, const char **argv)
cond_init(&thread_worker);
threads_starting = params.nthreads;
- pthread_attr_init(&thread_attr);
gettimeofday(&bench__start, NULL);
- create_threads(worker, thread_attr, cpu);
- pthread_attr_destroy(&thread_attr);
+ create_threads(worker, cpu);
mutex_lock(&thread_lock);
while (threads_starting)
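
This and the following futex benches all switch from one pthread_attr_t shared across every pthread_create() call to a per-thread attribute that is initialized, given its own CPU affinity, consumed, and destroyed inside the loop. A self-contained sketch of that pattern (thread function and CPU number are placeholders):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

/* Create one thread pinned to 'cpu'; the attr lives only for this call. */
static int spawn_on_cpu(pthread_t *tid, void *(*fn)(void *), int cpu)
{
	pthread_attr_t attr;
	cpu_set_t set;
	int ret;

	pthread_attr_init(&attr);
	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	pthread_attr_setaffinity_np(&attr, sizeof(set), &set);
	ret = pthread_create(tid, &attr, fn, NULL);
	pthread_attr_destroy(&attr);	/* safe once pthread_create returned */
	return ret;
}
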
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 69ad896f556c..c0035990a33c 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -121,8 +121,7 @@ static void *workerfn(void *arg __maybe_unused)
return NULL;
}
-static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
+static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
@@ -137,6 +136,9 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
CPU_ZERO_S(size, cpuset);
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
@@ -149,6 +151,7 @@ static void block_threads(pthread_t *w,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -165,7 +168,6 @@ int bench_futex_requeue(int argc, const char **argv)
int ret = 0;
unsigned int i, j;
struct sigaction act;
- pthread_attr_t thread_attr;
struct perf_cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
@@ -209,7 +211,6 @@ int bench_futex_requeue(int argc, const char **argv)
init_stats(&requeued_stats);
init_stats(&requeuetime_stats);
- pthread_attr_init(&thread_attr);
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
@@ -219,7 +220,7 @@ int bench_futex_requeue(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr, cpu);
+ block_threads(worker, cpu);
/* make sure all threads are already blocked */
mutex_lock(&thread_lock);
@@ -301,7 +302,6 @@ int bench_futex_requeue(int argc, const char **argv)
cond_destroy(&thread_parent);
cond_destroy(&thread_worker);
mutex_destroy(&thread_lock);
- pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 6682e49d0ee0..5ab0234d74e6 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -95,10 +95,12 @@ static void *waking_workerfn(void *arg)
return NULL;
}
-static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+static void wakeup_threads(struct thread_data *td)
{
unsigned int i;
+ pthread_attr_t thread_attr;
+ pthread_attr_init(&thread_attr);
pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
pthread_barrier_init(&barrier, NULL, params.nwakes + 1);
@@ -122,6 +124,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
err(EXIT_FAILURE, "pthread_join");
pthread_barrier_destroy(&barrier);
+ pthread_attr_destroy(&thread_attr);
}
static void *blocked_workerfn(void *arg __maybe_unused)
@@ -142,8 +145,7 @@ static void *blocked_workerfn(void *arg __maybe_unused)
return NULL;
}
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
- struct perf_cpu_map *cpu)
+static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
@@ -158,6 +160,9 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
CPU_ZERO_S(size, cpuset);
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
@@ -170,6 +175,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -238,7 +244,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
int ret = 0;
unsigned int i, j;
struct sigaction act;
- pthread_attr_t thread_attr;
struct thread_data *waking_worker;
struct perf_cpu_map *cpu;
@@ -294,7 +299,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
- pthread_attr_init(&thread_attr);
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
@@ -305,7 +309,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
/* create, launch & block all threads */
- block_threads(blocked_worker, thread_attr, cpu);
+ block_threads(blocked_worker, cpu);
/* make sure all threads are already blocked */
mutex_lock(&thread_lock);
@@ -317,7 +321,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
usleep(100000);
/* Ok, all threads are patiently blocked, start waking folks up */
- wakeup_threads(waking_worker, thread_attr);
+ wakeup_threads(waking_worker);
for (i = 0; i < params.nthreads; i++) {
ret = pthread_join(blocked_worker[i], NULL);
@@ -336,7 +340,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
cond_destroy(&thread_parent);
cond_destroy(&thread_worker);
mutex_destroy(&thread_lock);
- pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 9ecab6620a87..18a5894af8bb 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -95,8 +95,7 @@ static void print_summary(void)
rel_stddev_stats(waketime_stddev, waketime_avg));
}
-static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
+static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
@@ -110,6 +109,9 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
CPU_ZERO_S(size, cpuset);
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
@@ -122,6 +124,7 @@ static void block_threads(pthread_t *w,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -138,7 +141,6 @@ int bench_futex_wake(int argc, const char **argv)
int ret = 0;
unsigned int i, j;
struct sigaction act;
- pthread_attr_t thread_attr;
struct perf_cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
@@ -178,7 +180,6 @@ int bench_futex_wake(int argc, const char **argv)
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
- pthread_attr_init(&thread_attr);
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
@@ -188,7 +189,7 @@ int bench_futex_wake(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr, cpu);
+ block_threads(worker, cpu);
/* make sure all threads are already blocked */
mutex_lock(&thread_lock);
@@ -228,7 +229,6 @@ int bench_futex_wake(int argc, const char **argv)
cond_destroy(&thread_parent);
cond_destroy(&thread_worker);
mutex_destroy(&thread_lock);
- pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/pmu-scan.c b/tools/perf/bench/pmu-scan.c
index f0f007843bb8..c7d207f8e13c 100644
--- a/tools/perf/bench/pmu-scan.c
+++ b/tools/perf/bench/pmu-scan.c
@@ -22,6 +22,7 @@ struct pmu_scan_result {
int nr_aliases;
int nr_formats;
int nr_caps;
+ bool is_core;
};
static const struct option options[] = {
@@ -40,13 +41,11 @@ static struct pmu_scan_result *results;
static int save_result(void)
{
- struct perf_pmu *pmu;
+ struct perf_pmu *pmu = NULL;
struct list_head *list;
struct pmu_scan_result *r;
- perf_pmu__scan(NULL);
-
- perf_pmus__for_each_pmu(pmu) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
r = realloc(results, (nr_pmus + 1) * sizeof(*r));
if (r == NULL)
return -ENOMEM;
@@ -55,6 +54,7 @@ static int save_result(void)
r = results + nr_pmus;
r->name = strdup(pmu->name);
+ r->is_core = pmu->is_core;
r->nr_caps = pmu->nr_caps;
r->nr_aliases = 0;
@@ -70,11 +70,11 @@ static int save_result(void)
nr_pmus++;
}
- perf_pmu__destroy();
+ perf_pmus__destroy();
return 0;
}
-static int check_result(void)
+static int check_result(bool core_only)
{
struct pmu_scan_result *r;
struct perf_pmu *pmu;
@@ -83,7 +83,10 @@ static int check_result(void)
for (int i = 0; i < nr_pmus; i++) {
r = &results[i];
- pmu = perf_pmu__find(r->name);
+ if (core_only && !r->is_core)
+ continue;
+
+ pmu = perf_pmus__find(r->name);
if (pmu == NULL) {
pr_err("Cannot find PMU %s\n", r->name);
return -1;
@@ -132,7 +135,6 @@ static int run_pmu_scan(void)
struct timeval start, end, diff;
double time_average, time_stddev;
u64 runtime_us;
- unsigned int i;
int ret;
init_stats(&stats);
@@ -144,26 +146,30 @@ static int run_pmu_scan(void)
return -1;
}
- for (i = 0; i < iterations; i++) {
- gettimeofday(&start, NULL);
- perf_pmu__scan(NULL);
- gettimeofday(&end, NULL);
-
- timersub(&end, &start, &diff);
- runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
- update_stats(&stats, runtime_us);
-
- ret = check_result();
- perf_pmu__destroy();
- if (ret < 0)
- break;
+ for (int j = 0; j < 2; j++) {
+ bool core_only = (j == 0);
+
+ for (unsigned int i = 0; i < iterations; i++) {
+ gettimeofday(&start, NULL);
+ if (core_only)
+ perf_pmus__scan_core(NULL);
+ else
+ perf_pmus__scan(NULL);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&stats, runtime_us);
+
+ ret = check_result(core_only);
+ perf_pmus__destroy();
+ if (ret < 0)
+ break;
+ }
+ time_average = avg_stats(&stats);
+ time_stddev = stddev_stats(&stats);
+ pr_info(" Average%s PMU scanning took: %.3f usec (+- %.3f usec)\n",
+ core_only ? " core" : "", time_average, time_stddev);
}
-
- time_average = avg_stats(&stats);
- time_stddev = stddev_stats(&stats);
- pr_info(" Average PMU scanning took: %.3f usec (+- %.3f usec)\n",
- time_average, time_stddev);
-
delete_result();
return 0;
}
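
The benchmark now drives the pmus API in its NULL-seeded iterator style: perf_pmus__scan(NULL) yields the first PMU, and each subsequent call yields the successor of its argument. A generic, self-contained sketch of the idiom with toy array-backed storage:

static const char *pmus[] = { "cpu_core", "cpu_atom", "ibs_op" };

/* NULL seeds the walk; a NULL return marks the end. */
static const char **scan_next(const char **prev)
{
	const char **end = pmus + sizeof(pmus) / sizeof(pmus[0]);
	const char **next = prev ? prev + 1 : pmus;

	return next < end ? next : NULL;
}

/* usage: for (const char **p = NULL; (p = scan_next(p)) != NULL; ) ... */
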
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 488f6e6ba1a5..fa1f8f998814 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -27,6 +27,7 @@
#include <poll.h>
#include <limits.h>
#include <err.h>
+#include <linux/list.h>
#include <linux/time64.h>
#define DATASIZE 100
@@ -35,8 +36,11 @@ static bool use_pipes = false;
static unsigned int nr_loops = 100;
static bool thread_mode = false;
static unsigned int num_groups = 10;
+static struct list_head sender_contexts = LIST_HEAD_INIT(sender_contexts);
+static struct list_head receiver_contexts = LIST_HEAD_INIT(receiver_contexts);
struct sender_context {
+ struct list_head list;
unsigned int num_fds;
int ready_out;
int wakefd;
@@ -44,6 +48,7 @@ struct sender_context {
};
struct receiver_context {
+ struct list_head list;
unsigned int num_packets;
int in_fds[2];
int ready_out;
@@ -170,6 +175,7 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
if (ret != 0)
err(EXIT_FAILURE, "pthread_create failed");
+ pthread_attr_destroy(&attr);
return childid;
}
@@ -201,6 +207,7 @@ static unsigned int group(pthread_t *pth,
if (!snd_ctx)
err(EXIT_FAILURE, "malloc()");
+ list_add(&snd_ctx->list, &sender_contexts);
for (i = 0; i < num_fds; i++) {
int fds[2];
struct receiver_context *ctx = malloc(sizeof(*ctx));
@@ -208,6 +215,7 @@ static unsigned int group(pthread_t *pth,
if (!ctx)
err(EXIT_FAILURE, "malloc()");
+ list_add(&ctx->list, &receiver_contexts);
/* Create the pipe between client and server */
fdpair(fds);
@@ -266,6 +274,7 @@ int bench_sched_messaging(int argc, const char **argv)
int readyfds[2], wakefds[2];
char dummy;
pthread_t *pth_tab;
+ struct sender_context *pos, *n;
argc = parse_options(argc, argv, options,
bench_sched_message_usage, 0);
@@ -324,6 +333,13 @@ int bench_sched_messaging(int argc, const char **argv)
}
free(pth_tab);
-
+ list_for_each_entry_safe(pos, n, &sender_contexts, list) {
+ list_del_init(&pos->list);
+ free(pos);
+ }
+ list_for_each_entry_safe(pos, n, &receiver_contexts, list) {
+ list_del_init(&pos->list);
+ free(pos);
+ }
return 0;
}
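
The leak fix chains every sender/receiver context onto a list head at allocation time so teardown can walk and free them. A compact sketch of the same bookkeeping, assuming the tools/include <linux/list.h> helpers used by the patch are on the include path:

#include <linux/list.h>
#include <stdlib.h>

struct ctx {
	struct list_head list;
	/* payload ... */
};

static LIST_HEAD(ctx_list);

static struct ctx *ctx_new(void)
{
	struct ctx *c = malloc(sizeof(*c));

	if (c)
		list_add(&c->list, &ctx_list);	/* track for later cleanup */
	return c;
}

static void ctx_free_all(void)
{
	struct ctx *pos, *n;

	/* _safe variant: 'pos' may be freed while iterating */
	list_for_each_entry_safe(pos, n, &ctx_list, list) {
		list_del(&pos->list);
		free(pos);
	}
}
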
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 63cdf6ea6f6d..aeeb801f1ed7 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -184,7 +184,7 @@ out:
static int process_branch_callback(struct evsel *evsel,
struct perf_sample *sample,
- struct addr_location *al __maybe_unused,
+ struct addr_location *al,
struct perf_annotate *ann,
struct machine *machine)
{
@@ -195,21 +195,29 @@ static int process_branch_callback(struct evsel *evsel,
.hide_unresolved = symbol_conf.hide_unresolved,
.ops = &hist_iter_branch,
};
-
struct addr_location a;
+ int ret;
- if (machine__resolve(machine, &a, sample) < 0)
- return -1;
+ addr_location__init(&a);
+ if (machine__resolve(machine, &a, sample) < 0) {
+ ret = -1;
+ goto out;
+ }
- if (a.sym == NULL)
- return 0;
+ if (a.sym == NULL) {
+ ret = 0;
+ goto out;
+ }
if (a.map != NULL)
map__dso(a.map)->hit = 1;
hist__account_cycles(sample->branch_stack, al, sample, false, NULL);
- return hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+ ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+out:
+ addr_location__exit(&a);
+ return ret;
}
static bool has_annotation(struct perf_annotate *ann)
@@ -272,10 +280,12 @@ static int process_sample_event(struct perf_tool *tool,
struct addr_location al;
int ret = 0;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
- return -1;
+ ret = -1;
+ goto out_put;
}
if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
@@ -288,7 +298,7 @@ static int process_sample_event(struct perf_tool *tool,
ret = -1;
}
out_put:
- addr_location__put(&al);
+ addr_location__exit(&al);
return ret;
}
@@ -342,7 +352,7 @@ static void hists__find_annotations(struct hists *hists,
notes = symbol__annotation(he->ms.sym);
if (notes->src == NULL) {
find_next:
- if (key == K_LEFT)
+ if (key == K_LEFT || key == '<')
nd = rb_prev(nd);
else
nd = rb_next(nd);
@@ -378,9 +388,11 @@ find_next:
return;
/* fall through */
case K_RIGHT:
+ case '>':
next = rb_next(nd);
break;
case K_LEFT:
+ case '<':
next = rb_prev(nd);
break;
default:
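
The addr_location__init()/addr_location__exit() conversion above enforces a single-exit resource pattern: initialize before first use, funnel every return through one label, release there. A generic, self-contained sketch with a toy resource type (all names hypothetical):

#include <stdlib.h>

struct resource { char *buf; };

static void resource_init(struct resource *r) { r->buf = NULL; }
static void resource_exit(struct resource *r) { free(r->buf); r->buf = NULL; }

static int use_resource(int fail)
{
	struct resource r;
	int ret;

	resource_init(&r);
	if (fail) {
		ret = -1;
		goto out;	/* early returns must not skip the exit call */
	}
	ret = 0;		/* ... work with r ... */
out:
	resource_exit(&r);
	return ret;
}
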
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 58f1cfe1eb34..db435b791a09 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -21,6 +21,7 @@
#include "builtin.h"
#include "bench/bench.h"
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -260,6 +261,7 @@ int cmd_bench(int argc, const char **argv)
/* Unbuffered output */
setvbuf(stdout, NULL, _IONBF, 0);
+ setlocale(LC_ALL, "");
if (argc < 2) {
/* No collection specified. */
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 08455e26b606..a4cf9de7a7b5 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -41,8 +41,7 @@
#include "symbol.h"
#include "ui/ui.h"
#include "ui/progress.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
+#include "pmus.h"
#include "string2.h"
#include "util/util.h"
@@ -285,25 +284,31 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct hist_entry *he;
struct addr_location al;
struct mem_info *mi, *mi_dup;
+ struct callchain_cursor *cursor;
int ret;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
- return -1;
+ ret = -1;
+ goto out;
}
if (c2c.stitch_lbr)
- al.thread->lbr_stitch_enable = true;
+ thread__set_lbr_stitch_enable(al.thread, true);
- ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+ cursor = get_tls_callchain_cursor();
+ ret = sample__resolve_callchain(sample, cursor, NULL,
evsel, &al, sysctl_perf_event_max_stack);
if (ret)
goto out;
mi = sample__resolve_mem(sample, &al);
- if (mi == NULL)
- return -ENOMEM;
+ if (mi == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
/*
* The mi object is released in hists__add_entry_ops,
@@ -369,7 +374,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
}
out:
- addr_location__put(&al);
+ addr_location__exit(&al);
return ret;
free_mi:
@@ -1150,14 +1155,14 @@ pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
{
int width = c2c_width(fmt, hpp, he->hists);
- return scnprintf(hpp->buf, hpp->size, "%*d", width, he->thread->pid_);
+ return scnprintf(hpp->buf, hpp->size, "%*d", width, thread__pid(he->thread));
}
static int64_t
pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right)
{
- return left->thread->pid_ - right->thread->pid_;
+ return thread__pid(left->thread) - thread__pid(right->thread);
}
static int64_t
@@ -3259,10 +3264,8 @@ static int perf_c2c__record(int argc, const char **argv)
argc = parse_options(argc, argv, options, record_mem_usage,
PARSE_OPT_KEEP_UNKNOWN);
- if (!perf_pmu__has_hybrid())
- rec_argc = argc + 11; /* max number of arguments */
- else
- rec_argc = argc + 11 * perf_pmu__hybrid_pmu_num();
+ /* Max number of arguments multiplied by number of PMUs that can support them. */
+ rec_argc = argc + 11 * perf_pmus__num_mem_pmus();
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (!rec_argv)
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 2603015f98be..2e8363778935 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -12,6 +12,7 @@
#include "util/debug.h"
#include "util/config.h"
#include <linux/string.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
@@ -157,7 +158,8 @@ int cmd_config(int argc, const char **argv)
{
int i, ret = -1;
struct perf_config_set *set;
- char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
+ char path[PATH_MAX];
+ char *user_config = mkpath(path, sizeof(path), "%s/.perfconfig", getenv("HOME"));
const char *config_filename;
bool changed = false;
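
mkpath() now takes a caller-owned buffer and its size instead of returning a static buffer. Inferred from the call sites in this patch (not copied from perf), the helper plausibly reduces to a vsnprintf wrapper:

#include <stdarg.h>
#include <stdio.h>

/* Sketch: format into the caller's buffer and hand it back. */
static char *mkpath_sketch(char *buf, size_t sz, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(buf, sz, fmt, ap);
	va_end(ap);
	return buf;
}
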
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 34cbe3e959aa..83954af36753 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -90,7 +90,7 @@ struct daemon {
char *base;
struct list_head sessions;
FILE *out;
- char perf[PATH_MAX];
+ char *perf;
int signal_fd;
time_t start;
};
@@ -1490,6 +1490,14 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[],
return send_cmd(daemon, &cmd);
}
+static char *alloc_perf_exe_path(void)
+{
+ char path[PATH_MAX];
+
+ perf_exe(path, sizeof(path));
+ return strdup(path);
+}
+
int cmd_daemon(int argc, const char **argv)
{
struct option daemon_options[] = {
@@ -1502,8 +1510,12 @@ int cmd_daemon(int argc, const char **argv)
"field separator", "print counts with custom separator", ","),
OPT_END()
};
+ int ret = -1;
+
+ __daemon.perf = alloc_perf_exe_path();
+ if (!__daemon.perf)
+ return -ENOMEM;
- perf_exe(__daemon.perf, sizeof(__daemon.perf));
__daemon.out = stdout;
argc = parse_options(argc, argv, daemon_options, daemon_usage,
@@ -1511,22 +1523,22 @@ int cmd_daemon(int argc, const char **argv)
if (argc) {
if (!strcmp(argv[0], "start"))
- return __cmd_start(&__daemon, daemon_options, argc, argv);
- if (!strcmp(argv[0], "signal"))
- return __cmd_signal(&__daemon, daemon_options, argc, argv);
+ ret = __cmd_start(&__daemon, daemon_options, argc, argv);
+ else if (!strcmp(argv[0], "signal"))
+ ret = __cmd_signal(&__daemon, daemon_options, argc, argv);
else if (!strcmp(argv[0], "stop"))
- return __cmd_stop(&__daemon, daemon_options, argc, argv);
+ ret = __cmd_stop(&__daemon, daemon_options, argc, argv);
else if (!strcmp(argv[0], "ping"))
- return __cmd_ping(&__daemon, daemon_options, argc, argv);
-
- pr_err("failed: unknown command '%s'\n", argv[0]);
- return -1;
- }
-
- if (setup_config(&__daemon)) {
- pr_err("failed: config not found\n");
- return -1;
+ ret = __cmd_ping(&__daemon, daemon_options, argc, argv);
+ else
+ pr_err("failed: unknown command '%s'\n", argv[0]);
+ } else {
+ ret = setup_config(&__daemon);
+ if (ret)
+ pr_err("failed: config not found\n");
+ else
+ ret = send_cmd_list(&__daemon);
}
-
- return send_cmd_list(&__daemon);
+ zfree(&__daemon.perf);
+ return ret;
}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index dbb0562d6a4f..e8a1b16aa5f8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -409,15 +409,17 @@ static int diff__process_sample_event(struct perf_tool *tool,
return 0;
}
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
- return -1;
+ ret = -1;
+ goto out;
}
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) {
ret = 0;
- goto out_put;
+ goto out;
}
switch (compute) {
@@ -426,7 +428,7 @@ static int diff__process_sample_event(struct perf_tool *tool,
NULL, NULL, NULL, sample, true)) {
pr_warning("problem incrementing symbol period, "
"skipping event\n");
- goto out_put;
+ goto out;
}
hist__account_cycles(sample->branch_stack, &al, sample, false,
@@ -437,7 +439,7 @@ static int diff__process_sample_event(struct perf_tool *tool,
if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
NULL)) {
pr_debug("problem adding hist entry, skipping event\n");
- goto out_put;
+ goto out;
}
break;
@@ -446,7 +448,7 @@ static int diff__process_sample_event(struct perf_tool *tool,
true)) {
pr_warning("problem incrementing symbol period, "
"skipping event\n");
- goto out_put;
+ goto out;
}
}
@@ -460,8 +462,8 @@ static int diff__process_sample_event(struct perf_tool *tool,
if (!al.filtered)
hists->stats.total_non_filtered_period += sample->period;
ret = 0;
-out_put:
- addr_location__put(&al);
+out:
+ addr_location__exit(&al);
return ret;
}
@@ -1376,8 +1378,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
- (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
+ if (start_line != SRCLINE_UNKNOWN &&
+ end_line != SRCLINE_UNKNOWN) {
scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld",
start_line, end_line, block_he->diff.cycles);
} else {
@@ -1385,8 +1387,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
bi->start, bi->end, block_he->diff.cycles);
}
- free_srcline(start_line);
- free_srcline(end_line);
+ zfree_srcline(&start_line);
+ zfree_srcline(&end_line);
return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
}
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index f9906f52e4fa..ac2e6c75f912 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -650,6 +650,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
break;
if (fwrite(buf, n, 1, stdout) != 1)
break;
+ /* flush output since stdout is in full buffering mode due to pager */
+ fflush(stdout);
}
}
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 3e7f52054fac..b2a368ae295a 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -19,6 +19,7 @@
#include <linux/string.h>
#include <linux/zalloc.h>
#include <errno.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -389,9 +390,10 @@ static int get_html_page_path(char **page_path, const char *page)
{
struct stat st;
const char *html_path = system_path(PERF_HTML_PATH);
+ char path[PATH_MAX];
/* Check that we have a perf documentation directory. */
- if (stat(mkpath("%s/perf.html", html_path), &st)
+ if (stat(mkpath(path, sizeof(path), "%s/perf.html", html_path), &st)
|| !S_ISREG(st.st_mode)) {
pr_err("'%s': not a documentation directory.", html_path);
return -1;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 61766eead4f4..c8cf2fdd9cff 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -47,7 +47,7 @@
struct guest_event {
struct perf_sample sample;
union perf_event *event;
- char event_buf[PERF_SAMPLE_MAX_SIZE];
+ char *event_buf;
};
struct guest_id {
@@ -122,7 +122,7 @@ struct perf_inject {
u64 aux_id;
struct list_head samples;
struct itrace_synth_opts itrace_synth_opts;
- char event_copy[PERF_SAMPLE_MAX_SIZE];
+ char *event_copy;
struct perf_file_section secs[HEADER_FEAT_BITS];
struct guest_session guest_session;
struct strlist *known_build_ids;
@@ -320,8 +320,14 @@ perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
{
size_t sz1 = sample->aux_sample.data - (void *)event;
size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
- union perf_event *ev = (union perf_event *)inject->event_copy;
+ union perf_event *ev;
+ if (inject->event_copy == NULL) {
+ inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!inject->event_copy)
+ return ERR_PTR(-ENOMEM);
+ }
+ ev = (union perf_event *)inject->event_copy;
if (sz1 > event->header.size || sz2 > event->header.size ||
sz1 + sz2 > event->header.size ||
sz1 < sizeof(struct perf_event_header) + sizeof(u64))
@@ -357,8 +363,11 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
build_id__mark_dso_hit(tool, event, sample, evsel, machine);
- if (inject->itrace_synth_opts.set && sample->aux_sample.size)
+ if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
event = perf_inject__cut_auxtrace_sample(inject, event, sample);
+ if (IS_ERR(event))
+ return PTR_ERR(event);
+ }
return perf_event__repipe_synth(tool, event);
}
@@ -417,7 +426,7 @@ static struct dso *findnew_dso(int pid, int tid, const char *filename,
}
vdso = is_vdso_map(filename);
- nsi = nsinfo__get(thread->nsinfo);
+ nsi = nsinfo__get(thread__nsinfo(thread));
if (vdso) {
/* The vdso maps are always on the host and not the
@@ -743,6 +752,7 @@ int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
struct addr_location al;
struct thread *thread;
+ addr_location__init(&al);
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
@@ -763,6 +773,7 @@ int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
thread__put(thread);
repipe:
perf_event__repipe(tool, event, sample, machine);
+ addr_location__exit(&al);
return 0;
}
@@ -1363,11 +1374,19 @@ static void guest_session__convert_time(struct guest_session *gs, u64 guest_time
static int guest_session__fetch(struct guest_session *gs)
{
- void *buf = gs->ev.event_buf;
- struct perf_event_header *hdr = buf;
+ void *buf;
+ struct perf_event_header *hdr;
size_t hdr_sz = sizeof(*hdr);
ssize_t ret;
+ buf = gs->ev.event_buf;
+ if (!buf) {
+ buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!buf)
+ return -ENOMEM;
+ gs->ev.event_buf = buf;
+ }
+ hdr = buf;
ret = readn(gs->tmp_fd, buf, hdr_sz);
if (ret < 0)
return ret;
@@ -2389,5 +2408,7 @@ out_close_output:
if (!inject.in_place_update)
perf_data__close(&inject.output);
free(inject.itrace_synth_opts.vm_tm_corr_args);
+ free(inject.event_copy);
+ free(inject.guest_session.ev.event_buf);
return ret;
}
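Both event_buf and event_copy above become allocate-on-first-use pointers instead of fixed in-struct arrays, so the large buffers only exist once they are actually needed. The pattern in miniature (illustrative names and size, simplified error handling):

    #include <stdlib.h>

    #define SAMPLE_MAX (1 << 16)        /* stand-in for PERF_SAMPLE_MAX_SIZE */

    struct ctx {
            char *event_buf;            /* was: char event_buf[SAMPLE_MAX]; */
    };

    /* Allocate on first use; later calls reuse the same buffer. */
    static char *ctx_event_buf(struct ctx *c)
    {
            if (!c->event_buf)
                    c->event_buf = malloc(SAMPLE_MAX);
            return c->event_buf;        /* NULL maps to -ENOMEM at the call site */
    }

    int main(void)
    {
            struct ctx c = { NULL };

            if (!ctx_event_buf(&c))
                    return 1;           /* would be -ENOMEM in perf */
            free(c.event_buf);
            return 0;
    }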
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
index 3751df744577..7f75c5b73f26 100644
--- a/tools/perf/builtin-kallsyms.c
+++ b/tools/perf/builtin-kallsyms.c
@@ -62,7 +62,6 @@ int cmd_kallsyms(int argc, const char **argv)
if (argc < 1)
usage_with_options(kallsyms_usage, options);
- symbol_conf.sort_by_name = true;
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
if (symbol__init(NULL) < 0)
return -1;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 2150eeced892..9714327fd0ea 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -399,21 +399,29 @@ static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
struct addr_location al;
struct machine *machine = &kmem_session->machines.host;
struct callchain_cursor_node *node;
+ struct callchain_cursor *cursor;
+ u64 result = sample->ip;
+ addr_location__init(&al);
if (alloc_func_list == NULL) {
if (build_alloc_func_list() < 0)
goto out;
}
al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
- callchain_cursor_commit(&callchain_cursor);
+ cursor = get_tls_callchain_cursor();
+ if (cursor == NULL)
+ goto out;
+
+ sample__resolve_callchain(sample, cursor, NULL, evsel, &al, 16);
+
+ callchain_cursor_commit(cursor);
while (true) {
struct alloc_func key, *caller;
u64 addr;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(cursor);
if (node == NULL)
break;
@@ -427,16 +435,18 @@ static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
else
addr = node->ip;
- return addr;
+ result = addr;
+ goto out;
} else
pr_debug3("skipping alloc function: %s\n", caller->name);
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(cursor);
}
-out:
pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
- return sample->ip;
+out:
+ addr_location__exit(&al);
+ return result;
}
struct sort_dimension {
@@ -964,7 +974,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (perf_kmem__skip_sample(sample))
return 0;
- dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+ dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread__tid(thread));
if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
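The kmem conversion above replaces the global callchain_cursor with get_tls_callchain_cursor(), which can fail and therefore must be NULL-checked at each use. A rough model of such a lazily allocated thread-local accessor (an assumption about its shape, not the real implementation):

    #include <stdlib.h>

    struct cursor_model { int nr; };    /* illustrative only */

    static __thread struct cursor_model *tls_cursor;

    /* Lazily allocate one cursor per thread; callers must check for NULL. */
    static struct cursor_model *get_tls_cursor(void)
    {
            if (!tls_cursor)
                    tls_cursor = calloc(1, sizeof(*tls_cursor));
            return tls_cursor;
    }

    int main(void)
    {
            return get_tls_cursor() ? 0 : 1;
    }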
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index a9395c52b23b..14bf7a8429e7 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -589,7 +589,7 @@ static void timehist_save_callchain(struct perf_kwork *kwork,
struct symbol *sym;
struct thread *thread;
struct callchain_cursor_node *node;
- struct callchain_cursor *cursor = &callchain_cursor;
+ struct callchain_cursor *cursor;
if (!kwork->show_callchain || sample->callchain == NULL)
return;
@@ -601,6 +601,8 @@ static void timehist_save_callchain(struct perf_kwork *kwork,
return;
}
+ cursor = get_tls_callchain_cursor();
+
if (thread__resolve_callchain(thread, cursor, evsel, sample,
NULL, NULL, kwork->max_stack + 2) != 0) {
pr_debug("Failed to resolve callchain, skipping\n");
@@ -686,12 +688,18 @@ static void timehist_print_event(struct perf_kwork *kwork,
* callchain
*/
if (kwork->show_callchain) {
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor == NULL)
+ return;
+
printf(" ");
+
sample__fprintf_sym(sample, al, 0,
EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
EVSEL__PRINT_CALLCHAIN_ARROW |
EVSEL__PRINT_SKIP_IGNORED,
- &callchain_cursor, symbol_conf.bt_stop_list,
+ cursor, symbol_conf.bt_stop_list,
stdout);
}
@@ -739,17 +747,22 @@ static int timehist_exit_event(struct perf_kwork *kwork,
struct kwork_atom *atom = NULL;
struct kwork_work *work = NULL;
struct addr_location al;
+ int ret = 0;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_debug("Problem processing event, skipping it\n");
- return -1;
+ ret = -1;
+ goto out;
}
atom = work_pop_atom(kwork, class, KWORK_TRACE_EXIT,
KWORK_TRACE_ENTRY, evsel, sample,
machine, &work);
- if (work == NULL)
- return -1;
+ if (work == NULL) {
+ ret = -1;
+ goto out;
+ }
if (atom != NULL) {
work->nr_atoms++;
@@ -757,7 +770,9 @@ static int timehist_exit_event(struct perf_kwork *kwork,
atom_del(atom);
}
- return 0;
+out:
+ addr_location__exit(&al);
+ return ret;
}
static struct kwork_class kwork_irq;
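The kwork hunks adopt the addr_location__init()/addr_location__exit() bracket with a single out: label, so every return path releases the resolved location exactly once. The same shape in miniature, with a hypothetical resource type:

    #include <stdlib.h>

    struct res { void *p; };            /* hypothetical resource */

    static void res_init(struct res *r) { r->p = NULL; }
    static void res_exit(struct res *r) { free(r->p); r->p = NULL; }

    static int do_work(int fail)
    {
            struct res r;
            int ret = 0;

            res_init(&r);
            if (fail) {
                    ret = -1;
                    goto out;           /* every path funnels through cleanup */
            }
            r.p = malloc(16);
    out:
            res_exit(&r);
            return ret;
    }

    int main(void)
    {
            do_work(1);
            return do_work(0);
    }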
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 1f5dbd5f0ba4..7fec2cca759f 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -11,10 +11,11 @@
#include "builtin.h"
#include "util/print-events.h"
+#include "util/pmus.h"
#include "util/pmu.h"
-#include "util/pmu-hybrid.h"
#include "util/debug.h"
#include "util/metricgroup.h"
+#include "util/pfm.h"
#include "util/string2.h"
#include "util/strlist.h"
#include "util/strbuf.h"
@@ -192,9 +193,14 @@ static void default_print_metric(void *ps,
if (group && print_state->metricgroups) {
if (print_state->name_only)
printf("%s ", group);
- else if (print_state->metrics)
- printf("\n%s:\n", group);
- else
+ else if (print_state->metrics) {
+ const char *gdesc = describe_metricgroup(group);
+
+ if (gdesc)
+ printf("\n%s: [%s]\n", group, gdesc);
+ else
+ printf("\n%s:\n", group);
+ } else
printf("%s\n", group);
}
zfree(&print_state->last_metricgroups);
@@ -429,7 +435,7 @@ int cmd_list(int argc, const char **argv)
.print_event = default_print_event,
.print_metric = default_print_metric,
};
- const char *hybrid_name = NULL;
+ const char *cputype = NULL;
const char *unit_name = NULL;
bool json = false;
struct option list_options[] = {
@@ -443,8 +449,8 @@ int cmd_list(int argc, const char **argv)
"Print information on the perf event names and expressions used internally by events."),
OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
"Print deprecated events."),
- OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
- "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
+ OPT_STRING(0, "cputype", &cputype, "cpu type",
+ "Limit PMU or metric printing to the given PMU (e.g. cpu, core or atom)."),
OPT_STRING(0, "unit", &unit_name, "PMU name",
"Limit PMU or metric printing to the specified PMU."),
OPT_INCR(0, "debug", &verbose,
@@ -452,7 +458,11 @@ int cmd_list(int argc, const char **argv)
OPT_END()
};
const char * const list_usage[] = {
+#ifdef HAVE_LIBPFM
+ "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob|pfm]",
+#else
"perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]",
+#endif
NULL
};
@@ -484,10 +494,15 @@ int cmd_list(int argc, const char **argv)
assert(default_ps.visited_metrics);
if (unit_name)
default_ps.pmu_glob = strdup(unit_name);
- else if (hybrid_name) {
- default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name);
- if (!default_ps.pmu_glob)
- pr_warning("WARNING: hybrid cputype is not supported!\n");
+ else if (cputype) {
+ const struct perf_pmu *pmu = perf_pmus__pmu_for_pmu_filter(cputype);
+
+ if (!pmu) {
+ pr_err("ERROR: cputype is not supported!\n");
+ ret = -1;
+ goto out;
+ }
+ default_ps.pmu_glob = pmu->name;
}
}
print_cb.print_start(ps);
@@ -517,7 +532,7 @@ int cmd_list(int argc, const char **argv)
strcmp(argv[i], "hwcache") == 0)
print_hwcache_events(&print_cb, ps);
else if (strcmp(argv[i], "pmu") == 0)
- print_pmu_events(&print_cb, ps);
+ perf_pmus__print_pmu_events(&print_cb, ps);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(&print_cb, ps);
else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) {
@@ -529,7 +544,12 @@ int cmd_list(int argc, const char **argv)
default_ps.metricgroups = true;
default_ps.metrics = false;
metricgroup__print(&print_cb, ps);
- } else if ((sep = strchr(argv[i], ':')) != NULL) {
+ }
+#ifdef HAVE_LIBPFM
+ else if (strcmp(argv[i], "pfm") == 0)
+ print_libpfm_events(&print_cb, ps);
+#endif
+ else if ((sep = strchr(argv[i], ':')) != NULL) {
char *old_pmu_glob = default_ps.pmu_glob;
default_ps.event_glob = strdup(argv[i]);
@@ -557,7 +577,7 @@ int cmd_list(int argc, const char **argv)
event_symbols_sw, PERF_COUNT_SW_MAX);
print_tool_events(&print_cb, ps);
print_hwcache_events(&print_cb, ps);
- print_pmu_events(&print_cb, ps);
+ perf_pmus__print_pmu_events(&print_cb, ps);
print_tracepoint_events(&print_cb, ps);
print_sdt_events(&print_cb, ps);
default_ps.metrics = true;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 70b14ba5fdd5..0cf1c5a2e032 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -27,6 +27,7 @@
#include "util/map.h"
#include "util/util.h"
+#include <stdio.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
@@ -48,7 +49,7 @@ static struct target target;
#define LOCKHASH_BITS 12
#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
-static struct hlist_head lockhash_table[LOCKHASH_SIZE];
+static struct hlist_head *lockhash_table;
#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
@@ -65,6 +66,8 @@ static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static LIST_HEAD(callstack_filters);
+static const char *output_name = NULL;
+static FILE *lock_output;
struct callstack_filter {
struct list_head list;
@@ -225,22 +228,28 @@ static void lock_stat_key_print_time(unsigned long long nsec, int len)
{ 0, NULL },
};
+ /* for CSV output */
+ if (len == 0) {
+ fprintf(lock_output, "%llu", nsec);
+ return;
+ }
+
for (int i = 0; table[i].unit; i++) {
if (nsec < table[i].base)
continue;
- pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
+ fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
return;
}
- pr_info("%*llu %s", len - 3, nsec, "ns");
+ fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
}
#define PRINT_KEY(member) \
static void lock_stat_key_print_ ## member(struct lock_key *key, \
struct lock_stat *ls) \
{ \
- pr_info("%*llu", key->len, (unsigned long long)ls->member); \
+ fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}
#define PRINT_TIME(member) \
@@ -911,7 +920,7 @@ static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sampl
char *buf, int size)
{
struct thread *thread;
- struct callchain_cursor *cursor = &callchain_cursor;
+ struct callchain_cursor *cursor;
struct machine *machine = &session->machines.host;
struct symbol *sym;
int skip = 0;
@@ -925,6 +934,8 @@ static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sampl
if (thread == NULL)
return -1;
+ cursor = get_tls_callchain_cursor();
+
/* use caller function name from the callchain */
ret = thread__resolve_callchain(thread, cursor, evsel, sample,
NULL, NULL, max_stack_depth);
@@ -962,7 +973,7 @@ next:
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
- struct callchain_cursor *cursor = &callchain_cursor;
+ struct callchain_cursor *cursor;
struct machine *machine = &session->machines.host;
struct thread *thread;
u64 hash = 0;
@@ -973,6 +984,7 @@ static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
if (thread == NULL)
return -1;
+ cursor = get_tls_callchain_cursor();
/* use caller function name from the callchain */
ret = thread__resolve_callchain(thread, cursor, evsel, sample,
NULL, NULL, max_stack_depth);
@@ -1326,12 +1338,12 @@ static void print_bad_events(int bad, int total)
if (quiet || total == 0 || (broken == 0 && verbose <= 0))
return;
- pr_info("\n=== output for debug ===\n\n");
- pr_info("bad: %d, total: %d\n", bad, total);
- pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
- pr_info("histogram of events caused bad sequence\n");
+ fprintf(lock_output, "\n=== output for debug ===\n\n");
+ fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
+ fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
+ fprintf(lock_output, "histogram of events caused bad sequence\n");
for (i = 0; i < BROKEN_MAX; i++)
- pr_info(" %10s: %d\n", name[i], bad_hist[i]);
+ fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
}
/* TODO: various ways to print, coloring, nano or milli sec */
@@ -1343,10 +1355,10 @@ static void print_result(void)
int bad, total, printed;
if (!quiet) {
- pr_info("%20s ", "Name");
+ fprintf(lock_output, "%20s ", "Name");
list_for_each_entry(key, &lock_keys, list)
- pr_info("%*s ", key->len, key->header);
- pr_info("\n\n");
+ fprintf(lock_output, "%*s ", key->len, key->header);
+ fprintf(lock_output, "\n\n");
}
bad = total = printed = 0;
@@ -1371,7 +1383,7 @@ static void print_result(void)
name = thread__comm_str(t);
}
- pr_info("%20s ", name);
+ fprintf(lock_output, "%20s ", name);
} else {
strncpy(cut_name, st->name, 16);
cut_name[16] = '.';
@@ -1379,14 +1391,14 @@ static void print_result(void)
cut_name[18] = '.';
cut_name[19] = '\0';
/* cut the name short to keep the output aligned */
- pr_info("%20s ", cut_name);
+ fprintf(lock_output, "%20s ", cut_name);
}
list_for_each_entry(key, &lock_keys, list) {
key->print(key, st);
- pr_info(" ");
+ fprintf(lock_output, " ");
}
- pr_info("\n");
+ fprintf(lock_output, "\n");
if (++printed >= print_nr_entries)
break;
@@ -1403,13 +1415,13 @@ static void dump_threads(void)
struct rb_node *node;
struct thread *t;
- pr_info("%10s: comm\n", "Thread ID");
+ fprintf(lock_output, "%10s: comm\n", "Thread ID");
node = rb_first(&thread_stats);
while (node) {
st = container_of(node, struct thread_stat, rb);
t = perf_session__findnew(session, st->tid);
- pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
+ fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
node = rb_next(node);
thread__put(t);
}
@@ -1435,7 +1447,7 @@ static void dump_map(void)
unsigned int i;
struct lock_stat *st;
- pr_info("Address of instance: name of class\n");
+ fprintf(lock_output, "Address of instance: name of class\n");
for (i = 0; i < LOCKHASH_SIZE; i++) {
hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
insert_to_result(st, compare_maps);
@@ -1443,7 +1455,7 @@ static void dump_map(void)
}
while ((st = pop_from_result()))
- pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
+ fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
}
static int dump_info(void)
@@ -1623,58 +1635,242 @@ static void sort_contention_result(void)
sort_result();
}
-static void print_bpf_events(int total, struct lock_contention_fails *fails)
+static void print_header_stdio(void)
+{
+ struct lock_key *key;
+
+ list_for_each_entry(key, &lock_keys, list)
+ fprintf(lock_output, "%*s ", key->len, key->header);
+
+ switch (aggr_mode) {
+ case LOCK_AGGR_TASK:
+ fprintf(lock_output, " %10s %s\n\n", "pid",
+ show_lock_owner ? "owner" : "comm");
+ break;
+ case LOCK_AGGR_CALLER:
+ fprintf(lock_output, " %10s %s\n\n", "type", "caller");
+ break;
+ case LOCK_AGGR_ADDR:
+ fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
+ break;
+ default:
+ break;
+ }
+}
+
+static void print_header_csv(const char *sep)
+{
+ struct lock_key *key;
+
+ fprintf(lock_output, "# output: ");
+ list_for_each_entry(key, &lock_keys, list)
+ fprintf(lock_output, "%s%s ", key->header, sep);
+
+ switch (aggr_mode) {
+ case LOCK_AGGR_TASK:
+ fprintf(lock_output, "%s%s %s\n", "pid", sep,
+ show_lock_owner ? "owner" : "comm");
+ break;
+ case LOCK_AGGR_CALLER:
+ fprintf(lock_output, "%s%s %s", "type", sep, "caller");
+ if (verbose > 0)
+ fprintf(lock_output, "%s %s", sep, "stacktrace");
+ fprintf(lock_output, "\n");
+ break;
+ case LOCK_AGGR_ADDR:
+ fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
+ break;
+ default:
+ break;
+ }
+}
+
+static void print_header(void)
+{
+ if (!quiet) {
+ if (symbol_conf.field_sep)
+ print_header_csv(symbol_conf.field_sep);
+ else
+ print_header_stdio();
+ }
+}
+
+static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
+{
+ struct lock_key *key;
+ struct thread *t;
+ int pid;
+
+ list_for_each_entry(key, &lock_keys, list) {
+ key->print(key, st);
+ fprintf(lock_output, " ");
+ }
+
+ switch (aggr_mode) {
+ case LOCK_AGGR_CALLER:
+ fprintf(lock_output, " %10s %s\n", get_type_str(st->flags), st->name);
+ break;
+ case LOCK_AGGR_TASK:
+ pid = st->addr;
+ t = perf_session__findnew(session, pid);
+ fprintf(lock_output, " %10d %s\n",
+ pid, pid == -1 ? "Unknown" : thread__comm_str(t));
+ break;
+ case LOCK_AGGR_ADDR:
+ fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
+ st->name, get_type_name(st->flags));
+ break;
+ default:
+ break;
+ }
+
+ if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
+ struct map *kmap;
+ struct symbol *sym;
+ char buf[128];
+ u64 ip;
+
+ for (int i = 0; i < max_stack_depth; i++) {
+ if (!st->callstack || !st->callstack[i])
+ break;
+
+ ip = st->callstack[i];
+ sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
+ get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
+ fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf);
+ }
+ }
+}
+
+static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
+ const char *sep)
+{
+ struct lock_key *key;
+ struct thread *t;
+ int pid;
+
+ list_for_each_entry(key, &lock_keys, list) {
+ key->print(key, st);
+ fprintf(lock_output, "%s ", sep);
+ }
+
+ switch (aggr_mode) {
+ case LOCK_AGGR_CALLER:
+ fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
+ if (verbose <= 0)
+ fprintf(lock_output, "\n");
+ break;
+ case LOCK_AGGR_TASK:
+ pid = st->addr;
+ t = perf_session__findnew(session, pid);
+ fprintf(lock_output, "%d%s %s\n", pid, sep,
+ pid == -1 ? "Unknown" : thread__comm_str(t));
+ break;
+ case LOCK_AGGR_ADDR:
+ fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
+ st->name, sep, get_type_name(st->flags));
+ break;
+ default:
+ break;
+ }
+
+ if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
+ struct map *kmap;
+ struct symbol *sym;
+ char buf[128];
+ u64 ip;
+
+ for (int i = 0; i < max_stack_depth; i++) {
+ if (!st->callstack || !st->callstack[i])
+ break;
+
+ ip = st->callstack[i];
+ sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
+ get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
+ fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
+ }
+ fprintf(lock_output, "\n");
+ }
+}
+
+static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
+{
+ if (symbol_conf.field_sep)
+ print_lock_stat_csv(con, st, symbol_conf.field_sep);
+ else
+ print_lock_stat_stdio(con, st);
+}
+
+static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
/* Output for debug, this has to be removed */
int broken = fails->task + fails->stack + fails->time + fails->data;
+ if (!use_bpf)
+ print_bad_events(bad, total);
+
if (quiet || total == 0 || (broken == 0 && verbose <= 0))
return;
total += broken;
- pr_info("\n=== output for debug ===\n\n");
- pr_info("bad: %d, total: %d\n", broken, total);
- pr_info("bad rate: %.2f %%\n", (double)broken / (double)total * 100);
+ fprintf(lock_output, "\n=== output for debug ===\n\n");
+ fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
+ fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);
- pr_info("histogram of failure reasons\n");
- pr_info(" %10s: %d\n", "task", fails->task);
- pr_info(" %10s: %d\n", "stack", fails->stack);
- pr_info(" %10s: %d\n", "time", fails->time);
- pr_info(" %10s: %d\n", "data", fails->data);
+ fprintf(lock_output, "histogram of failure reasons\n");
+ fprintf(lock_output, " %10s: %d\n", "task", fails->task);
+ fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
+ fprintf(lock_output, " %10s: %d\n", "time", fails->time);
+ fprintf(lock_output, " %10s: %d\n", "data", fails->data);
+}
+
+static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
+ const char *sep)
+{
+ /* Output for debug, this has to be removed */
+ if (use_bpf)
+ bad = fails->task + fails->stack + fails->time + fails->data;
+
+ if (quiet || total == 0 || (bad == 0 && verbose <= 0))
+ return;
+
+ total += bad;
+ fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
+
+ if (use_bpf) {
+ fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
+ fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
+ fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
+ fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
+ } else {
+ int i;
+ const char *name[4] = { "acquire", "acquired", "contended", "release" };
+
+ for (i = 0; i < BROKEN_MAX; i++)
+ fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
+ }
+ fprintf(lock_output, "\n");
+}
+
+static void print_footer(int total, int bad, struct lock_contention_fails *fails)
+{
+ if (symbol_conf.field_sep)
+ print_footer_csv(total, bad, fails, symbol_conf.field_sep);
+ else
+ print_footer_stdio(total, bad, fails);
}
static void print_contention_result(struct lock_contention *con)
{
struct lock_stat *st;
- struct lock_key *key;
int bad, total, printed;
- if (!quiet) {
- list_for_each_entry(key, &lock_keys, list)
- pr_info("%*s ", key->len, key->header);
-
- switch (aggr_mode) {
- case LOCK_AGGR_TASK:
- pr_info(" %10s %s\n\n", "pid",
- show_lock_owner ? "owner" : "comm");
- break;
- case LOCK_AGGR_CALLER:
- pr_info(" %10s %s\n\n", "type", "caller");
- break;
- case LOCK_AGGR_ADDR:
- pr_info(" %16s %s\n\n", "address", "symbol");
- break;
- default:
- break;
- }
- }
+ if (!quiet)
+ print_header();
bad = total = printed = 0;
while ((st = pop_from_result())) {
- struct thread *t;
- int pid;
-
total += use_bpf ? st->nr_contended : 1;
if (st->broken)
bad++;
@@ -1682,45 +1878,7 @@ static void print_contention_result(struct lock_contention *con)
if (!st->wait_time_total)
continue;
- list_for_each_entry(key, &lock_keys, list) {
- key->print(key, st);
- pr_info(" ");
- }
-
- switch (aggr_mode) {
- case LOCK_AGGR_CALLER:
- pr_info(" %10s %s\n", get_type_str(st->flags), st->name);
- break;
- case LOCK_AGGR_TASK:
- pid = st->addr;
- t = perf_session__findnew(session, pid);
- pr_info(" %10d %s\n",
- pid, pid == -1 ? "Unknown" : thread__comm_str(t));
- break;
- case LOCK_AGGR_ADDR:
- pr_info(" %016llx %s (%s)\n", (unsigned long long)st->addr,
- st->name, get_type_name(st->flags));
- break;
- default:
- break;
- }
-
- if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
- struct map *kmap;
- struct symbol *sym;
- char buf[128];
- u64 ip;
-
- for (int i = 0; i < max_stack_depth; i++) {
- if (!st->callstack || !st->callstack[i])
- break;
-
- ip = st->callstack[i];
- sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
- get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
- pr_info("\t\t\t%#lx %s\n", (unsigned long)ip, buf);
- }
- }
+ print_lock_stat(con, st);
if (++printed >= print_nr_entries)
break;
@@ -1737,10 +1895,7 @@ static void print_contention_result(struct lock_contention *con)
/* some entries are collected but hidden by the callstack filter */
total += con->nr_filtered;
- if (use_bpf)
- print_bpf_events(total, &con->fails);
- else
- print_bad_events(bad, total);
+ print_footer(total, bad, &con->fails);
}
static bool force;
@@ -1770,8 +1925,6 @@ static int __cmd_report(bool display_info)
return PTR_ERR(session);
}
- /* for lock function check */
- symbol_conf.sort_by_name = true;
symbol_conf.allow_aliases = true;
symbol__init(&session->header.env);
@@ -1846,6 +1999,16 @@ static int check_lock_contention_options(const struct option *options,
return -1;
}
+ if (symbol_conf.field_sep) {
+ if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
+ strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
+ strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
+ pr_err("Cannot use a separator that already appears in the output\n");
+ parse_options_usage(usage, options, "x", 1);
+ return -1;
+ }
+ }
+
if (show_lock_owner)
show_thread_stats = true;
@@ -1871,7 +2034,6 @@ static int __cmd_contention(int argc, const char **argv)
};
struct lock_contention con = {
.target = &target,
- .result = &lockhash_table[0],
.map_nr_entries = bpf_map_entries,
.max_stack = max_stack_depth,
.stack_skip = stack_skip,
@@ -1880,10 +2042,18 @@ static int __cmd_contention(int argc, const char **argv)
.owner = show_lock_owner,
};
+ lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
+ if (!lockhash_table)
+ return -ENOMEM;
+
+ con.result = &lockhash_table[0];
+
session = perf_session__new(use_bpf ? NULL : &data, &eops);
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
- return PTR_ERR(session);
+ err = PTR_ERR(session);
+ session = NULL;
+ goto out_delete;
}
con.machine = &session->machines.host;
@@ -1894,8 +2064,6 @@ static int __cmd_contention(int argc, const char **argv)
if (con.aggr_mode == LOCK_AGGR_CALLER)
con.save_callstack = true;
- /* for lock function check */
- symbol_conf.sort_by_name = true;
symbol_conf.allow_aliases = true;
symbol__init(&session->header.env);
@@ -1957,6 +2125,15 @@ static int __cmd_contention(int argc, const char **argv)
if (select_key(true))
goto out_delete;
+ if (symbol_conf.field_sep) {
+ int i;
+ struct lock_key *keys = contention_keys;
+
+ /* do not align output in CSV format */
+ for (i = 0; keys[i].name; i++)
+ keys[i].len = 0;
+ }
+
if (use_bpf) {
lock_contention_start();
if (argc)
@@ -1983,6 +2160,7 @@ out_delete:
evlist__delete(con.evlist);
lock_contention_finish();
perf_session__delete(session);
+ zfree(&lockhash_table);
return err;
}
@@ -2254,10 +2432,29 @@ static int parse_call_stack(const struct option *opt __maybe_unused, const char
return ret;
}
+static int parse_output(const struct option *opt __maybe_unused, const char *str,
+ int unset __maybe_unused)
+{
+ const char **name = (const char **)opt->value;
+
+ if (str == NULL)
+ return -1;
+
+ lock_output = fopen(str, "w");
+ if (lock_output == NULL) {
+ pr_err("Cannot open %s\n", str);
+ return -1;
+ }
+
+ *name = str;
+ return 0;
+}
+
int cmd_lock(int argc, const char **argv)
{
const struct option lock_options[] = {
OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -2324,6 +2521,8 @@ int cmd_lock(int argc, const char **argv)
OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
"Filter specific function in the callstack", parse_call_stack),
OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
+ OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
+ "print result in CSV format with custom separator"),
OPT_PARENT(lock_options)
};
@@ -2348,9 +2547,14 @@ int cmd_lock(int argc, const char **argv)
unsigned int i;
int rc = 0;
+ lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
+ if (!lockhash_table)
+ return -ENOMEM;
+
for (i = 0; i < LOCKHASH_SIZE; i++)
INIT_HLIST_HEAD(lockhash_table + i);
+ lock_output = stderr;
argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
@@ -2369,7 +2573,7 @@ int cmd_lock(int argc, const char **argv)
rc = __cmd_report(false);
} else if (!strcmp(argv[0], "script")) {
/* Aliased to 'perf script' */
- return cmd_script(argc, argv);
+ rc = cmd_script(argc, argv);
} else if (!strcmp(argv[0], "info")) {
if (argc) {
argc = parse_options(argc, argv,
@@ -2403,5 +2607,6 @@ int cmd_lock(int argc, const char **argv)
usage_with_options(lock_usage, lock_options);
}
+ zfree(&lockhash_table);
return rc;
}
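Most of the lock rework funnels printing through the new lock_output stream and splits each record into a stdio writer and a CSV writer selected by symbol_conf.field_sep. A compact sketch of that dispatch (hypothetical record fields, not perf's lock_stat):

    #include <stdio.h>

    static const char *field_sep;       /* NULL => aligned stdio, else CSV */
    static FILE *lock_out;

    static void print_rec_stdio(int wait, int count)
    {
            fprintf(lock_out, "%10d %10d\n", wait, count);
    }

    static void print_rec_csv(int wait, int count, const char *sep)
    {
            fprintf(lock_out, "%d%s %d\n", wait, sep, count);
    }

    static void print_rec(int wait, int count)
    {
            if (field_sep)
                    print_rec_csv(wait, count, field_sep);
            else
                    print_rec_stdio(wait, count);
    }

    int main(void)
    {
            lock_out = stdout;
            print_rec(125, 3);          /* aligned columns */
            field_sep = ",";
            print_rec(125, 3);          /* CSV */
            return 0;
    }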
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 65465930ef8e..51499c20da01 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -17,8 +17,7 @@
#include "util/dso.h"
#include "util/map.h"
#include "util/symbol.h"
-#include "util/pmu.h"
-#include "util/pmu-hybrid.h"
+#include "util/pmus.h"
#include "util/sample.h"
#include "util/string2.h"
#include "util/util.h"
@@ -93,10 +92,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
argc = parse_options(argc, argv, options, record_mem_usage,
PARSE_OPT_KEEP_UNKNOWN);
- if (!perf_pmu__has_hybrid())
- rec_argc = argc + 9; /* max number of arguments */
- else
- rec_argc = argc + 9 * perf_pmu__hybrid_pmu_num();
+ /* Max number of arguments multiplied by number of PMUs that can support them. */
+ rec_argc = argc + 9 * perf_pmus__num_mem_pmus();
if (mem->cpu_list)
rec_argc += 2;
@@ -202,9 +199,11 @@ dump_raw_samples(struct perf_tool *tool,
char str[PAGE_SIZE_NAME_LEN];
struct dso *dso = NULL;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
+ addr_location__exit(&al);
return -1;
}
@@ -259,7 +258,7 @@ dump_raw_samples(struct perf_tool *tool,
dso ? dso->long_name : "???",
al.sym ? al.sym->name : "???");
out_put:
- addr_location__put(&al);
+ addr_location__exit(&al);
return 0;
}
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 4df05b992093..019fef8da6a8 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -47,29 +47,29 @@ static struct {
char *target;
struct strfilter *filter;
struct nsinfo *nsi;
-} params;
+} *params;
/* Parse an event definition. Note that any error must die. */
static int parse_probe_event(const char *str)
{
- struct perf_probe_event *pev = &params.events[params.nevents];
+ struct perf_probe_event *pev = &params->events[params->nevents];
int ret;
- pr_debug("probe-definition(%d): %s\n", params.nevents, str);
- if (++params.nevents == MAX_PROBES) {
+ pr_debug("probe-definition(%d): %s\n", params->nevents, str);
+ if (++params->nevents == MAX_PROBES) {
pr_err("Too many probes (> %d) were specified.", MAX_PROBES);
return -1;
}
- pev->uprobes = params.uprobes;
- if (params.target) {
- pev->target = strdup(params.target);
+ pev->uprobes = params->uprobes;
+ if (params->target) {
+ pev->target = strdup(params->target);
if (!pev->target)
return -ENOMEM;
- params.target_used = true;
+ params->target_used = true;
}
- pev->nsi = nsinfo__get(params.nsi);
+ pev->nsi = nsinfo__get(params->nsi);
/* Parse a perf-probe command into event */
ret = parse_perf_probe_command(str, pev);
@@ -84,12 +84,12 @@ static int params_add_filter(const char *str)
int ret = 0;
pr_debug2("Add filter: %s\n", str);
- if (!params.filter) {
- params.filter = strfilter__new(str, &err);
- if (!params.filter)
+ if (!params->filter) {
+ params->filter = strfilter__new(str, &err);
+ if (!params->filter)
ret = err ? -EINVAL : -ENOMEM;
} else
- ret = strfilter__or(params.filter, str, &err);
+ ret = strfilter__or(params->filter, str, &err);
if (ret == -EINVAL) {
pr_err("Filter parse error at %td.\n", err - str + 1);
@@ -112,17 +112,17 @@ static int set_target(const char *ptr)
* TODO: Support relative path, and $PATH, $LD_LIBRARY_PATH,
* short module name.
*/
- if (!params.target && ptr && *ptr == '/') {
- params.target = strdup(ptr);
- if (!params.target)
+ if (!params->target && ptr && *ptr == '/') {
+ params->target = strdup(ptr);
+ if (!params->target)
return -ENOMEM;
- params.target_used = false;
+ params->target_used = false;
found = 1;
buf = ptr + (strlen(ptr) - 3);
if (strcmp(buf, ".ko"))
- params.uprobes = true;
+ params->uprobes = true;
}
@@ -172,15 +172,15 @@ static int opt_set_target(const struct option *opt, const char *str,
if (str) {
if (!strcmp(opt->long_name, "exec"))
- params.uprobes = true;
+ params->uprobes = true;
else if (!strcmp(opt->long_name, "module"))
- params.uprobes = false;
+ params->uprobes = false;
else
return ret;
/* Expand given path to absolute path, except for modulename */
- if (params.uprobes || strchr(str, '/')) {
- tmp = nsinfo__realpath(str, params.nsi);
+ if (params->uprobes || strchr(str, '/')) {
+ tmp = nsinfo__realpath(str, params->nsi);
if (!tmp) {
pr_warning("Failed to get the absolute path of %s: %m\n", str);
return ret;
@@ -190,9 +190,9 @@ static int opt_set_target(const struct option *opt, const char *str,
if (!tmp)
return -ENOMEM;
}
- free(params.target);
- params.target = tmp;
- params.target_used = false;
+ free(params->target);
+ params->target = tmp;
+ params->target_used = false;
ret = 0;
}
@@ -217,7 +217,7 @@ static int opt_set_target_ns(const struct option *opt __maybe_unused,
}
nsip = nsinfo__new(ns_pid);
if (nsip && nsinfo__need_setns(nsip))
- params.nsi = nsinfo__get(nsip);
+ params->nsi = nsinfo__get(nsip);
nsinfo__put(nsip);
ret = 0;
@@ -238,14 +238,14 @@ static int opt_show_lines(const struct option *opt,
if (!str)
return 0;
- if (params.command == 'L') {
+ if (params->command == 'L') {
pr_warning("Warning: more than one --line option is"
" detected. Only the first one is valid.\n");
return 0;
}
- params.command = opt->short_name;
- ret = parse_line_range_desc(str, &params.line_range);
+ params->command = opt->short_name;
+ ret = parse_line_range_desc(str, &params->line_range);
return ret;
}
@@ -253,7 +253,7 @@ static int opt_show_lines(const struct option *opt,
static int opt_show_vars(const struct option *opt,
const char *str, int unset __maybe_unused)
{
- struct perf_probe_event *pev = &params.events[params.nevents];
+ struct perf_probe_event *pev = &params->events[params->nevents];
int ret;
if (!str)
@@ -264,7 +264,7 @@ static int opt_show_vars(const struct option *opt,
pr_err(" Error: '--vars' doesn't accept arguments.\n");
return -EINVAL;
}
- params.command = opt->short_name;
+ params->command = opt->short_name;
return ret;
}
@@ -276,7 +276,7 @@ static int opt_add_probe_event(const struct option *opt,
const char *str, int unset __maybe_unused)
{
if (str) {
- params.command = opt->short_name;
+ params->command = opt->short_name;
return parse_probe_event(str);
}
@@ -287,7 +287,7 @@ static int opt_set_filter_with_command(const struct option *opt,
const char *str, int unset)
{
if (!unset)
- params.command = opt->short_name;
+ params->command = opt->short_name;
if (str)
return params_add_filter(str);
@@ -306,20 +306,29 @@ static int opt_set_filter(const struct option *opt __maybe_unused,
static int init_params(void)
{
- return line_range__init(&params.line_range);
+ int ret;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ return -ENOMEM;
+
+ ret = line_range__init(&params->line_range);
+ if (ret)
+ zfree(&params);
+ return ret;
}
static void cleanup_params(void)
{
int i;
- for (i = 0; i < params.nevents; i++)
- clear_perf_probe_event(params.events + i);
- line_range__clear(&params.line_range);
- free(params.target);
- strfilter__delete(params.filter);
- nsinfo__put(params.nsi);
- memset(&params, 0, sizeof(params));
+ for (i = 0; i < params->nevents; i++)
+ clear_perf_probe_event(params->events + i);
+ line_range__clear(&params->line_range);
+ free(params->target);
+ strfilter__delete(params->filter);
+ nsinfo__put(params->nsi);
+ zfree(&params);
}
static void pr_err_with_code(const char *msg, int err)
@@ -346,7 +355,7 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
if (ret < 0)
goto out_cleanup;
- if (params.command == 'D') { /* it shows definition */
+ if (params->command == 'D') { /* it shows definition */
if (probe_conf.bootconfig)
ret = show_bootconfig_events(pevs, npevs);
else
@@ -635,7 +644,7 @@ __cmd_probe(int argc, const char **argv)
usage_with_options_msg(probe_usage, options,
"'-' is not supported.\n");
}
- if (params.command && params.command != 'a') {
+ if (params->command && params->command != 'a') {
usage_with_options_msg(probe_usage, options,
"another command except --add is set.\n");
}
@@ -644,7 +653,7 @@ __cmd_probe(int argc, const char **argv)
pr_err_with_code(" Error: Command Parse Error.", ret);
return ret;
}
- params.command = 'a';
+ params->command = 'a';
}
ret = symbol__validate_sym_arguments();
@@ -664,54 +673,54 @@ __cmd_probe(int argc, const char **argv)
* nor change running kernel. So if user gives offline vmlinux,
* ignore its buildid.
*/
- if (!strchr("lda", params.command) && symbol_conf.vmlinux_name)
+ if (!strchr("lda", params->command) && symbol_conf.vmlinux_name)
symbol_conf.ignore_vmlinux_buildid = true;
- switch (params.command) {
+ switch (params->command) {
case 'l':
- if (params.uprobes) {
+ if (params->uprobes) {
pr_err(" Error: Don't use --list with --exec.\n");
parse_options_usage(probe_usage, options, "l", true);
parse_options_usage(NULL, options, "x", true);
return -EINVAL;
}
- ret = show_perf_probe_events(params.filter);
+ ret = show_perf_probe_events(params->filter);
if (ret < 0)
pr_err_with_code(" Error: Failed to show event list.", ret);
return ret;
case 'F':
- ret = show_available_funcs(params.target, params.nsi,
- params.filter, params.uprobes);
+ ret = show_available_funcs(params->target, params->nsi,
+ params->filter, params->uprobes);
if (ret < 0)
pr_err_with_code(" Error: Failed to show functions.", ret);
return ret;
#ifdef HAVE_DWARF_SUPPORT
case 'L':
- ret = show_line_range(&params.line_range, params.target,
- params.nsi, params.uprobes);
+ ret = show_line_range(&params->line_range, params->target,
+ params->nsi, params->uprobes);
if (ret < 0)
pr_err_with_code(" Error: Failed to show lines.", ret);
return ret;
case 'V':
- if (!params.filter)
- params.filter = strfilter__new(DEFAULT_VAR_FILTER,
+ if (!params->filter)
+ params->filter = strfilter__new(DEFAULT_VAR_FILTER,
NULL);
- ret = show_available_vars(params.events, params.nevents,
- params.filter);
+ ret = show_available_vars(params->events, params->nevents,
+ params->filter);
if (ret < 0)
pr_err_with_code(" Error: Failed to show vars.", ret);
return ret;
#endif
case 'd':
- ret = perf_del_probe_events(params.filter);
+ ret = perf_del_probe_events(params->filter);
if (ret < 0) {
pr_err_with_code(" Error: Failed to delete events.", ret);
return ret;
}
break;
case 'D':
- if (probe_conf.bootconfig && params.uprobes) {
+ if (probe_conf.bootconfig && params->uprobes) {
pr_err(" Error: --bootconfig doesn't support uprobes.\n");
return -EINVAL;
}
@@ -719,25 +728,25 @@ __cmd_probe(int argc, const char **argv)
case 'a':
/* Ensure the last given target is used */
- if (params.target && !params.target_used) {
+ if (params->target && !params->target_used) {
pr_err(" Error: -x/-m must follow the probe definitions.\n");
parse_options_usage(probe_usage, options, "m", true);
parse_options_usage(NULL, options, "x", true);
return -EINVAL;
}
- ret = perf_add_probe_events(params.events, params.nevents);
+ ret = perf_add_probe_events(params->events, params->nevents);
if (ret < 0) {
/*
* When perf_add_probe_events() fails it calls
* cleanup_perf_probe_events(pevs, npevs), i.e.
- * cleanup_perf_probe_events(params.events, params.nevents), which
+ * cleanup_perf_probe_events(params->events, params->nevents), which
* will call clear_perf_probe_event(), so set nevents to zero
* to avoid cleanup_params() to call clear_perf_probe_event() again
* on the same pevs.
*/
- params.nevents = 0;
+ params->nevents = 0;
pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}
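builtin-probe.c converts its file-scope params struct into a heap allocation paired with zfree() on cleanup, keeping the large state out of .bss and making double-cleanup harmless. The pattern in isolation (trimmed-down fields; zfree modeled as free-plus-NULL):

    #include <stdlib.h>

    struct probe_params { int nevents; char *target; };  /* illustrative */

    static struct probe_params *params;

    static int init_params(void)
    {
            params = calloc(1, sizeof(*params));
            return params ? 0 : -1;     /* perf returns -ENOMEM here */
    }

    static void cleanup_params(void)
    {
            if (!params)
                    return;
            free(params->target);
            free(params);
            params = NULL;              /* the zfree(&params) effect */
    }

    int main(void)
    {
            if (init_params())
                    return 1;
            cleanup_params();
            return 0;
    }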
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index efa03e4ac2c9..aec18db7ff23 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -48,9 +48,9 @@
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/clockid.h"
-#include "util/pmu-hybrid.h"
-#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "asm/bug.h"
@@ -1294,7 +1294,7 @@ static int record__open(struct record *rec)
* of waiting or event synthesis.
*/
if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
- perf_pmu__has_hybrid()) {
+ perf_pmus__num_core_pmus() > 1) {
pos = evlist__get_tracking_event(evlist);
if (!evsel__is_dummy_event(pos)) {
/* Set up dummy event. */
@@ -2193,7 +2193,7 @@ static void record__uniquify_name(struct record *rec)
char *new_name;
int ret;
- if (!perf_pmu__has_hybrid())
+ if (perf_pmus__num_core_pmus() == 1)
return;
evlist__for_each_entry(evlist, pos) {
@@ -3335,6 +3335,14 @@ const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
static bool dry_run;
+static struct parse_events_option_args parse_events_option_args = {
+ .evlistp = &record.evlist,
+};
+
+static struct parse_events_option_args switch_output_parse_events_option_args = {
+ .evlistp = &record.sb_evlist,
+};
+
/*
* XXX Will stay a global variable till we fix builtin-script.c to stop messing
* with it and switch to using the library functions in perf_evlist that came
@@ -3343,7 +3351,7 @@ static bool dry_run;
* using pipes, etc.
*/
static struct option __record_options[] = {
- OPT_CALLBACK('e', "event", &record.evlist, "event",
+ OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
@@ -3496,7 +3504,8 @@ static struct option __record_options[] = {
&record.switch_output.set, "signal or size[BKMG] or time[smhd]",
"Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
"signal"),
- OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
+ OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
+ &record.switch_output_event_set, "switch output event",
"switch output event selector. use 'perf list' to list available events",
parse_events_option_new_evlist),
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
@@ -4152,18 +4161,11 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0) {
- if (perf_pmu__has_hybrid()) {
- err = evlist__add_default_hybrid(rec->evlist,
- !record.opts.no_samples);
- } else {
- err = __evlist__add_default(rec->evlist,
- !record.opts.no_samples);
- }
+ bool can_profile_kernel = perf_event_paranoid_check(1);
- if (err < 0) {
- pr_err("Not enough memory for event selector list\n");
+ err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ if (err)
goto out;
- }
}
if (rec->opts.target.tid && !rec->opts.no_inherit_set)
@@ -4189,13 +4191,7 @@ int cmd_record(int argc, const char **argv)
/* Enable ignoring missing threads when -u/-p option is defined. */
rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
- if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
- pr_err("failed to use cpu list %s\n",
- rec->opts.target.cpu_list);
- goto out;
- }
-
- rec->opts.target.hybrid = perf_pmu__has_hybrid();
+ evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
arch__add_leaf_frame_record_opts(&rec->opts);
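With the hybrid helpers gone, the default event above is chosen by a paranoia check: "cycles:P" when kernel profiling is permitted, "cycles:Pu" otherwise. A standalone sketch of that decision; the threshold semantics of perf_event_paranoid_check() are an assumption here and deliberately simplified:

    #include <stdio.h>

    static int paranoid_level(void)
    {
            int level = 2;              /* conservative default */
            FILE *f = fopen("/proc/sys/kernel/perf_event_paranoid", "r");

            if (f) {
                    if (fscanf(f, "%d", &level) != 1)
                            level = 2;
                    fclose(f);
            }
            return level;
    }

    int main(void)
    {
            /* assumed: level <= 1 permits kernel profiling for this user */
            const char *ev = paranoid_level() <= 1 ? "cycles:P" : "cycles:Pu";

            printf("default event: %s\n", ev);
            return 0;
    }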
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 92c6797e7cba..dcedfe00f04d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -285,14 +285,16 @@ static int process_sample_event(struct perf_tool *tool,
if (evswitch__discard(&rep->evswitch, evsel))
return 0;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
- return -1;
+ ret = -1;
+ goto out_put;
}
if (rep->stitch_lbr)
- al.thread->lbr_stitch_enable = true;
+ thread__set_lbr_stitch_enable(al.thread, true);
if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
@@ -331,7 +333,7 @@ static int process_sample_event(struct perf_tool *tool,
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
out_put:
- addr_location__put(&al);
+ addr_location__exit(&al);
return ret;
}
@@ -829,14 +831,15 @@ static struct task *tasks_list(struct task *task, struct machine *machine)
return NULL;
/* Last one in the chain. */
- if (thread->ppid == -1)
+ if (thread__ppid(thread) == -1)
return task;
- parent_thread = machine__find_thread(machine, -1, thread->ppid);
+ parent_thread = machine__find_thread(machine, -1, thread__ppid(thread));
if (!parent_thread)
return ERR_PTR(-ENOENT);
parent_task = thread__priv(parent_thread);
+ thread__put(parent_thread);
list_add_tail(&task->list, &parent_task->children);
return tasks_list(parent_task, machine);
}
@@ -869,12 +872,12 @@ static void task__print_level(struct task *task, FILE *fp, int level)
struct thread *thread = task->thread;
struct task *child;
int comm_indent = fprintf(fp, " %8d %8d %8d |%*s",
- thread->pid_, thread->tid, thread->ppid,
- level, "");
+ thread__pid(thread), thread__tid(thread),
+ thread__ppid(thread), level, "");
fprintf(fp, "%s\n", thread__comm_str(thread));
- maps__fprintf_task(thread->maps, comm_indent, fp);
+ maps__fprintf_task(thread__maps(thread), comm_indent, fp);
if (!list_empty(&task->children)) {
list_for_each_entry(child, &task->children, list)
@@ -911,7 +914,7 @@ static int tasks_print(struct report *rep, FILE *fp)
nd = rb_next(nd)) {
task = tasks + itask++;
- task->thread = rb_entry(nd, struct thread, rb_node);
+ task->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
INIT_LIST_HEAD(&task->children);
INIT_LIST_HEAD(&task->list);
thread__set_priv(task->thread, task);
@@ -1673,7 +1676,6 @@ repeat:
* See symbol__browser_index.
*/
symbol_conf.priv_size += sizeof(u32);
- symbol_conf.sort_by_name = true;
}
annotation_config__init(&report.annotation_opts);
}
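The report changes replace direct struct thread field access with thread__tid()/thread__ppid()-style accessors, so the thread object can later be wrapped for reference-count checking without touching every caller again. A miniature of the accessor shape (the struct layout is illustrative):

    #include <stdio.h>
    #include <sys/types.h>

    struct thread { pid_t pid_, tid, ppid; };    /* illustrative layout */

    static inline pid_t thread__pid(const struct thread *t)  { return t->pid_; }
    static inline pid_t thread__tid(const struct thread *t)  { return t->tid; }
    static inline pid_t thread__ppid(const struct thread *t) { return t->ppid; }

    int main(void)
    {
            struct thread t = { 10, 11, 1 };

            printf("%d/%d ppid %d\n",
                   thread__pid(&t), thread__tid(&t), thread__ppid(&t));
            return 0;
    }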
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index cc4ba506e119..9ab300b6f131 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -193,8 +193,8 @@ struct perf_sched {
* weird events, such as a task being switched away that is not current.
*/
struct perf_cpu max_cpu;
- u32 curr_pid[MAX_CPUS];
- struct thread *curr_thread[MAX_CPUS];
+ u32 *curr_pid;
+ struct thread **curr_thread;
char next_shortname1;
char next_shortname2;
unsigned int replay_repeat;
@@ -224,7 +224,7 @@ struct perf_sched {
u64 run_avg;
u64 all_runtime;
u64 all_count;
- u64 cpu_last_switched[MAX_CPUS];
+ u64 *cpu_last_switched;
struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
struct list_head sort_list, cmp_pid;
bool force;
@@ -916,12 +916,12 @@ static int replay_fork_event(struct perf_sched *sched,
if (verbose > 0) {
printf("fork event\n");
- printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
- printf("... child: %s/%d\n", thread__comm_str(child), child->tid);
+ printf("... parent: %s/%d\n", thread__comm_str(parent), thread__tid(parent));
+ printf("... child: %s/%d\n", thread__comm_str(child), thread__tid(child));
}
- register_pid(sched, parent->tid, thread__comm_str(parent));
- register_pid(sched, child->tid, thread__comm_str(child));
+ register_pid(sched, thread__tid(parent), thread__comm_str(parent));
+ register_pid(sched, thread__tid(child), thread__comm_str(child));
out_put:
thread__put(child);
thread__put(parent);
@@ -1316,7 +1316,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
if (!atoms) {
if (thread_atoms_insert(sched, migrant))
goto out_put;
- register_pid(sched, migrant->tid, thread__comm_str(migrant));
+ register_pid(sched, thread__tid(migrant), thread__comm_str(migrant));
atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
if (!atoms) {
pr_err("migration-event: Internal tree error");
@@ -1359,10 +1359,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
sched->all_runtime += work_list->total_runtime;
sched->all_count += work_list->nb_atoms;
- if (work_list->num_merged > 1)
- ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
- else
- ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+ if (work_list->num_merged > 1) {
+ ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread),
+ work_list->num_merged);
+ } else {
+ ret = printf(" %s:%d ", thread__comm_str(work_list->thread),
+ thread__tid(work_list->thread));
+ }
for (i = 0; i < 24 - ret; i++)
printf(" ");
@@ -1380,13 +1383,17 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
- if (l->thread == r->thread)
+ pid_t l_tid, r_tid;
+
+ if (RC_CHK_ACCESS(l->thread) == RC_CHK_ACCESS(r->thread))
return 0;
- if (l->thread->tid < r->thread->tid)
+ l_tid = thread__tid(l->thread);
+ r_tid = thread__tid(r->thread);
+ if (l_tid < r_tid)
return -1;
- if (l->thread->tid > r->thread->tid)
+ if (l_tid > r_tid)
return 1;
- return (int)(l->thread - r->thread);
+ return (int)(RC_CHK_ACCESS(l->thread) - RC_CHK_ACCESS(r->thread));
}
static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1679,14 +1686,14 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
color_fprintf(stdout, color, " %12s secs ", stimestamp);
- if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) {
+ if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
const char *pid_color = color;
if (thread__has_color(sched_in))
pid_color = COLOR_PIDS;
color_fprintf(stdout, pid_color, "%s => %s:%d",
- tr->shortname, thread__comm_str(sched_in), sched_in->tid);
+ tr->shortname, thread__comm_str(sched_in), thread__tid(sched_in));
tr->comm_changed = false;
}
@@ -1948,8 +1955,8 @@ static char *timehist_get_commstr(struct thread *thread)
{
static char str[32];
const char *comm = thread__comm_str(thread);
- pid_t tid = thread->tid;
- pid_t pid = thread->pid_;
+ pid_t tid = thread__tid(thread);
+ pid_t pid = thread__pid(thread);
int n;
if (pid == 0)
@@ -2032,7 +2039,7 @@ static char task_state_char(struct thread *thread, int state)
unsigned bit = state ? ffs(state) : 0;
/* 'I' for idle */
- if (thread->tid == 0)
+ if (thread__tid(thread) == 0)
return 'I';
return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
@@ -2067,7 +2074,7 @@ static void timehist_print_sample(struct perf_sched *sched,
for (i = 0; i < max_cpus; ++i) {
/* flag idle times with 'i'; others are sched events */
if (i == sample->cpu)
- c = (thread->tid == 0) ? 'i' : 's';
+ c = (thread__tid(thread) == 0) ? 'i' : 's';
else
c = ' ';
printf("%c", c);
@@ -2094,7 +2101,7 @@ static void timehist_print_sample(struct perf_sched *sched,
if (sched->show_wakeups && !sched->show_next)
printf(" %-*s", comm_width, "");
- if (thread->tid == 0)
+ if (thread__tid(thread) == 0)
goto out;
if (sched->show_callchain)
@@ -2104,7 +2111,7 @@ static void timehist_print_sample(struct perf_sched *sched,
EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
EVSEL__PRINT_CALLCHAIN_ARROW |
EVSEL__PRINT_SKIP_IGNORED,
- &callchain_cursor, symbol_conf.bt_stop_list, stdout);
+ get_tls_callchain_cursor(), symbol_conf.bt_stop_list, stdout);
out:
printf("\n");
@@ -2189,7 +2196,7 @@ static void save_task_callchain(struct perf_sched *sched,
struct evsel *evsel,
struct machine *machine)
{
- struct callchain_cursor *cursor = &callchain_cursor;
+ struct callchain_cursor *cursor;
struct thread *thread;
/* want main thread for process - has maps */
@@ -2202,6 +2209,8 @@ static void save_task_callchain(struct perf_sched *sched,
if (!sched->show_callchain || sample->callchain == NULL)
return;
+ cursor = get_tls_callchain_cursor();
+
if (thread__resolve_callchain(thread, cursor, evsel, sample,
NULL, NULL, sched->max_stack + 2) != 0) {
if (verbose > 0)
@@ -2331,10 +2340,16 @@ static void save_idle_callchain(struct perf_sched *sched,
struct idle_thread_runtime *itr,
struct perf_sample *sample)
{
+ struct callchain_cursor *cursor;
+
if (!sched->show_callchain || sample->callchain == NULL)
return;
- callchain_cursor__copy(&itr->cursor, &callchain_cursor);
+ cursor = get_tls_callchain_cursor();
+ if (cursor == NULL)
+ return;
+
+ callchain_cursor__copy(&itr->cursor, cursor);
}
static struct thread *timehist_get_thread(struct perf_sched *sched,
@@ -2577,6 +2592,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
int rc = 0;
int state = evsel__intval(evsel, sample, "prev_state");
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event. skipping it\n",
event->header.type);
@@ -2626,7 +2642,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
t = ptime->end;
}
- if (!sched->idle_hist || thread->tid == 0) {
+ if (!sched->idle_hist || thread__tid(thread) == 0) {
if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
timehist_update_runtime_stats(tr, t, tprev);
@@ -2634,7 +2650,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
struct idle_thread_runtime *itr = (void *)tr;
struct thread_runtime *last_tr;
- BUG_ON(thread->tid != 0);
+ BUG_ON(thread__tid(thread) != 0);
if (itr->last_thread == NULL)
goto out;
@@ -2685,6 +2701,7 @@ out:
evsel__save_time(evsel, sample->time, sample->cpu);
+ addr_location__exit(&al);
return rc;
}
@@ -2719,7 +2736,7 @@ static void print_thread_runtime(struct thread *t,
float stddev;
printf("%*s %5d %9" PRIu64 " ",
- comm_width, timehist_get_commstr(t), t->ppid,
+ comm_width, timehist_get_commstr(t), thread__ppid(t),
(u64) r->run_stats.n);
print_sched_time(r->total_run_time, 8);
@@ -2739,7 +2756,7 @@ static void print_thread_waittime(struct thread *t,
struct thread_runtime *r)
{
printf("%*s %5d %9" PRIu64 " ",
- comm_width, timehist_get_commstr(t), t->ppid,
+ comm_width, timehist_get_commstr(t), thread__ppid(t),
(u64) r->run_stats.n);
print_sched_time(r->total_run_time, 8);
@@ -2760,7 +2777,7 @@ struct total_run_stats {
u64 total_run_time;
};
-static int __show_thread_runtime(struct thread *t, void *priv)
+static int show_thread_runtime(struct thread *t, void *priv)
{
struct total_run_stats *stats = priv;
struct thread_runtime *r;
@@ -2783,22 +2800,6 @@ static int __show_thread_runtime(struct thread *t, void *priv)
return 0;
}
-static int show_thread_runtime(struct thread *t, void *priv)
-{
- if (t->dead)
- return 0;
-
- return __show_thread_runtime(t, priv);
-}
-
-static int show_deadthread_runtime(struct thread *t, void *priv)
-{
- if (!t->dead)
- return 0;
-
- return __show_thread_runtime(t, priv);
-}
-
static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
const char *sep = " <- ";
@@ -2890,11 +2891,6 @@ static void timehist_print_summary(struct perf_sched *sched,
if (!task_count)
printf("<no still running tasks>\n");
- printf("\nTerminated tasks:\n");
- machine__for_each_thread(m, show_deadthread_runtime, &totals);
- if (task_count == totals.task_count)
- printf("<no terminated tasks>\n");
-
/* CPU idle stats not tracked when samples were skipped */
if (sched->skipped_samples && !sched->idle_hist)
return;
@@ -3599,7 +3595,22 @@ int cmd_sched(int argc, const char **argv)
mutex_init(&sched.start_work_mutex);
mutex_init(&sched.work_done_wait_mutex);
- for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
+ sched.curr_thread = calloc(MAX_CPUS, sizeof(*sched.curr_thread));
+ if (!sched.curr_thread) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sched.cpu_last_switched = calloc(MAX_CPUS, sizeof(*sched.cpu_last_switched));
+ if (!sched.cpu_last_switched) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sched.curr_pid = malloc(MAX_CPUS * sizeof(*sched.curr_pid));
+ if (!sched.curr_pid) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < MAX_CPUS; i++)
sched.curr_pid[i] = -1;
argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
@@ -3668,6 +3679,9 @@ int cmd_sched(int argc, const char **argv)
}
out:
+ free(sched.curr_pid);
+ free(sched.cpu_last_switched);
+ free(sched.curr_thread);
mutex_destroy(&sched.start_work_mutex);
mutex_destroy(&sched.work_done_wait_mutex);
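
The builtin-sched.c hunks above replace the fixed MAX_CPUS-sized arrays embedded in struct perf_sched with heap allocations made at cmd_sched() entry and released on the shared "out:" label. A minimal standalone sketch of the same allocate/initialize/free shape (the helper name is hypothetical; MAX_CPUS and the -1 sentinel mirror the diff):

    #include <errno.h>
    #include <stdlib.h>

    #define MAX_CPUS 4096

    /* Allocate and initialize one per-CPU array; the caller frees it on
     * its error path, mirroring the out: label in cmd_sched() above. */
    static int alloc_curr_pid(int **curr_pid)
    {
            int i;

            *curr_pid = malloc(MAX_CPUS * sizeof(**curr_pid));
            if (!*curr_pid)
                    return -ENOMEM;
            for (i = 0; i < MAX_CPUS; i++)
                    (*curr_pid)[i] = -1;  /* -1: no task observed on this CPU yet */
            return 0;
    }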
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c57be48d65bb..517bf25750c8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -133,6 +133,7 @@ enum perf_output_field {
PERF_OUTPUT_VCPU = 1ULL << 38,
PERF_OUTPUT_CGROUP = 1ULL << 39,
PERF_OUTPUT_RETIRE_LAT = 1ULL << 40,
+ PERF_OUTPUT_DSOFF = 1ULL << 41,
};
struct perf_script {
@@ -174,6 +175,7 @@ struct output_option {
{.str = "ip", .field = PERF_OUTPUT_IP},
{.str = "sym", .field = PERF_OUTPUT_SYM},
{.str = "dso", .field = PERF_OUTPUT_DSO},
+ {.str = "dsoff", .field = PERF_OUTPUT_DSOFF},
{.str = "addr", .field = PERF_OUTPUT_ADDR},
{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
@@ -574,6 +576,9 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
if (PRINT_FIELD(DSO))
output[type].print_ip_opts |= EVSEL__PRINT_DSO;
+ if (PRINT_FIELD(DSOFF))
+ output[type].print_ip_opts |= EVSEL__PRINT_DSOFF;
+
if (PRINT_FIELD(SYMOFFSET))
output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
@@ -627,6 +632,10 @@ static int perf_session__check_output_opt(struct perf_session *session)
if (evsel == NULL)
continue;
+ /* 'dsoff' implies 'dso' field */
+ if (output[j].fields & PERF_OUTPUT_DSOFF)
+ output[j].fields |= PERF_OUTPUT_DSO;
+
set_print_ip_opts(&evsel->core.attr);
tod |= output[j].fields & PERF_OUTPUT_TOD;
}
@@ -792,11 +801,11 @@ static int perf_sample__fprintf_start(struct perf_script *script,
}
if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
- printed += fprintf(fp, "%5d/%-5d ", sample->pid, sample->tid);
+ printed += fprintf(fp, "%7d/%-7d ", sample->pid, sample->tid);
else if (PRINT_FIELD(PID))
- printed += fprintf(fp, "%5d ", sample->pid);
+ printed += fprintf(fp, "%7d ", sample->pid);
else if (PRINT_FIELD(TID))
- printed += fprintf(fp, "%5d ", sample->tid);
+ printed += fprintf(fp, "%7d ", sample->tid);
if (PRINT_FIELD(CPU)) {
if (latency_format)
@@ -910,7 +919,6 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
{
struct branch_stack *br = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
- struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -921,26 +929,22 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
from = entries[i].from;
to = entries[i].to;
+ printed += fprintf(fp, " 0x%"PRIx64, from);
if (PRINT_FIELD(DSO)) {
- memset(&alf, 0, sizeof(alf));
- memset(&alt, 0, sizeof(alt));
+ struct addr_location alf, alt;
+
+ addr_location__init(&alf);
+ addr_location__init(&alt);
thread__find_map_fb(thread, sample->cpumode, from, &alf);
thread__find_map_fb(thread, sample->cpumode, to, &alt);
- }
- printed += fprintf(fp, " 0x%"PRIx64, from);
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, "(");
- printed += map__fprintf_dsoname(alf.map, fp);
- printed += fprintf(fp, ")");
- }
-
- printed += fprintf(fp, "/0x%"PRIx64, to);
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, "(");
- printed += map__fprintf_dsoname(alt.map, fp);
- printed += fprintf(fp, ")");
- }
+ printed += map__fprintf_dsoname_dsoff(alf.map, PRINT_FIELD(DSOFF), alf.addr, fp);
+ printed += fprintf(fp, "/0x%"PRIx64, to);
+ printed += map__fprintf_dsoname_dsoff(alt.map, PRINT_FIELD(DSOFF), alt.addr, fp);
+ addr_location__exit(&alt);
+ addr_location__exit(&alf);
+ } else
+ printed += fprintf(fp, "/0x%"PRIx64, to);
printed += print_bstack_flags(fp, entries + i);
}
@@ -954,7 +958,6 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
{
struct branch_stack *br = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
- struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -962,9 +965,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
return 0;
for (i = 0; i < br->nr; i++) {
+ struct addr_location alf, alt;
- memset(&alf, 0, sizeof(alf));
- memset(&alt, 0, sizeof(alt));
+ addr_location__init(&alf);
+ addr_location__init(&alt);
from = entries[i].from;
to = entries[i].to;
@@ -972,19 +976,15 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, "(");
- printed += map__fprintf_dsoname(alf.map, fp);
- printed += fprintf(fp, ")");
- }
+ if (PRINT_FIELD(DSO))
+ printed += map__fprintf_dsoname_dsoff(alf.map, PRINT_FIELD(DSOFF), alf.addr, fp);
printed += fprintf(fp, "%c", '/');
printed += symbol__fprintf_symname_offs(alt.sym, &alt, fp);
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, "(");
- printed += map__fprintf_dsoname(alt.map, fp);
- printed += fprintf(fp, ")");
- }
+ if (PRINT_FIELD(DSO))
+ printed += map__fprintf_dsoname_dsoff(alt.map, PRINT_FIELD(DSOFF), alt.addr, fp);
printed += print_bstack_flags(fp, entries + i);
+ addr_location__exit(&alt);
+ addr_location__exit(&alf);
}
return printed;
@@ -996,7 +996,6 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
{
struct branch_stack *br = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
- struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -1004,9 +1003,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
return 0;
for (i = 0; i < br->nr; i++) {
+ struct addr_location alf, alt;
- memset(&alf, 0, sizeof(alf));
- memset(&alt, 0, sizeof(alt));
+ addr_location__init(&alf);
+ addr_location__init(&alt);
from = entries[i].from;
to = entries[i].to;
@@ -1019,18 +1019,14 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
to = map__dso_map_ip(alt.map, to);
printed += fprintf(fp, " 0x%"PRIx64, from);
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, "(");
- printed += map__fprintf_dsoname(alf.map, fp);
- printed += fprintf(fp, ")");
- }
+ if (PRINT_FIELD(DSO))
+ printed += map__fprintf_dsoname_dsoff(alf.map, PRINT_FIELD(DSOFF), alf.addr, fp);
printed += fprintf(fp, "/0x%"PRIx64, to);
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, "(");
- printed += map__fprintf_dsoname(alt.map, fp);
- printed += fprintf(fp, ")");
- }
+ if (PRINT_FIELD(DSO))
+ printed += map__fprintf_dsoname_dsoff(alt.map, PRINT_FIELD(DSOFF), alt.addr, fp);
printed += print_bstack_flags(fp, entries + i);
+ addr_location__exit(&alt);
+ addr_location__exit(&alf);
}
return printed;
@@ -1045,6 +1041,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
struct addr_location al;
bool kernel;
struct dso *dso;
+ int ret = 0;
if (!start || !end)
return 0;
@@ -1066,7 +1063,6 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
return -ENXIO;
}
- memset(&al, 0, sizeof(al));
if (end - start > MAXBB - MAXINSN) {
if (last)
pr_debug("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
@@ -1075,13 +1071,14 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
return 0;
}
+ addr_location__init(&al);
if (!thread__find_map(thread, *cpumode, start, &al) || (dso = map__dso(al.map)) == NULL) {
pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
- return 0;
+ goto out;
}
if (dso->data.status == DSO_DATA_STATUS_ERROR) {
pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
- return 0;
+ goto out;
}
/* Load maps to ensure dso->is_64_bit has been updated */
@@ -1095,7 +1092,10 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
if (len <= 0)
pr_debug("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
start, end);
- return len;
+ ret = len;
+out:
+ addr_location__exit(&al);
+ return ret;
}
static int map__fprintf_srccode(struct map *map, u64 addr, FILE *fp, struct srccode_state *state)
@@ -1146,14 +1146,16 @@ static int print_srccode(struct thread *thread, u8 cpumode, uint64_t addr)
struct addr_location al;
int ret = 0;
- memset(&al, 0, sizeof(al));
+ addr_location__init(&al);
thread__find_map(thread, cpumode, addr, &al);
if (!al.map)
- return 0;
+ goto out;
ret = map__fprintf_srccode(al.map, al.addr, stdout,
- &thread->srccode_state);
+ thread__srccode_state(thread));
if (ret)
ret += printf("\n");
+out:
+ addr_location__exit(&al);
return ret;
}
@@ -1188,14 +1190,13 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
struct perf_event_attr *attr, FILE *fp)
{
struct addr_location al;
- int off, printed = 0;
-
- memset(&al, 0, sizeof(al));
+ int off, printed = 0, ret = 0;
+ addr_location__init(&al);
thread__find_map(thread, cpumode, addr, &al);
if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
- return 0;
+ goto out;
al.cpu = cpu;
al.sym = NULL;
@@ -1203,7 +1204,7 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
al.sym = map__find_symbol(al.map, al.addr);
if (!al.sym)
- return 0;
+ goto out;
if (al.addr < al.sym->end)
off = al.addr - al.sym->start;
@@ -1218,7 +1219,10 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
printed += fprintf(fp, "\n");
*lastsym = al.sym;
- return printed;
+ ret = printed;
+out:
+ addr_location__exit(&al);
+ return ret;
}
static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
@@ -1380,6 +1384,7 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
struct addr_location al;
int printed = fprintf(fp, "%16" PRIx64, sample->addr);
+ addr_location__init(&al);
if (!sample_addr_correlates_sym(attr))
goto out;
@@ -1393,12 +1398,10 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
printed += symbol__fprintf_symname(al.sym, fp);
}
- if (PRINT_FIELD(DSO)) {
- printed += fprintf(fp, " (");
- printed += map__fprintf_dsoname(al.map, fp);
- printed += fprintf(fp, ")");
- }
+ if (PRINT_FIELD(DSO))
+ printed += map__fprintf_dsoname_dsoff(al.map, PRINT_FIELD(DSOFF), al.addr, fp);
out:
+ addr_location__exit(&al);
return printed;
}
@@ -1451,7 +1454,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
* The 'return' has already been popped off the stack so the depth has
* to be adjusted to match the 'call'.
*/
- if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
+ if (thread__ts(thread) && sample->flags & PERF_IP_FLAG_RETURN)
depth += 1;
name = resolve_branch_sym(sample, evsel, thread, al, addr_al, &ip);
@@ -1554,11 +1557,13 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
unsigned int print_opts = output[type].print_ip_opts;
struct callchain_cursor *cursor = NULL;
- if (symbol_conf.use_callchain && sample->callchain &&
- thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
- sample, NULL, NULL, scripting_max_stack) == 0)
- cursor = &callchain_cursor;
-
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack))
+ cursor = NULL;
+ }
if (cursor == NULL) {
printed += fprintf(fp, " ");
if (print_opts & EVSEL__PRINT_SRCLINE) {
@@ -1589,7 +1594,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
printed += fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
int ret = map__fprintf_srccode(al->map, al->addr, stdout,
- &thread->srccode_state);
+ thread__srccode_state(thread));
if (ret) {
printed += ret;
printed += printf("\n");
@@ -2098,9 +2103,9 @@ static bool show_event(struct perf_sample *sample,
if (!symbol_conf.graph_function)
return true;
- if (thread->filter) {
- if (depth <= thread->filter_entry_depth) {
- thread->filter = false;
+ if (thread__filter(thread)) {
+ if (depth <= thread__filter_entry_depth(thread)) {
+ thread__set_filter(thread, false);
return false;
}
return true;
@@ -2117,8 +2122,8 @@ static bool show_event(struct perf_sample *sample,
while (*s) {
unsigned len = strcspn(s, ",");
if (nlen == len && !strncmp(name, s, len)) {
- thread->filter = true;
- thread->filter_entry_depth = depth;
+ thread__set_filter(thread, true);
+ thread__set_filter_entry_depth(thread, depth);
return true;
}
s += len;
@@ -2194,17 +2199,30 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(RETIRE_LAT))
fprintf(fp, "%16" PRIu16, sample->retire_lat);
+ if (PRINT_FIELD(CGROUP)) {
+ const char *cgrp_name;
+ struct cgroup *cgrp = cgroup__find(machine->env,
+ sample->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+ else
+ cgrp_name = "unknown";
+ fprintf(fp, " %s", cgrp_name);
+ }
+
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL;
if (script->stitch_lbr)
- al->thread->lbr_stitch_enable = true;
-
- if (symbol_conf.use_callchain && sample->callchain &&
- thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
- sample, NULL, NULL, scripting_max_stack) == 0)
- cursor = &callchain_cursor;
-
+ thread__set_lbr_stitch_enable(al->thread, true);
+
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack))
+ cursor = NULL;
+ }
fputc(cursor ? '\n' : ' ', fp);
sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor,
symbol_conf.bt_stop_list, fp);
@@ -2236,24 +2254,13 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(CODE_PAGE_SIZE))
fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str));
- if (PRINT_FIELD(CGROUP)) {
- const char *cgrp_name;
- struct cgroup *cgrp = cgroup__find(machine->env,
- sample->cgroup);
- if (cgrp != NULL)
- cgrp_name = cgrp->name;
- else
- cgrp_name = "unknown";
- fprintf(fp, " %s", cgrp_name);
- }
-
perf_sample__fprintf_ipc(sample, attr, fp);
fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
if (map__fprintf_srccode(al->map, al->addr, stdout,
- &thread->srccode_state))
+ thread__srccode_state(thread)))
printf("\n");
}
@@ -2350,8 +2357,8 @@ static int process_sample_event(struct perf_tool *tool,
int ret = 0;
/* Set thread to NULL to indicate addr_al and al are not initialized */
- addr_al.thread = NULL;
- al.thread = NULL;
+ addr_location__init(&al);
+ addr_location__init(&addr_al);
ret = dlfilter__filter_event_early(dlfilter, event, sample, evsel, machine, &al, &addr_al);
if (ret) {
@@ -2417,11 +2424,14 @@ static int process_sample_event(struct perf_tool *tool,
}
out_put:
- if (al.thread)
- addr_location__put(&al);
+ addr_location__exit(&addr_al);
+ addr_location__exit(&al);
return ret;
}
+// Used when scr->per_event_dump is not set
+static struct evsel_script es_stdout;
+
static int process_attr(struct perf_tool *tool, union perf_event *event,
struct evlist **pevlist)
{
@@ -2430,7 +2440,6 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
struct evsel *evsel, *pos;
u64 sample_type;
int err;
- static struct evsel_script *es;
err = perf_event__process_attr(tool, event, pevlist);
if (err)
@@ -2440,14 +2449,13 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
evsel = evlist__last(*pevlist);
if (!evsel->priv) {
- if (scr->per_event_dump) {
+ if (scr->per_event_dump) {
evsel->priv = evsel_script__new(evsel, scr->session->data);
- } else {
- es = zalloc(sizeof(*es));
- if (!es)
+ if (!evsel->priv)
return -ENOMEM;
- es->fp = stdout;
- evsel->priv = es;
+ } else { // Replicate what is done in perf_script__setup_per_event_dump()
+ es_stdout.fp = stdout;
+ evsel->priv = &es_stdout;
}
}
@@ -2753,7 +2761,6 @@ out_err_fclose:
static int perf_script__setup_per_event_dump(struct perf_script *script)
{
struct evsel *evsel;
- static struct evsel_script es_stdout;
if (script->per_event_dump)
return perf_script__fopen_per_event_dump(script);
@@ -3311,14 +3318,21 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
int unset __maybe_unused)
{
struct dirent *script_dirent, *lang_dirent;
- char scripts_path[MAXPATHLEN];
+ char *buf, *scripts_path, *script_path, *lang_path, *first_half;
DIR *scripts_dir, *lang_dir;
- char script_path[MAXPATHLEN];
- char lang_path[MAXPATHLEN];
struct script_desc *desc;
- char first_half[BUFSIZ];
char *script_root;
+ buf = malloc(3 * MAXPATHLEN + BUFSIZ);
+ if (!buf) {
+ pr_err("malloc failed\n");
+ exit(-1);
+ }
+ scripts_path = buf;
+ script_path = buf + MAXPATHLEN;
+ lang_path = buf + 2 * MAXPATHLEN;
+ first_half = buf + 3 * MAXPATHLEN;
+
snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
@@ -3327,6 +3341,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
"open(%s) failed.\n"
"Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
scripts_path);
+ free(buf);
exit(-1);
}
@@ -3357,6 +3372,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
desc->half_liner ? desc->half_liner : "");
}
+ free(buf);
exit(0);
}
@@ -3883,7 +3899,7 @@ int cmd_script(int argc, const char **argv)
"comma separated output fields prepend with 'type:'. "
"+field to add and -field to remove."
"Valid types: hw,sw,trace,raw,synth. "
- "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
+ "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
"brstackinsnlen,brstackoff,callindent,insn,insnlen,synth,"
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b9ad32f21e57..07b48f6df48e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,10 +44,10 @@
#include "util/cgroup.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
+#include "util/pmus.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
-#include "util/evlist-hybrid.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
@@ -69,7 +69,6 @@
#include "util/pfm.h"
#include "util/bpf_counter.h"
#include "util/iostat.h"
-#include "util/pmu-hybrid.h"
#include "util/util.h"
#include "asm/bug.h"
@@ -101,6 +100,10 @@
static void print_counters(struct timespec *ts, int argc, const char **argv);
static struct evlist *evsel_list;
+static struct parse_events_option_args parse_events_option_args = {
+ .evlistp = &evsel_list,
+};
+
static bool all_counters_use_bpf = true;
static struct target target = {
@@ -138,6 +141,7 @@ struct perf_stat {
struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
enum aggr_mode aggr_mode;
+ u32 aggr_level;
};
static struct perf_stat perf_stat;
@@ -147,6 +151,7 @@ static volatile sig_atomic_t done = 0;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
+ .aggr_level = MAX_CACHE_LVL + 1,
.scale = true,
.unit_width = 4, /* strlen("unit") */
.run_count = 1,
@@ -183,9 +188,6 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *warned_leader = NULL;
- if (evlist__has_hybrid(evlist))
- evlist__warn_hybrid_group(evlist);
-
evlist__for_each_entry(evlist, evsel) {
struct evsel *leader = evsel__leader(evsel);
@@ -723,6 +725,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
all_counters_use_bpf = false;
}
+ evlist__reset_aggr_stats(evsel_list);
+
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
@@ -1085,10 +1089,11 @@ static int parse_stat_cgroups(const struct option *opt,
return parse_cgroups(opt, str, unset);
}
-static int parse_hybrid_type(const struct option *opt,
+static int parse_cputype(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
+ const struct perf_pmu *pmu;
struct evlist *evlist = *(struct evlist **)opt->value;
if (!list_empty(&evlist->core.entries)) {
@@ -1096,19 +1101,69 @@ static int parse_hybrid_type(const struct option *opt,
return -1;
}
- evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
- if (!evlist->hybrid_pmu_name) {
+ pmu = perf_pmus__pmu_for_pmu_filter(str);
+ if (!pmu) {
fprintf(stderr, "--cputype %s is not supported!\n", str);
return -1;
}
+ parse_events_option_args.pmu_filter = pmu->name;
+
+ return 0;
+}
+
+static int parse_cache_level(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ int level;
+ u32 *aggr_mode = (u32 *)opt->value;
+ u32 *aggr_level = (u32 *)opt->data;
+
+ /*
+ * If no string is specified, aggregate based on the topology of
+ * Last Level Cache (LLC). Since the LLC level can change from
+ * architecture to architecture, set the level to a value greater
+ * than MAX_CACHE_LVL, which will be interpreted as the LLC.
+ */
+ if (str == NULL) {
+ level = MAX_CACHE_LVL + 1;
+ goto out;
+ }
+
+ /*
+ * The format to specify cache level is LX or lX where X is the
+ * cache level.
+ */
+ if (strlen(str) != 2 || (str[0] != 'l' && str[0] != 'L')) {
+ pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
+ MAX_CACHE_LVL,
+ MAX_CACHE_LVL);
+ return -EINVAL;
+ }
+
+ level = atoi(&str[1]);
+ if (level < 1) {
+ pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
+ MAX_CACHE_LVL,
+ MAX_CACHE_LVL);
+ return -EINVAL;
+ }
+ if (level > MAX_CACHE_LVL) {
+ pr_err("perf only supports max cache level of %d.\n"
+ "Consider increasing MAX_CACHE_LVL\n", MAX_CACHE_LVL);
+ return -EINVAL;
+ }
+out:
+ *aggr_mode = AGGR_CACHE;
+ *aggr_level = level;
return 0;
}
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
- OPT_CALLBACK('e', "event", &evsel_list, "event",
+ OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
OPT_CALLBACK(0, "filter", &evsel_list, "filter",
@@ -1182,6 +1237,9 @@ static struct option stat_options[] = {
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
+ OPT_CALLBACK_OPTARG(0, "per-cache", &stat_config.aggr_mode, &stat_config.aggr_level,
+ "cache level", "aggregate count at this cache level (Default: LLC)",
+ parse_cache_level),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -1226,7 +1284,7 @@ static struct option stat_options[] = {
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
"Only enable events on applying cpu with this type "
"for hybrid platform (e.g. core or atom)",
- parse_hybrid_type),
+ parse_cputype),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
@@ -1243,8 +1301,132 @@ static struct option stat_options[] = {
OPT_END()
};
+/**
+ * Calculate the cache instance ID from the map in
+ * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
+ * Cache instance ID is the first CPU reported in the shared_cpu_list file.
+ */
+static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
+{
+ int id;
+ struct perf_cpu_map *cpu_map = perf_cpu_map__new(map);
+
+ /*
+ * If the map contains no CPU, use the current CPU as the cache
+ * instance ID; otherwise use the first online CPU of the cache
+ * domain as the ID.
+ */
+ if (perf_cpu_map__empty(cpu_map))
+ id = cpu.cpu;
+ else
+ id = perf_cpu_map__cpu(cpu_map, 0).cpu;
+
+ /* Free the perf_cpu_map used to find the cache ID */
+ perf_cpu_map__put(cpu_map);
+
+ return id;
+}
+
+/**
+ * cpu__get_cache_details - Returns 0 if successful in populating the
+ * cache level and cache id. The cache level is read from
+ * /sys/devices/system/cpu/cpuX/cache/indexY/level, whereas the cache
+ * instance ID is the first CPU reported by
+ * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
+ */
+static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
+{
+ int ret = 0;
+ u32 cache_level = stat_config.aggr_level;
+ struct cpu_cache_level caches[MAX_CACHE_LVL];
+ u32 i = 0, caches_cnt = 0;
+
+ cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
+ cache->cache = -1;
+
+ ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
+ if (ret) {
+ /*
+ * If caches_cnt is not 0, cpu_cache_level data
+ * was allocated when building the topology.
+ * Free the allocated data before returning.
+ */
+ if (caches_cnt)
+ goto free_caches;
+
+ return ret;
+ }
+
+ if (!caches_cnt)
+ return -1;
+
+ /*
+ * Save the data for the highest level if no
+ * level was specified by the user.
+ */
+ if (cache_level > MAX_CACHE_LVL) {
+ int max_level_index = 0;
+
+ for (i = 1; i < caches_cnt; ++i) {
+ if (caches[i].level > caches[max_level_index].level)
+ max_level_index = i;
+ }
+
+ cache->cache_lvl = caches[max_level_index].level;
+ cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);
+
+ /* Reset i to 0 to free entire caches[] */
+ i = 0;
+ goto free_caches;
+ }
+
+ for (i = 0; i < caches_cnt; ++i) {
+ if (caches[i].level == cache_level) {
+ cache->cache_lvl = cache_level;
+ cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
+ }
+
+ cpu_cache_level__free(&caches[i]);
+ }
+
+free_caches:
+ /*
+ * Free all the allocated cpu_cache_level data.
+ */
+ while (i < caches_cnt)
+ cpu_cache_level__free(&caches[i++]);
+
+ return ret;
+}
+
+/**
+ * aggr_cpu_id__cache - Create an aggr_cpu_id with the cache instance ID,
+ * cache level, die and socket populated for cpu. The function signature
+ * is compatible with
+ * aggr_cpu_id_get_t.
+ */
+static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
+{
+ int ret;
+ struct aggr_cpu_id id;
+ struct perf_cache cache;
+
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
+ return id;
+
+ ret = cpu__get_cache_details(cpu, &cache);
+ if (ret)
+ return id;
+
+ id.cache_lvl = cache.cache_lvl;
+ id.cache = cache.cache;
+ return id;
+}
+
static const char *const aggr_mode__string[] = {
[AGGR_CORE] = "core",
+ [AGGR_CACHE] = "cache",
[AGGR_DIE] = "die",
[AGGR_GLOBAL] = "global",
[AGGR_NODE] = "node",
@@ -1266,6 +1448,12 @@ static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __m
return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
+static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__cache(cpu, /*data=*/NULL);
+}
+
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@@ -1318,6 +1506,12 @@ static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *con
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
+static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu);
+}
+
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
struct perf_cpu cpu)
{
@@ -1349,6 +1543,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
return aggr_cpu_id__socket;
case AGGR_DIE:
return aggr_cpu_id__die;
+ case AGGR_CACHE:
+ return aggr_cpu_id__cache;
case AGGR_CORE:
return aggr_cpu_id__core;
case AGGR_NODE:
@@ -1372,6 +1568,8 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
return perf_stat__get_socket_cached;
case AGGR_DIE:
return perf_stat__get_die_cached;
+ case AGGR_CACHE:
+ return perf_stat__get_cache_id_cached;
case AGGR_CORE:
return perf_stat__get_core_cached;
case AGGR_NODE:
@@ -1398,7 +1596,7 @@ static int perf_stat_init_aggr_mode(void)
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
get_id, /*data=*/NULL, needs_sort);
if (!stat_config.aggr_map) {
- pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
+ pr_err("cannot build %s map\n", aggr_mode__string[stat_config.aggr_mode]);
return -1;
}
stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
@@ -1484,6 +1682,60 @@ static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, voi
return id;
}
+static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
+ u32 cache_level, struct aggr_cpu_id *id)
+{
+ int i;
+ int caches_cnt = env->caches_cnt;
+ struct cpu_cache_level *caches = env->caches;
+
+ id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
+ id->cache = -1;
+
+ if (!caches_cnt)
+ return;
+
+ for (i = caches_cnt - 1; i > -1; --i) {
+ struct perf_cpu_map *cpu_map;
+ int map_contains_cpu;
+
+ /*
+ * If the user has not specified a level, find the first level with
+ * the cpu in the map. Since building the map is expensive, do
+ * this only if levels match.
+ */
+ if (cache_level <= MAX_CACHE_LVL && caches[i].level != cache_level)
+ continue;
+
+ cpu_map = perf_cpu_map__new(caches[i].map);
+ map_contains_cpu = perf_cpu_map__idx(cpu_map, cpu);
+ perf_cpu_map__put(cpu_map);
+
+ if (map_contains_cpu != -1) {
+ id->cache_lvl = caches[i].level;
+ id->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
+ return;
+ }
+ }
+}
+
+static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
+ void *data)
+{
+ struct perf_env *env = data;
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ if (cpu.cpu != -1) {
+ u32 cache_level = (perf_stat.aggr_level) ?: stat_config.aggr_level;
+
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id);
+ }
+
+ return id;
+}
+
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
@@ -1552,6 +1804,12 @@ static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *confi
return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_cache_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@@ -1583,6 +1841,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
return perf_env__get_socket_aggr_by_cpu;
case AGGR_DIE:
return perf_env__get_die_aggr_by_cpu;
+ case AGGR_CACHE:
+ return perf_env__get_cache_aggr_by_cpu;
case AGGR_CORE:
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
@@ -1606,6 +1866,8 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
return perf_stat__get_socket_file;
case AGGR_DIE:
return perf_stat__get_die_file;
+ case AGGR_CACHE:
+ return perf_stat__get_cache_file;
case AGGR_CORE:
return perf_stat__get_core_file;
case AGGR_NODE:
@@ -1650,7 +1912,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
get_id, env, needs_sort);
if (!stat_config.aggr_map) {
- pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
+ pr_err("cannot build %s map\n", aggr_mode__string[stat_config.aggr_mode]);
return -1;
}
stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
@@ -1777,6 +2039,7 @@ static int add_default_attributes(void)
};
struct perf_event_attr default_null_attrs[] = {};
+ const char *pmu = parse_events_option_args.pmu_filter ?: "all";
/* Set attrs if no event is selected and !null_run: */
if (stat_config.null_run)
@@ -1788,11 +2051,11 @@ static int add_default_attributes(void)
* will use this approach. To determine transaction support
* on an architecture test for such a metric name.
*/
- if (!metricgroup__has_metric("transaction")) {
- pr_err("Missing transaction metrics");
+ if (!metricgroup__has_metric(pmu, "transaction")) {
+ pr_err("Missing transaction metrics\n");
return -1;
}
- return metricgroup__parse_groups(evsel_list, "transaction",
+ return metricgroup__parse_groups(evsel_list, pmu, "transaction",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
@@ -1805,7 +2068,7 @@ static int add_default_attributes(void)
int smi;
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
- pr_err("freeze_on_smi is not supported.");
+ pr_err("freeze_on_smi is not supported.\n");
return -1;
}
@@ -1817,15 +2080,15 @@ static int add_default_attributes(void)
smi_reset = true;
}
- if (!metricgroup__has_metric("smi")) {
- pr_err("Missing smi metrics");
+ if (!metricgroup__has_metric(pmu, "smi")) {
+ pr_err("Missing smi metrics\n");
return -1;
}
if (!force_metric_only)
stat_config.metric_only = true;
- return metricgroup__parse_groups(evsel_list, "smi",
+ return metricgroup__parse_groups(evsel_list, pmu, "smi",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
@@ -1843,7 +2106,7 @@ static int add_default_attributes(void)
if (!max_level) {
pr_err("Topdown requested but the topdown metric groups aren't present.\n"
- "(See perf list the metric groups have names like TopdownL1)");
+ "(See perf list the metric groups have names like TopdownL1)\n");
return -1;
}
if (stat_config.topdown_level > max_level) {
@@ -1858,7 +2121,8 @@ static int add_default_attributes(void)
"Please print the result regularly, e.g. -I1000\n");
}
str[8] = stat_config.topdown_level + '0';
- if (metricgroup__parse_groups(evsel_list, str,
+ if (metricgroup__parse_groups(evsel_list,
+ pmu, str,
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/true,
@@ -1878,11 +2142,11 @@ static int add_default_attributes(void)
if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
return -1;
- if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+ if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) {
if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
return -1;
}
- if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+ if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) {
if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
return -1;
}
@@ -1892,19 +2156,14 @@ static int add_default_attributes(void)
* Add TopdownL1 metrics if they exist. To minimize
* multiplexing, don't request threshold computation.
*/
- /*
- * TODO: TopdownL1 is disabled on hybrid CPUs to avoid a crashes
- * caused by exposing latent bugs. This is fixed properly in:
- * https://lore.kernel.org/lkml/bff481ba-e60a-763f-0aa0-3ee53302c480@linux.intel.com/
- */
- if (metricgroup__has_metric("TopdownL1") && !perf_pmu__has_hybrid()) {
+ if (metricgroup__has_metric(pmu, "Default")) {
struct evlist *metric_evlist = evlist__new();
struct evsel *metric_evsel;
if (!metric_evlist)
return -1;
- if (metricgroup__parse_groups(metric_evlist, "TopdownL1",
+ if (metricgroup__parse_groups(metric_evlist, pmu, "Default",
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/true,
@@ -1915,6 +2174,7 @@ static int add_default_attributes(void)
evlist__for_each_entry(metric_evlist, metric_evsel) {
metric_evsel->skippable = true;
+ metric_evsel->default_metricgroup = true;
}
evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries);
evlist__delete(metric_evlist);
@@ -2124,7 +2384,8 @@ static struct perf_stat perf_stat = {
.stat = perf_event__process_stat_event,
.stat_round = process_stat_round_event,
},
- .aggr_mode = AGGR_UNSET,
+ .aggr_mode = AGGR_UNSET,
+ .aggr_level = 0,
};
static int __cmd_report(int argc, const char **argv)
@@ -2136,6 +2397,10 @@ static int __cmd_report(int argc, const char **argv)
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
"aggregate counts per processor die", AGGR_DIE),
+ OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
+ "cache level",
+ "aggregate count at this cache level (Default: LLC)",
+ parse_cache_level),
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
@@ -2165,6 +2430,7 @@ static int __cmd_report(int argc, const char **argv)
perf_stat.session = session;
stat_config.output = stderr;
+ evlist__delete(evsel_list);
evsel_list = session->evlist;
ret = perf_session__process_events(session);
@@ -2428,7 +2694,9 @@ int cmd_stat(int argc, const char **argv)
* knowing the target is system-wide.
*/
if (metrics) {
- metricgroup__parse_groups(evsel_list, metrics,
+ const char *pmu = parse_events_option_args.pmu_filter ?: "all";
+
+ metricgroup__parse_groups(evsel_list, pmu, metrics,
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
@@ -2457,12 +2725,8 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
- pr_err("failed to use cpu list %s\n", target.cpu_list);
- goto out;
- }
+ evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
- target.hybrid = perf_pmu__has_hybrid();
if (evlist__create_maps(evsel_list, &target) < 0) {
if (target__has_task(&target)) {
pr_err("Problems finding threads of monitor\n");
@@ -2567,8 +2831,11 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (!forever && status != -1 && (!interval || stat_config.summary))
+ if (!forever && status != -1 && (!interval || stat_config.summary)) {
+ if (stat_config.run_count > 1)
+ evlist__copy_res_stats(&stat_config, evsel_list);
print_counters(NULL, argc, argv);
+ }
evlist__finalize_ctlfd(evsel_list);
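
The new --per-cache aggregation above accepts an optional "L<n>"/"l<n>" argument, with a bare --per-cache falling back to the LLC (per the option's "Default: LLC" help text). A standalone sketch of the validation parse_cache_level() performs; the helper name is hypothetical and the MAX_CACHE_LVL value here is an assumption, since perf defines it in its own headers:

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    #define MAX_CACHE_LVL 4  /* assumed value for the sketch */

    /* Returns the level, MAX_CACHE_LVL + 1 for "use the LLC", or -EINVAL. */
    static int cache_level_from_str(const char *str)
    {
            int level;

            if (str == NULL)
                    return MAX_CACHE_LVL + 1;  /* bare --per-cache: aggregate at the LLC */
            if (strlen(str) != 2 || (str[0] != 'l' && str[0] != 'L'))
                    return -EINVAL;            /* must be exactly L<digit> or l<digit> */
            level = atoi(&str[1]);
            if (level < 1 || level > MAX_CACHE_LVL)
                    return -EINVAL;
            return level;
    }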
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index bce1cf896f9c..19d4542ea18a 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -315,10 +315,10 @@ static void pid_put_sample(struct timechart *tchart, int pid, int type,
#define MAX_CPUS 4096
-static u64 cpus_cstate_start_times[MAX_CPUS];
-static int cpus_cstate_state[MAX_CPUS];
-static u64 cpus_pstate_start_times[MAX_CPUS];
-static u64 cpus_pstate_state[MAX_CPUS];
+static u64 *cpus_cstate_start_times;
+static int *cpus_cstate_state;
+static u64 *cpus_pstate_start_times;
+static u64 *cpus_pstate_state;
static int process_comm_event(struct perf_tool *tool,
union perf_event *event,
@@ -498,7 +498,6 @@ static const char *cat_backtrace(union perf_event *event,
char *p = NULL;
size_t p_len;
u8 cpumode = PERF_RECORD_MISC_USER;
- struct addr_location tal;
struct ip_callchain *chain = sample->callchain;
FILE *f = open_memstream(&p, &p_len);
@@ -507,6 +506,7 @@ static const char *cat_backtrace(union perf_event *event,
return NULL;
}
+ addr_location__init(&al);
if (!chain)
goto exit;
@@ -518,6 +518,7 @@ static const char *cat_backtrace(union perf_event *event,
for (i = 0; i < chain->nr; i++) {
u64 ip;
+ struct addr_location tal;
if (callchain_param.order == ORDER_CALLEE)
ip = chain->ips[i];
@@ -544,20 +545,22 @@ static const char *cat_backtrace(union perf_event *event,
* Discard all.
*/
zfree(&p);
- goto exit_put;
+ goto exit;
}
continue;
}
+ addr_location__init(&tal);
tal.filtered = 0;
if (thread__find_symbol(al.thread, cpumode, ip, &tal))
fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
else
fprintf(f, "..... %016" PRIx64 "\n", ip);
+
+ addr_location__exit(&tal);
}
-exit_put:
- addr_location__put(&al);
exit:
+ addr_location__exit(&al);
fclose(f);
return p;
@@ -1981,12 +1984,34 @@ int cmd_timechart(int argc, const char **argv)
"perf timechart record [<options>]",
NULL
};
+ int ret;
+
+ cpus_cstate_start_times = calloc(MAX_CPUS, sizeof(*cpus_cstate_start_times));
+ if (!cpus_cstate_start_times)
+ return -ENOMEM;
+ cpus_cstate_state = calloc(MAX_CPUS, sizeof(*cpus_cstate_state));
+ if (!cpus_cstate_state) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ cpus_pstate_start_times = calloc(MAX_CPUS, sizeof(*cpus_pstate_start_times));
+ if (!cpus_pstate_start_times) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ cpus_pstate_state = calloc(MAX_CPUS, sizeof(*cpus_pstate_state));
+ if (!cpus_pstate_state) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands,
timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (tchart.power_only && tchart.tasks_only) {
pr_err("-P and -T options cannot be used at the same time.\n");
- return -1;
+ ret = -1;
+ goto out;
}
if (argc && strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
@@ -1996,17 +2021,25 @@ int cmd_timechart(int argc, const char **argv)
if (tchart.power_only && tchart.tasks_only) {
pr_err("-P and -T options cannot be used at the same time.\n");
- return -1;
+ ret = -1;
+ goto out;
}
if (tchart.io_only)
- return timechart__io_record(argc, argv);
+ ret = timechart__io_record(argc, argv);
else
- return timechart__record(&tchart, argc, argv);
+ ret = timechart__record(&tchart, argc, argv);
+ goto out;
} else if (argc)
usage_with_options(timechart_usage, timechart_options);
setup_pager();
- return __cmd_timechart(&tchart, output_name);
+ ret = __cmd_timechart(&tchart, output_name);
+out:
+ zfree(&cpus_cstate_start_times);
+ zfree(&cpus_cstate_state);
+ zfree(&cpus_pstate_start_times);
+ zfree(&cpus_pstate_state);
+ return ret;
}
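
The timechart cleanup above funnels every failure through one out: label and calls zfree() on all four arrays, which stays safe even for arrays that were never allocated. A sketch of why; zfree() is a tools-internal helper and the expansion below is an assumption matching its use in the diff:

    #include <stdlib.h>

    /* Assumed expansion: free the pointee, then reset the pointer to NULL. */
    #define zfree(pp) do { free(*(pp)); *(pp) = NULL; } while (0)

    static unsigned long long *cpus_cstate_start_times;  /* u64 in the diff */

    static void timechart_cleanup(void)
    {
            /* free(NULL) is a no-op, so this works from any early-failure path. */
            zfree(&cpus_cstate_start_times);
    }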
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index eb5740154bc0..ea8c7eca5eee 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -137,10 +137,10 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
}
notes = symbol__annotation(sym);
- mutex_lock(&notes->lock);
+ annotation__lock(notes);
if (!symbol__hists(sym, top->evlist->core.nr_entries)) {
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
pr_err("Not enough memory for annotating '%s' symbol!\n",
sym->name);
sleep(1);
@@ -156,7 +156,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
}
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
return err;
}
@@ -211,12 +211,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
notes = symbol__annotation(sym);
- if (!mutex_trylock(&notes->lock))
+ if (!annotation__trylock(notes))
return;
err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
if (unlikely(err)) {
/*
@@ -253,7 +253,7 @@ static void perf_top__show_details(struct perf_top *top)
symbol = he->ms.sym;
notes = symbol__annotation(symbol);
- mutex_lock(&notes->lock);
+ annotation__lock(notes);
symbol__calc_percent(symbol, evsel);
@@ -274,7 +274,7 @@ static void perf_top__show_details(struct perf_top *top)
if (more != 0)
printf("%d lines not displayed, maybe increase display entries [e]\n", more);
out_unlock:
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
}
static void perf_top__resort_hists(struct perf_top *t)
@@ -392,7 +392,7 @@ static void prompt_percent(int *target, const char *msg)
static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
{
- char *buf = malloc(0), *p;
+ char *buf = NULL, *p;
struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
struct hists *hists = evsel__hists(top->sym_evsel);
struct rb_node *next;
@@ -773,11 +773,12 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0)
- return;
+ goto out;
if (top->stitch_lbr)
- al.thread->lbr_stitch_enable = true;
+ thread__set_lbr_stitch_enable(al.thread, true);
if (!machine->kptr_restrict_warned &&
symbol_conf.kptr_restrict &&
@@ -848,7 +849,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
mutex_unlock(&hists->lock);
}
- addr_location__put(&al);
+out:
+ addr_location__exit(&al);
}
static void
@@ -1225,6 +1227,14 @@ static void init_process_thread(struct perf_top *top)
cond_init(&top->qe.cond);
}
+static void exit_process_thread(struct perf_top *top)
+{
+ ordered_events__free(&top->qe.data[0]);
+ ordered_events__free(&top->qe.data[1]);
+ mutex_destroy(&top->qe.mutex);
+ cond_destroy(&top->qe.cond);
+}
+
static int __cmd_top(struct perf_top *top)
{
struct record_opts *opts = &top->record_opts;
@@ -1355,6 +1365,7 @@ out_join_thread:
cond_signal(&top->qe.cond);
pthread_join(thread_process, NULL);
perf_set_singlethreaded();
+ exit_process_thread(top);
return ret;
}
@@ -1440,12 +1451,15 @@ int cmd_top(int argc, const char **argv)
.max_stack = sysctl__max_stack(),
.nr_threads_synthesize = UINT_MAX,
};
+ struct parse_events_option_args parse_events_option_args = {
+ .evlistp = &top.evlist,
+ };
bool branch_call_mode = false;
struct record_opts *opts = &top.record_opts;
struct target *target = &opts->target;
const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
const struct option options[] = {
- OPT_CALLBACK('e', "event", &top.evlist, "event",
+ OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
@@ -1650,10 +1664,12 @@ int cmd_top(int argc, const char **argv)
if (annotate_check_args(&top.annotation_opts) < 0)
goto out_delete_evlist;
- if (!top.evlist->core.nr_entries &&
- evlist__add_default(top.evlist) < 0) {
- pr_err("Not enough memory for event selector list\n");
- goto out_delete_evlist;
+ if (!top.evlist->core.nr_entries) {
+ bool can_profile_kernel = perf_event_paranoid_check(1);
+ int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+
+ if (err)
+ goto out_delete_evlist;
}
status = evswitch__init(&top.evswitch, top.evlist, stderr);
@@ -1789,6 +1805,7 @@ int cmd_top(int argc, const char **argv)
top.session = perf_session__new(NULL, NULL);
if (IS_ERR(top.session)) {
status = PTR_ERR(top.session);
+ top.session = NULL;
goto out_delete_evlist;
}
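
The builtin-top.c hunks above stop taking notes->lock directly and route through the annotation__lock()/annotation__trylock()/annotation__unlock() wrappers instead. A sketch of the resulting calling convention, assuming the wrappers keep mutex semantics over the annotation's internal lock (the caller function here is hypothetical):

    /* Update annotation state under the wrapper lock. */
    static void touch_annotation(struct symbol *sym)
    {
            struct annotation *notes = symbol__annotation(sym);

            if (!annotation__trylock(notes))
                    return;  /* contended: skip, as the sampling path above does */
            /* ... update histograms / source lines ... */
            annotation__unlock(notes);
    }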
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8ee3a45c3c54..4aba576512a1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -914,7 +914,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"
-static struct syscall_fmt syscall_fmts[] = {
+static const struct syscall_fmt syscall_fmts[] = {
{ .name = "access",
.arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
{ .name = "arch_prctl",
@@ -1176,18 +1176,21 @@ static int syscall_fmt__cmp(const void *name, const void *fmtp)
return strcmp(name, fmt->name);
}
-static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
+static const struct syscall_fmt *__syscall_fmt__find(const struct syscall_fmt *fmts,
+ const int nmemb,
+ const char *name)
{
return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}
-static struct syscall_fmt *syscall_fmt__find(const char *name)
+static const struct syscall_fmt *syscall_fmt__find(const char *name)
{
const int nmemb = ARRAY_SIZE(syscall_fmts);
return __syscall_fmt__find(syscall_fmts, nmemb, name);
}
-static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
+static const struct syscall_fmt *__syscall_fmt__find_by_alias(const struct syscall_fmt *fmts,
+ const int nmemb, const char *alias)
{
int i;
@@ -1199,7 +1202,7 @@ static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts
return NULL;
}
-static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
{
const int nmemb = ARRAY_SIZE(syscall_fmts);
return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
@@ -1224,7 +1227,7 @@ struct syscall {
bool nonexistent;
struct tep_format_field *args;
const char *name;
- struct syscall_fmt *fmt;
+ const struct syscall_fmt *fmt;
struct syscall_arg_fmt *arg_fmt;
};
@@ -1383,12 +1386,13 @@ static int thread__read_fd_path(struct thread *thread, int fd)
struct stat st;
int ret;
- if (thread->pid_ == thread->tid) {
+ if (thread__pid(thread) == thread__tid(thread)) {
scnprintf(linkname, sizeof(linkname),
- "/proc/%d/fd/%d", thread->pid_, fd);
+ "/proc/%d/fd/%d", thread__pid(thread), fd);
} else {
scnprintf(linkname, sizeof(linkname),
- "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
+ "/proc/%d/task/%d/fd/%d",
+ thread__pid(thread), thread__tid(thread), fd);
}
if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
@@ -1556,7 +1560,7 @@ static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread
if (trace->multiple_threads) {
if (trace->show_comm)
printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
- printed += fprintf(fp, "%d ", thread->tid);
+ printed += fprintf(fp, "%d ", thread__tid(thread));
}
return printed;
@@ -1673,7 +1677,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
return 0;
}
-static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
+static const struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
{ .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, },
{ .name = "vector", .scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
@@ -1684,13 +1688,14 @@ static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
return strcmp(name, fmt->name);
}
-static struct syscall_arg_fmt *
-__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
+static const struct syscall_arg_fmt *
+__syscall_arg_fmt__find_by_name(const struct syscall_arg_fmt *fmts, const int nmemb,
+ const char *name)
{
return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
}
-static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
+static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
{
const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
@@ -1735,8 +1740,9 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
* 7 unsigned long
*/
arg->scnprintf = SCA_FD;
- } else {
- struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
+ } else {
+ const struct syscall_arg_fmt *fmt =
+ syscall_arg_fmt__find_by_name(field->name);
if (fmt) {
arg->scnprintf = fmt->scnprintf;
@@ -2200,7 +2206,8 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr
memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
} else {
pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
- thread__comm_str(thread), thread->pid_, thread->tid);
+ thread__comm_str(thread), thread__pid(thread),
+ thread__tid(thread));
return;
}
@@ -2411,13 +2418,15 @@ static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
int max_stack = evsel->core.attr.sample_max_stack ?
evsel->core.attr.sample_max_stack :
trace->max_stack;
- int err;
+ int err = -1;
+ addr_location__init(&al);
if (machine__resolve(trace->host, &al, sample) < 0)
- return -1;
+ goto out;
err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
- addr_location__put(&al);
+out:
+ addr_location__exit(&al);
return err;
}
@@ -2428,7 +2437,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
EVSEL__PRINT_DSO |
EVSEL__PRINT_UNKNOWN_AS_ADDR;
- return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
+ return sample__fprintf_callchain(sample, 38, print_opts, get_tls_callchain_cursor(), symbol_conf.bt_stop_list, trace->output);
}
static const char *errno_to_name(struct evsel *evsel, int err)
@@ -2482,9 +2491,11 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
goto out;
if (sample->callchain) {
- callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor);
if (callchain_ret == 0) {
- if (callchain_cursor.nr < trace->min_stack)
+ if (cursor->nr < trace->min_stack)
goto out;
callchain_ret = 1;
}
@@ -2545,7 +2556,7 @@ errno_print: {
if (child != NULL) {
fprintf(trace->output, "%ld", ret);
- if (child->comm_set)
+ if (thread__comm_set(child))
fprintf(trace->output, " (%s)", thread__comm_str(child));
thread__put(child);
}
@@ -2786,9 +2797,11 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
if (sample->callchain) {
- callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor);
if (callchain_ret == 0) {
- if (callchain_cursor.nr < trace->min_stack)
+ if (cursor->nr < trace->min_stack)
goto out;
callchain_ret = 1;
}
@@ -2886,12 +2899,15 @@ static int trace__pgfault(struct trace *trace,
int err = -1;
int callchain_ret = 0;
+ addr_location__init(&al);
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
if (sample->callchain) {
- callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor);
if (callchain_ret == 0) {
- if (callchain_cursor.nr < trace->min_stack)
+ if (cursor->nr < trace->min_stack)
goto out_put;
callchain_ret = 1;
}
@@ -2946,6 +2962,7 @@ out:
err = 0;
out_put:
thread__put(thread);
+ addr_location__exit(&al);
return err;
}
@@ -3119,13 +3136,8 @@ static void evlist__free_syscall_tp_fields(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- struct evsel_trace *et = evsel->priv;
-
- if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
- continue;
-
- zfree(&et->fmt);
- free(et);
+ evsel_trace__delete(evsel->priv);
+ evsel->priv = NULL;
}
}
@@ -3611,14 +3623,16 @@ static int trace__set_filter_loop_pids(struct trace *trace)
struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
while (thread && nr < ARRAY_SIZE(pids)) {
- struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
+ struct thread *parent = machine__find_thread(trace->host,
+ thread__ppid(thread),
+ thread__ppid(thread));
if (parent == NULL)
break;
if (!strcmp(thread__comm_str(parent), "sshd") ||
strstarts(thread__comm_str(parent), "gnome-terminal")) {
- pids[nr++] = parent->tid;
+ pids[nr++] = thread__tid(parent);
break;
}
thread = parent;
@@ -4317,7 +4331,7 @@ static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trac
ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
- printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
+ printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread__tid(thread));
printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
printed += fprintf(fp, "%.1f%%", ratio);
if (ttrace->pfmaj)
@@ -4339,11 +4353,13 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
return ttrace ? ttrace->nr_events : 0;
}
-DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+DEFINE_RESORT_RB(threads,
+ (thread__nr_events(thread__priv(a->thread)) <
+ thread__nr_events(thread__priv(b->thread))),
struct thread *thread;
)
{
- entry->thread = rb_entry(nd, struct thread, rb_node);
+ entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
@@ -4458,7 +4474,7 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
if (fmt) {
- struct syscall_fmt *scfmt = syscall_fmt__find(name);
+ const struct syscall_fmt *scfmt = syscall_fmt__find(name);
if (scfmt) {
int skip = 0;
@@ -4525,7 +4541,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
int len = strlen(str) + 1, err = -1, list, idx;
char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
char group_name[PATH_MAX];
- struct syscall_fmt *fmt;
+ const struct syscall_fmt *fmt;
if (strace_groups_dir == NULL)
return -1;
@@ -4591,8 +4607,11 @@ do_concat:
err = 0;
if (lists[0]) {
+ struct parse_events_option_args parse_events_option_args = {
+ .evlistp = &trace->evlist,
+ };
struct option o = {
- .value = &trace->evlist,
+ .value = &parse_events_option_args,
};
err = parse_events_option(&o, lists[0], 0);
}
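
Like builtin-script.c earlier, builtin-trace.c drops the global callchain_cursor in favor of get_tls_callchain_cursor(), so concurrent resolution cannot clobber a shared cursor. A sketch of the call pattern the sys_exit/pgfault/event handlers above share (the wrapper name is hypothetical; the helpers and the min_stack check mirror the diff):

    static int resolve_stack(struct trace *trace, struct evsel *evsel,
                             struct perf_sample *sample)
    {
            struct callchain_cursor *cursor = get_tls_callchain_cursor();
            int ret;

            ret = trace__resolve_callchain(trace, evsel, sample, cursor);
            if (ret == 0 && cursor->nr < trace->min_stack)
                    return -1;  /* stack too shallow: caller skips printing it */
            return ret;
    }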
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 6f831ee2f60f..a0f1d8adce60 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -1,113 +1,121 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-FILES='
-include/uapi/linux/const.h
-include/uapi/drm/drm.h
-include/uapi/drm/i915_drm.h
-include/uapi/linux/fadvise.h
-include/uapi/linux/fcntl.h
-include/uapi/linux/fs.h
-include/uapi/linux/fscrypt.h
-include/uapi/linux/kcmp.h
-include/uapi/linux/kvm.h
-include/uapi/linux/in.h
-include/uapi/linux/mount.h
-include/uapi/linux/openat2.h
-include/uapi/linux/perf_event.h
-include/uapi/linux/prctl.h
-include/uapi/linux/sched.h
-include/uapi/linux/stat.h
-include/uapi/linux/usbdevice_fs.h
-include/uapi/linux/vhost.h
-include/uapi/sound/asound.h
-include/linux/bits.h
-include/vdso/bits.h
-include/linux/const.h
-include/vdso/const.h
-include/linux/hash.h
-include/linux/list-sort.h
-include/uapi/linux/hw_breakpoint.h
-arch/x86/include/asm/disabled-features.h
-arch/x86/include/asm/required-features.h
-arch/x86/include/asm/cpufeatures.h
-arch/x86/include/asm/inat_types.h
-arch/x86/include/asm/emulate_prefix.h
-arch/x86/include/asm/irq_vectors.h
-arch/x86/include/asm/msr-index.h
-arch/x86/include/uapi/asm/prctl.h
-arch/x86/lib/x86-opcode-map.txt
-arch/x86/tools/gen-insn-attr-x86.awk
-arch/arm/include/uapi/asm/perf_regs.h
-arch/arm64/include/uapi/asm/perf_regs.h
-arch/loongarch/include/uapi/asm/perf_regs.h
-arch/mips/include/uapi/asm/perf_regs.h
-arch/powerpc/include/uapi/asm/perf_regs.h
-arch/s390/include/uapi/asm/perf_regs.h
-arch/x86/include/uapi/asm/perf_regs.h
-arch/x86/include/uapi/asm/kvm.h
-arch/x86/include/uapi/asm/kvm_perf.h
-arch/x86/include/uapi/asm/svm.h
-arch/x86/include/uapi/asm/unistd.h
-arch/x86/include/uapi/asm/vmx.h
-arch/powerpc/include/uapi/asm/kvm.h
-arch/s390/include/uapi/asm/kvm.h
-arch/s390/include/uapi/asm/kvm_perf.h
-arch/s390/include/uapi/asm/sie.h
-arch/arm/include/uapi/asm/kvm.h
-arch/arm64/include/uapi/asm/kvm.h
-arch/arm64/include/uapi/asm/unistd.h
-arch/alpha/include/uapi/asm/errno.h
-arch/mips/include/asm/errno.h
-arch/mips/include/uapi/asm/errno.h
-arch/parisc/include/uapi/asm/errno.h
-arch/powerpc/include/uapi/asm/errno.h
-arch/sparc/include/uapi/asm/errno.h
-arch/x86/include/uapi/asm/errno.h
-include/asm-generic/bitops/arch_hweight.h
-include/asm-generic/bitops/const_hweight.h
-include/asm-generic/bitops/__fls.h
-include/asm-generic/bitops/fls.h
-include/asm-generic/bitops/fls64.h
-include/linux/coresight-pmu.h
-include/uapi/asm-generic/errno.h
-include/uapi/asm-generic/errno-base.h
-include/uapi/asm-generic/ioctls.h
-include/uapi/asm-generic/mman-common.h
-include/uapi/asm-generic/unistd.h
-'
-
-SYNC_CHECK_FILES='
-arch/x86/include/asm/inat.h
-arch/x86/include/asm/insn.h
-arch/x86/lib/inat.c
-arch/x86/lib/insn.c
-'
+YELLOW='\033[0;33m'
+NC='\033[0m' # No Color
+
+declare -a FILES
+FILES=(
+ "include/uapi/linux/const.h"
+ "include/uapi/drm/drm.h"
+ "include/uapi/drm/i915_drm.h"
+ "include/uapi/linux/fadvise.h"
+ "include/uapi/linux/fcntl.h"
+ "include/uapi/linux/fs.h"
+ "include/uapi/linux/fscrypt.h"
+ "include/uapi/linux/kcmp.h"
+ "include/uapi/linux/kvm.h"
+ "include/uapi/linux/in.h"
+ "include/uapi/linux/mount.h"
+ "include/uapi/linux/openat2.h"
+ "include/uapi/linux/perf_event.h"
+ "include/uapi/linux/prctl.h"
+ "include/uapi/linux/sched.h"
+ "include/uapi/linux/stat.h"
+ "include/uapi/linux/usbdevice_fs.h"
+ "include/uapi/linux/vhost.h"
+ "include/uapi/sound/asound.h"
+ "include/linux/bits.h"
+ "include/vdso/bits.h"
+ "include/linux/const.h"
+ "include/vdso/const.h"
+ "include/linux/hash.h"
+ "include/linux/list-sort.h"
+ "include/uapi/linux/hw_breakpoint.h"
+ "arch/x86/include/asm/disabled-features.h"
+ "arch/x86/include/asm/required-features.h"
+ "arch/x86/include/asm/cpufeatures.h"
+ "arch/x86/include/asm/inat_types.h"
+ "arch/x86/include/asm/emulate_prefix.h"
+ "arch/x86/include/asm/irq_vectors.h"
+ "arch/x86/include/asm/msr-index.h"
+ "arch/x86/include/uapi/asm/prctl.h"
+ "arch/x86/lib/x86-opcode-map.txt"
+ "arch/x86/tools/gen-insn-attr-x86.awk"
+ "arch/arm/include/uapi/asm/perf_regs.h"
+ "arch/arm64/include/uapi/asm/perf_regs.h"
+ "arch/loongarch/include/uapi/asm/perf_regs.h"
+ "arch/mips/include/uapi/asm/perf_regs.h"
+ "arch/powerpc/include/uapi/asm/perf_regs.h"
+ "arch/s390/include/uapi/asm/perf_regs.h"
+ "arch/x86/include/uapi/asm/perf_regs.h"
+ "arch/x86/include/uapi/asm/kvm.h"
+ "arch/x86/include/uapi/asm/kvm_perf.h"
+ "arch/x86/include/uapi/asm/svm.h"
+ "arch/x86/include/uapi/asm/unistd.h"
+ "arch/x86/include/uapi/asm/vmx.h"
+ "arch/powerpc/include/uapi/asm/kvm.h"
+ "arch/s390/include/uapi/asm/kvm.h"
+ "arch/s390/include/uapi/asm/kvm_perf.h"
+ "arch/s390/include/uapi/asm/sie.h"
+ "arch/arm/include/uapi/asm/kvm.h"
+ "arch/arm64/include/uapi/asm/kvm.h"
+ "arch/arm64/include/uapi/asm/unistd.h"
+ "arch/alpha/include/uapi/asm/errno.h"
+ "arch/mips/include/asm/errno.h"
+ "arch/mips/include/uapi/asm/errno.h"
+ "arch/parisc/include/uapi/asm/errno.h"
+ "arch/powerpc/include/uapi/asm/errno.h"
+ "arch/sparc/include/uapi/asm/errno.h"
+ "arch/x86/include/uapi/asm/errno.h"
+ "include/asm-generic/bitops/arch_hweight.h"
+ "include/asm-generic/bitops/const_hweight.h"
+ "include/asm-generic/bitops/__fls.h"
+ "include/asm-generic/bitops/fls.h"
+ "include/asm-generic/bitops/fls64.h"
+ "include/linux/coresight-pmu.h"
+ "include/uapi/asm-generic/errno.h"
+ "include/uapi/asm-generic/errno-base.h"
+ "include/uapi/asm-generic/ioctls.h"
+ "include/uapi/asm-generic/mman-common.h"
+ "include/uapi/asm-generic/unistd.h"
+)
+
+declare -a SYNC_CHECK_FILES
+SYNC_CHECK_FILES=(
+ "arch/x86/include/asm/inat.h"
+ "arch/x86/include/asm/insn.h"
+ "arch/x86/lib/inat.c"
+ "arch/x86/lib/insn.c"
+)
# These copies are under tools/perf/trace/beauty/ as they are not used in
# building object files, only by scripts in tools/perf/trace/beauty/ to generate
# tables that then get included in .c files for things like id->string syscall
# tables (and the reverse lookup as well: string -> id)
-BEAUTY_FILES='
-include/linux/socket.h
-'
+declare -a BEAUTY_FILES
+BEAUTY_FILES=(
+ "include/linux/socket.h"
+)
+
+declare -a FAILURES
check_2 () {
- file1=$1
- file2=$2
+ tools_file=$1
+ orig_file=$2
shift
shift
- cmd="diff $* $file1 $file2 > /dev/null"
+ cmd="diff $* $tools_file $orig_file > /dev/null"
- test -f $file2 && {
- eval $cmd || {
- echo "Warning: Kernel ABI header at '$file1' differs from latest version at '$file2'" >&2
- echo diff -u $file1 $file2
- }
- }
+ if [ -f "$orig_file" ] && ! eval "$cmd"
+ then
+ FAILURES+=(
+ "$tools_file $orig_file"
+ )
+ fi
}
check () {
@@ -115,7 +123,7 @@ check () {
shift
- check_2 tools/$file $file $*
+ check_2 "tools/$file" "$file" $*
}
beauty_check () {
@@ -123,23 +131,29 @@ beauty_check () {
shift
- check_2 tools/perf/trace/beauty/$file $file $*
+ check_2 "tools/perf/trace/beauty/$file" "$file" $*
}
# Check if we have the kernel headers (tools/perf/../../include), else
# we're probably on a detached tarball, so no point in trying to check
# differences.
-test -d ../../include || exit 0
+if ! [ -d ../../include ]
+then
+ echo -e "${YELLOW}Warning${NC}: Skipped check-headers due to missing ../../include"
+ exit 0
+fi
cd ../..
# simple diff check
-for i in $FILES; do
- check $i -B
+for i in "${FILES[@]}"
+do
+ check "$i" -B
done
-for i in $SYNC_CHECK_FILES; do
- check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+for i in "${SYNC_CHECK_FILES[@]}"
+do
+ check "$i" '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
done
# diff with extra ignore lines
@@ -160,8 +174,9 @@ check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/s
check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl
-for i in $BEAUTY_FILES; do
- beauty_check $i -B
+for i in "${BEAUTY_FILES[@]}"
+do
+ beauty_check "$i" -B
done
# check duplicated library files
@@ -169,3 +184,12 @@ check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
cd tools/perf
+
+if [ ${#FAILURES[@]} -gt 0 ]
+then
+ echo -e "${YELLOW}Warning${NC}: Kernel ABI header differences:"
+ for i in "${FAILURES[@]}"
+ do
+ echo " diff -u $i"
+ done
+fi
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c
new file mode 100644
index 000000000000..38e593d92920
--- /dev/null
+++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test v2 API for perf --dlfilter shared object
+ * Copyright (c) 2023, Intel Corporation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Copy v2 API instead of including current API
+ */
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/*
+ * The following macro can be used to determine if this header defines
+ * perf_dlfilter_sample machine_pid and vcpu.
+ */
+#define PERF_DLFILTER_HAS_MACHINE_PID
+
+/* Definitions for perf_dlfilter_sample flags */
+enum {
+ PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0,
+ PERF_DLFILTER_FLAG_CALL = 1ULL << 1,
+ PERF_DLFILTER_FLAG_RETURN = 1ULL << 2,
+ PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3,
+ PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4,
+ PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5,
+ PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6,
+ PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7,
+ PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8,
+ PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9,
+ PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10,
+ PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11,
+ PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12,
+};
+
+/*
+ * perf sample event information (as per perf script and <linux/perf_event.h>)
+ */
+struct perf_dlfilter_sample {
+ __u32 size; /* Size of this structure (for compatibility checking) */
+ __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u64 ip;
+ __s32 pid;
+ __s32 tid;
+ __u64 time;
+ __u64 addr;
+ __u64 id;
+ __u64 stream_id;
+ __u64 period;
+ __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+ __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
+ __u64 insn_cnt; /* For instructions-per-cycle (IPC) */
+ __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */
+ __s32 cpu;
+ __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */
+ __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
+ __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
+ __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
+ __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
+ __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
+ __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
+ __u8 addr_correlates_sym; /* True => resolve_addr() can be called */
+ __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */
+ __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+ const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+ __u64 brstack_nr; /* Number of brstack entries */
+ const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
+ __u64 raw_callchain_nr; /* Number of raw_callchain entries */
+ const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
+ const char *event;
+ __s32 machine_pid;
+ __s32 vcpu;
+};
+
+/*
+ * Address location (as per perf script)
+ */
+struct perf_dlfilter_al {
+ __u32 size; /* Size of this structure (for compatibility checking) */
+ __u32 symoff;
+ const char *sym;
+ __u64 addr; /* Mapped address (from dso) */
+ __u64 sym_start;
+ __u64 sym_end;
+ const char *dso;
+ __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
+ __u8 is_64_bit; /* Only valid if dso is not NULL */
+ __u8 is_kernel_ip; /* True if in kernel space */
+ __u32 buildid_size;
+ __u8 *buildid;
+ /* Below members are only populated by resolve_ip() */
+ __u8 filtered; /* True if this sample event will be filtered out */
+ const char *comm;
+ void *priv; /* Private data (v2 API) */
+};
+
+struct perf_dlfilter_fns {
+ /* Return information about ip */
+ const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
+ /* Return information about addr (if addr_correlates_sym) */
+ const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
+ /* Return arguments from --dlarg option */
+ char **(*args)(void *ctx, int *dlargc);
+ /*
+ * Return information about address (al->size must be set before
+ * calling). Returns 0 on success, -1 otherwise. Call al_cleanup()
+ * when 'al' data is no longer needed.
+ */
+ __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
+ /* Return instruction bytes and length */
+ const __u8 *(*insn)(void *ctx, __u32 *length);
+ /* Return source file name and line number */
+ const char *(*srcline)(void *ctx, __u32 *line_number);
+ /* Return perf_event_attr, refer <linux/perf_event.h> */
+ struct perf_event_attr *(*attr)(void *ctx);
+ /* Read object code, return number of bytes read */
+ __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+ /*
+ * If present (i.e. must check al_cleanup != NULL), call after
+ * resolve_address() to free any associated resources. (v2 API)
+ */
+ void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
+ /* Reserved */
+ void *(*reserved[119])(void *);
+};
+
+struct perf_dlfilter_fns perf_dlfilter_fns;
+
+static int verbose;
+
+#define pr_debug(fmt, ...) do { \
+ if (verbose > 0) \
+ fprintf(stderr, fmt, ##__VA_ARGS__); \
+ } while (0)
+
+static int test_fail(const char *msg)
+{
+ pr_debug("%s\n", msg);
+ return -1;
+}
+
+#define CHECK(x) do { \
+ if (!(x)) \
+ return test_fail("Check '" #x "' failed\n"); \
+ } while (0)
+
+struct filter_data {
+ __u64 ip;
+ __u64 addr;
+ int do_early;
+ int early_filter_cnt;
+ int filter_cnt;
+};
+
+static struct filter_data *filt_dat;
+
+int start(void **data, void *ctx)
+{
+ int dlargc;
+ char **dlargv;
+ struct filter_data *d;
+ static bool called;
+
+ verbose = 1;
+
+ CHECK(!filt_dat && !called);
+ called = true;
+
+ d = calloc(1, sizeof(*d));
+ if (!d)
+ return test_fail("Failed to allocate memory");
+ filt_dat = d;
+ *data = d;
+
+ dlargv = perf_dlfilter_fns.args(ctx, &dlargc);
+
+ CHECK(dlargc == 6);
+ CHECK(!strcmp(dlargv[0], "first"));
+ verbose = strtol(dlargv[1], NULL, 0);
+ d->ip = strtoull(dlargv[2], NULL, 0);
+ d->addr = strtoull(dlargv[3], NULL, 0);
+ d->do_early = strtol(dlargv[4], NULL, 0);
+ CHECK(!strcmp(dlargv[5], "last"));
+
+ pr_debug("%s API\n", __func__);
+
+ return 0;
+}
+
+#define CHECK_SAMPLE(x) do { \
+ if (sample->x != expected.x) \
+ return test_fail("'" #x "' not expected value\n"); \
+ } while (0)
+
+static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample *sample)
+{
+ struct perf_dlfilter_sample expected = {
+ .ip = d->ip,
+ .pid = 12345,
+ .tid = 12346,
+ .time = 1234567890,
+ .addr = d->addr,
+ .id = 99,
+ .stream_id = 101,
+ .period = 543212345,
+ .cpu = 31,
+ .cpumode = PERF_RECORD_MISC_USER,
+ .addr_correlates_sym = 1,
+ .misc = PERF_RECORD_MISC_USER,
+ };
+
+ CHECK(sample->size >= sizeof(struct perf_dlfilter_sample));
+
+ CHECK_SAMPLE(ip);
+ CHECK_SAMPLE(pid);
+ CHECK_SAMPLE(tid);
+ CHECK_SAMPLE(time);
+ CHECK_SAMPLE(addr);
+ CHECK_SAMPLE(id);
+ CHECK_SAMPLE(stream_id);
+ CHECK_SAMPLE(period);
+ CHECK_SAMPLE(cpu);
+ CHECK_SAMPLE(cpumode);
+ CHECK_SAMPLE(addr_correlates_sym);
+ CHECK_SAMPLE(misc);
+
+ CHECK(!sample->raw_data);
+ CHECK_SAMPLE(brstack_nr);
+ CHECK(!sample->brstack);
+ CHECK_SAMPLE(raw_callchain_nr);
+ CHECK(!sample->raw_callchain);
+
+#define EVENT_NAME "branches:"
+ CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME)));
+
+ return 0;
+}
+
+static int check_al(void *ctx)
+{
+ const struct perf_dlfilter_al *al;
+
+ al = perf_dlfilter_fns.resolve_ip(ctx);
+ if (!al)
+ return test_fail("resolve_ip() failed");
+
+ CHECK(al->sym && !strcmp("foo", al->sym));
+ CHECK(!al->symoff);
+
+ return 0;
+}
+
+static int check_addr_al(void *ctx)
+{
+ const struct perf_dlfilter_al *addr_al;
+
+ addr_al = perf_dlfilter_fns.resolve_addr(ctx);
+ if (!addr_al)
+ return test_fail("resolve_addr() failed");
+
+ CHECK(addr_al->sym && !strcmp("bar", addr_al->sym));
+ CHECK(!addr_al->symoff);
+
+ return 0;
+}
+
+static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample)
+{
+ struct perf_dlfilter_al address_al;
+ const struct perf_dlfilter_al *al;
+
+ al = perf_dlfilter_fns.resolve_ip(ctx);
+ if (!al)
+ return test_fail("resolve_ip() failed");
+
+ address_al.size = sizeof(address_al);
+ if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al))
+ return test_fail("resolve_address() failed");
+
+ CHECK(address_al.sym && al->sym);
+ CHECK(!strcmp(address_al.sym, al->sym));
+ CHECK(address_al.addr == al->addr);
+ CHECK(address_al.sym_start == al->sym_start);
+ CHECK(address_al.sym_end == al->sym_end);
+ CHECK(address_al.dso && al->dso);
+ CHECK(!strcmp(address_al.dso, al->dso));
+
+ /* al_cleanup() is v2 API so may not be present */
+ if (perf_dlfilter_fns.al_cleanup)
+ perf_dlfilter_fns.al_cleanup(ctx, &address_al);
+
+ return 0;
+}
+
+static int check_attr(void *ctx)
+{
+ struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx);
+
+ CHECK(attr);
+ CHECK(attr->type == PERF_TYPE_HARDWARE);
+ CHECK(attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+ return 0;
+}
+
+static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early)
+{
+ struct filter_data *d = data;
+
+ CHECK(data && filt_dat == data);
+
+ if (early) {
+ CHECK(!d->early_filter_cnt);
+ d->early_filter_cnt += 1;
+ } else {
+ CHECK(!d->filter_cnt);
+ CHECK(d->early_filter_cnt);
+ CHECK(d->do_early != 2);
+ d->filter_cnt += 1;
+ }
+
+ if (check_sample(data, sample))
+ return -1;
+
+ if (check_attr(ctx))
+ return -1;
+
+ if (early && !d->do_early)
+ return 0;
+
+ if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample))
+ return -1;
+
+ if (early)
+ return d->do_early == 2;
+
+ return 1;
+}
+
+int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+ pr_debug("%s API\n", __func__);
+
+ return do_checks(data, sample, ctx, true);
+}
+
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+ pr_debug("%s API\n", __func__);
+
+ return do_checks(data, sample, ctx, false);
+}
+
+int stop(void *data, void *ctx)
+{
+ static bool called;
+
+ pr_debug("%s API\n", __func__);
+
+ CHECK(data && filt_dat == data && !called);
+ called = true;
+
+ free(data);
+ filt_dat = NULL;
+ return 0;
+}
+
+const char *filter_description(const char **long_description)
+{
+ *long_description = "Filter used by the 'dlfilter C API' perf test";
+ return "dlfilter to test v2 C API";
+}
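The test above exercises every v2 entry point at once; stripped to its essentials, a v2-aware filter follows the pattern sketched below. This is an illustrative sketch, not code from this patch: it assumes only what the header added here declares (perf_dlfilter_fns, resolve_address(), the optional al_cleanup()) and the filter_event() convention that returning non-zero filters the sample out.

    // SPDX-License-Identifier: GPL-2.0
    /* Minimal sketch of a v2-aware dlfilter: illustrative only, not part of this patch */
    #include <perf/perf_dlfilter.h>

    struct perf_dlfilter_fns perf_dlfilter_fns;

    int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
    {
            struct perf_dlfilter_al al;
            int drop;

            (void)data;                  /* per-session data from start(), unused here */

            al.size = sizeof(al);        /* must be set before calling resolve_address() */
            if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &al))
                    return 0;            /* could not resolve: keep the sample */

            drop = !al.sym;              /* drop samples that resolve to no symbol */

            /* al_cleanup() is v2-only, so its presence must be checked */
            if (perf_dlfilter_fns.al_cleanup)
                    perf_dlfilter_fns.al_cleanup(ctx, &al);

            return drop;                 /* non-zero filters the sample out */
    }

Compiled as a shared object (something like "cc -I tools/perf/include -shared -fPIC -o filter.so filter.c", flags assumed rather than prescribed by this patch), it would be loaded with perf script --dlfilter ./filter.so.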
diff --git a/tools/perf/include/perf/perf_dlfilter.h b/tools/perf/include/perf/perf_dlfilter.h
index a26e2f129f83..16fc4568ac53 100644
--- a/tools/perf/include/perf/perf_dlfilter.h
+++ b/tools/perf/include/perf/perf_dlfilter.h
@@ -91,6 +91,7 @@ struct perf_dlfilter_al {
/* Below members are only populated by resolve_ip() */
__u8 filtered; /* True if this sample event will be filtered out */
const char *comm;
+ void *priv; /* Private data. Do not change */
};
struct perf_dlfilter_fns {
@@ -102,7 +103,8 @@ struct perf_dlfilter_fns {
char **(*args)(void *ctx, int *dlargc);
/*
* Return information about address (al->size must be set before
- * calling). Returns 0 on success, -1 otherwise.
+ * calling). Returns 0 on success, -1 otherwise. Call al_cleanup()
+ * when 'al' data is no longer needed.
*/
__s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
/* Return instruction bytes and length */
@@ -113,8 +115,13 @@ struct perf_dlfilter_fns {
struct perf_event_attr *(*attr)(void *ctx);
/* Read object code, return number of bytes read */
__s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+ /*
+ * If present (i.e. must check al_cleanup != NULL), call after
+ * resolve_address() to free any associated resources.
+ */
+ void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
/* Reserved */
- void *(*reserved[120])(void *);
+ void *(*reserved[119])(void *);
};
/*
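Note the bookkeeping in this hunk: al_cleanup() is added while reserved[] shrinks from 120 to 119 entries, so struct perf_dlfilter_fns keeps the same size and layout and filters built against the v1 header continue to load. Since the comment requires checking al_cleanup != NULL before use, a filter can probe for the v2 API at runtime. A small sketch, with a hypothetical helper name:

    #include <stdbool.h>
    #include <perf/perf_dlfilter.h>

    extern struct perf_dlfilter_fns perf_dlfilter_fns;

    /* Hypothetical helper: true when the loading perf supplies the v2 table */
    static bool have_v2_al_cleanup(void)
    {
            /* Under a v1 perf this slot was one of the reserved[] NULL entries */
            return perf_dlfilter_fns.al_cleanup != NULL;
    }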
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 150765f2baee..1d18bb89402e 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -35,3 +35,9 @@ $(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_L
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
endif
+
+# pmu-events.c file is generated in the OUTPUT directory so it needs a
+# separate rule to depend on it properly
+$(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C)
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json
new file mode 100644
index 000000000000..c751d57f2e19
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/branch.json
@@ -0,0 +1,17 @@
+[
+ {
+ "ArchStdEvent": "BR_IMMED_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_MIS_PRED"
+ },
+ {
+ "ArchStdEvent": "BR_PRED"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json
new file mode 100644
index 000000000000..8623be121818
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/bus.json
@@ -0,0 +1,32 @@
+[
+ {
+ "ArchStdEvent": "CPU_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_CYCLES"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_SHARED"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_NOT_SHARED"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_NORMAL"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_PERIPH"
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS"
+ },
+ {
+ "ArchStdEvent": "CNT_CYCLES"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
new file mode 100644
index 000000000000..7a2b7b200f14
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
@@ -0,0 +1,101 @@
+[
+ {
+ "ArchStdEvent": "L1D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_INVAL"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB"
+ },
+ {
+ "ArchStdEvent": "L2I_TLB"
+ },
+ {
+ "ArchStdEvent": "DTLB_WALK"
+ },
+ {
+ "ArchStdEvent": "ITLB_WALK"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_LMISS_RD"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE_LMISS"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_LMISS_RD"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
new file mode 100644
index 000000000000..95c30243f2b2
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
@@ -0,0 +1,698 @@
+[
+ {
+ "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache",
+ "EventCode": "0x10A",
+ "EventName": "L2_PREFETCH_REFILL",
+ "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache"
+ },
+ {
+ "PublicDescription": "Level 2 prefetch requests, late",
+ "EventCode": "0x10B",
+ "EventName": "L2_PREFETCH_UPGRADE",
+ "BriefDescription": "Level 2 prefetch requests, late"
+ },
+ {
+ "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB",
+ "EventCode": "0x110",
+ "EventName": "BPU_HIT_BTB",
+ "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB"
+ },
+ {
+ "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB",
+ "EventCode": "0x111",
+ "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB",
+ "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB"
+ },
+ {
+ "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor",
+ "EventCode": "0x112",
+ "EventName": "BPU_HIT_INDIRECT_PREDICTOR",
+ "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor"
+ },
+ {
+ "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor",
+ "EventCode": "0x113",
+ "EventName": "BPU_HIT_RSB",
+ "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor"
+ },
+ {
+ "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB",
+ "EventCode": "0x114",
+ "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB",
+ "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB"
+ },
+ {
+ "PublicDescription": "Predictable branch speculatively executed, unpredicted",
+ "EventCode": "0x115",
+ "EventName": "BPU_BRANCH_NO_HIT",
+ "BriefDescription": "Predictable branch speculatively executed, unpredicted"
+ },
+ {
+ "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict",
+ "EventCode": "0x116",
+ "EventName": "BPU_HIT_BTB_AND_MISPREDICT",
+ "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict"
+ },
+ {
+ "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict",
+ "EventCode": "0x117",
+ "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT",
+ "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict"
+ },
+ {
+ "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict",
+ "EventCode": "0x118",
+ "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT",
+ "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict"
+ },
+ {
+ "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict",
+ "EventCode": "0x119",
+ "EventName": "BPU_HIT_RSB_AND_MISPREDICT",
+ "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict"
+ },
+ {
+ "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict",
+ "EventCode": "0x11a",
+ "EventName": "BPU_MISS_RSB_AND_MISPREDICT",
+ "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict"
+ },
+ {
+ "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict",
+ "EventCode": "0x11b",
+ "EventName": "BPU_NO_PREDICTION_MISPREDICT",
+ "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict"
+ },
+ {
+ "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict",
+ "EventCode": "0x11c",
+ "EventName": "BPU_BTB_UPDATE",
+ "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict"
+ },
+ {
+ "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full",
+ "EventCode": "0x11d",
+ "EventName": "BPU_RSB_FULL_STALL",
+ "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full"
+ },
+ {
+ "PublicDescription": "Macro-ops speculatively decoded",
+ "EventCode": "0x11f",
+ "EventName": "ICF_INST_SPEC_DECODE",
+ "BriefDescription": "Macro-ops speculatively decoded"
+ },
+ {
+ "PublicDescription": "Flushes",
+ "EventCode": "0x120",
+ "EventName": "GPC_FLUSH",
+ "BriefDescription": "Flushes"
+ },
+ {
+ "PublicDescription": "Flushes due to memory hazards",
+ "EventCode": "0x121",
+ "EventName": "BPU_FLUSH_MEM_FAULT",
+ "BriefDescription": "Flushes due to memory hazards"
+ },
+ {
+ "PublicDescription": "ETM extout bit 0",
+ "EventCode": "0x141",
+ "EventName": "MSC_ETM_EXTOUT0",
+ "BriefDescription": "ETM extout bit 0"
+ },
+ {
+ "PublicDescription": "ETM extout bit 1",
+ "EventCode": "0x142",
+ "EventName": "MSC_ETM_EXTOUT1",
+ "BriefDescription": "ETM extout bit 1"
+ },
+ {
+ "PublicDescription": "ETM extout bit 2",
+ "EventCode": "0x143",
+ "EventName": "MSC_ETM_EXTOUT2",
+ "BriefDescription": "ETM extout bit 2"
+ },
+ {
+ "PublicDescription": "ETM extout bit 3",
+ "EventCode": "0x144",
+ "EventName": "MSC_ETM_EXTOUT3",
+ "BriefDescription": "ETM extout bit 3"
+ },
+ {
+ "PublicDescription": "Bus request sn",
+ "EventCode": "0x156",
+ "EventName": "L2C_SNOOP",
+ "BriefDescription": "Bus request sn"
+ },
+ {
+ "PublicDescription": "L2 TXDAT LCRD blocked",
+ "EventCode": "0x169",
+ "EventName": "L2C_DAT_CRD_STALL",
+ "BriefDescription": "L2 TXDAT LCRD blocked"
+ },
+ {
+ "PublicDescription": "L2 TXRSP LCRD blocked",
+ "EventCode": "0x16a",
+ "EventName": "L2C_RSP_CRD_STALL",
+ "BriefDescription": "L2 TXRSP LCRD blocked"
+ },
+ {
+ "PublicDescription": "L2 TXREQ LCRD blocked",
+ "EventCode": "0x16b",
+ "EventName": "L2C_REQ_CRD_STALL",
+ "BriefDescription": "L2 TXREQ LCRD blocked"
+ },
+ {
+ "PublicDescription": "Early mispredict",
+ "EventCode": "0xD100",
+ "EventName": "ICF_EARLY_MIS_PRED",
+ "BriefDescription": "Early mispredict"
+ },
+ {
+ "PublicDescription": "FEQ full cycles",
+ "EventCode": "0xD101",
+ "EventName": "ICF_FEQ_FULL",
+ "BriefDescription": "FEQ full cycles"
+ },
+ {
+ "PublicDescription": "Instruction FIFO Full",
+ "EventCode": "0xD102",
+ "EventName": "ICF_INST_FIFO_FULL",
+ "BriefDescription": "Instruction FIFO Full"
+ },
+ {
+ "PublicDescription": "L1I TLB miss",
+ "EventCode": "0xD103",
+ "EventName": "L1I_TLB_MISS",
+ "BriefDescription": "L1I TLB miss"
+ },
+ {
+ "PublicDescription": "ICF sent 0 instructions to IDR this cycle",
+ "EventCode": "0xD104",
+ "EventName": "ICF_STALL",
+ "BriefDescription": "ICF sent 0 instructions to IDR this cycle"
+ },
+ {
+ "PublicDescription": "PC FIFO Full",
+ "EventCode": "0xD105",
+ "EventName": "ICF_PC_FIFO_FULL",
+ "BriefDescription": "PC FIFO Full"
+ },
+ {
+ "PublicDescription": "Stall due to BOB ID",
+ "EventCode": "0xD200",
+ "EventName": "IDR_STALL_BOB_ID",
+ "BriefDescription": "Stall due to BOB ID"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to LOB entries",
+ "EventCode": "0xD201",
+ "EventName": "IDR_STALL_LOB_ID",
+ "BriefDescription": "Dispatch stall due to LOB entries"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to SOB entries",
+ "EventCode": "0xD202",
+ "EventName": "IDR_STALL_SOB_ID",
+ "BriefDescription": "Dispatch stall due to SOB entries"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to IXU scheduler entries",
+ "EventCode": "0xD203",
+ "EventName": "IDR_STALL_IXU_SCHED",
+ "BriefDescription": "Dispatch stall due to IXU scheduler entries"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to FSU scheduler entries",
+ "EventCode": "0xD204",
+ "EventName": "IDR_STALL_FSU_SCHED",
+ "BriefDescription": "Dispatch stall due to FSU scheduler entries"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to ROB entries",
+ "EventCode": "0xD205",
+ "EventName": "IDR_STALL_ROB_ID",
+ "BriefDescription": "Dispatch stall due to ROB entries"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to flush (6 cycles)",
+ "EventCode": "0xD206",
+ "EventName": "IDR_STALL_FLUSH",
+ "BriefDescription": "Dispatch stall due to flush (6 cycles)"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to WFI",
+ "EventCode": "0xD207",
+ "EventName": "IDR_STALL_WFI",
+ "BriefDescription": "Dispatch stall due to WFI"
+ },
+ {
+ "PublicDescription": "Number of SWOB drains triggered by timeout",
+ "EventCode": "0xD208",
+ "EventName": "IDR_STALL_SWOB_TIMEOUT",
+ "BriefDescription": "Number of SWOB drains triggered by timeout"
+ },
+ {
+ "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain",
+ "EventCode": "0xD209",
+ "EventName": "IDR_STALL_SWOB_RAW",
+ "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain"
+ },
+ {
+ "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full",
+ "EventCode": "0xD20A",
+ "EventName": "IDR_STALL_SWOB_FULL",
+ "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to L1 instruction cache miss",
+ "EventCode": "0xD20B",
+ "EventName": "STALL_FRONTEND_CACHE",
+ "BriefDescription": "Dispatch stall due to L1 instruction cache miss"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to L1 instruction TLB miss",
+ "EventCode": "0xD20C",
+ "EventName": "STALL_FRONTEND_TLB",
+ "BriefDescription": "Dispatch stall due to L1 instruction TLB miss"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to L1 data cache miss",
+ "EventCode": "0xD20D",
+ "EventName": "STALL_BACKEND_CACHE",
+ "BriefDescription": "Dispatch stall due to L1 data cache miss"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to L1 data TLB miss",
+ "EventCode": "0xD20E",
+ "EventName": "STALL_BACKEND_TLB",
+ "BriefDescription": "Dispatch stall due to L1 data TLB miss"
+ },
+ {
+ "PublicDescription": "Dispatch stall due to lack of any core resource",
+ "EventCode": "0xD20F",
+ "EventName": "STALL_BACKEND_RESOURCE",
+ "BriefDescription": "Dispatch stall due to lack of any core resource"
+ },
+ {
+ "PublicDescription": "Instructions issued by the scheduler",
+ "EventCode": "0xD300",
+ "EventName": "IXU_NUM_UOPS_ISSUED",
+ "BriefDescription": "Instructions issued by the scheduler"
+ },
+ {
+ "PublicDescription": "Any uop issued was canceled for any reason",
+ "EventCode": "0xD301",
+ "EventName": "IXU_ISSUE_CANCEL",
+ "BriefDescription": "Any uop issued was canceled for any reason"
+ },
+ {
+ "PublicDescription": "A load wakeup to the scheduler has been cancelled",
+ "EventCode": "0xD302",
+ "EventName": "IXU_LOAD_CANCEL",
+ "BriefDescription": "A load wakeup to the scheduler has been cancelled"
+ },
+ {
+ "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict",
+ "EventCode": "0xD303",
+ "EventName": "IXU_SLOW_CANCEL",
+ "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXA",
+ "EventCode": "0xD304",
+ "EventName": "IXU_IXA_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXA"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXA Par 0",
+ "EventCode": "0xD305",
+ "EventName": "IXU_IXA_PAR0_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXA Par 0"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXA Par 1",
+ "EventCode": "0xD306",
+ "EventName": "IXU_IXA_PAR1_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXA Par 1"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXB",
+ "EventCode": "0xD307",
+ "EventName": "IXU_IXB_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXB"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXB Par 0",
+ "EventCode": "0xD308",
+ "EventName": "IXU_IXB_PAR0_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXB Par 0"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXB Par 1",
+ "EventCode": "0xD309",
+ "EventName": "IXU_IXB_PAR1_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXB Par 1"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXC",
+ "EventCode": "0xD30A",
+ "EventName": "IXU_IXC_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXC"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXC Par 0",
+ "EventCode": "0xD30B",
+ "EventName": "IXU_IXC_PAR0_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXC Par 0"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXC Par 1",
+ "EventCode": "0xD30C",
+ "EventName": "IXU_IXC_PAR1_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXC Par 1"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXD",
+ "EventCode": "0xD30D",
+ "EventName": "IXU_IXD_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXD"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXD Par 0",
+ "EventCode": "0xD30E",
+ "EventName": "IXU_IXD_PAR0_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXD Par 0"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on IXD Par 1",
+ "EventCode": "0xD30F",
+ "EventName": "IXU_IXD_PAR1_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on IXD Par 1"
+ },
+ {
+ "PublicDescription": "Uops issued by the FSU scheduler",
+ "EventCode": "0xD400",
+ "EventName": "FSU_ISSUED",
+ "BriefDescription": "Uops issued by the FSU scheduler"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on pipe X",
+ "EventCode": "0xD401",
+ "EventName": "FSU_FSX_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on pipe X"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on pipe Y",
+ "EventCode": "0xD402",
+ "EventName": "FSU_FSY_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on pipe Y"
+ },
+ {
+ "PublicDescription": "Uops issued by the scheduler on pipe Z",
+ "EventCode": "0xD403",
+ "EventName": "FSU_FSZ_ISSUED",
+ "BriefDescription": "Uops issued by the scheduler on pipe Z"
+ },
+ {
+ "PublicDescription": "Uops canceled (load cancels)",
+ "EventCode": "0xD404",
+ "EventName": "FSU_CANCEL",
+ "BriefDescription": "Uops canceled (load cancels)"
+ },
+ {
+ "PublicDescription": "Count scheduler stalls due to divide/sqrt",
+ "EventCode": "0xD405",
+ "EventName": "FSU_DIV_SQRT_STALL",
+ "BriefDescription": "Count scheduler stalls due to divide/sqrt"
+ },
+ {
+ "PublicDescription": "Number of SWOB drains",
+ "EventCode": "0xD500",
+ "EventName": "GPC_SWOB_DRAIN",
+ "BriefDescription": "Number of SWOB drains"
+ },
+ {
+ "PublicDescription": "GPC detected a Breakpoint instruction match",
+ "EventCode": "0xD501",
+ "EventName": "BREAKPOINT_MATCH",
+ "BriefDescription": "GPC detected a Breakpoint instruction match"
+ },
+ {
+ "PublicDescription": "L1D TLB miss",
+ "EventCode": "0xD600",
+ "EventName": "L1D_TLB_MISS",
+ "BriefDescription": "L1D TLB miss"
+ },
+ {
+ "PublicDescription": "OFB full cycles",
+ "EventCode": "0xD601",
+ "EventName": "OFB_FULL",
+ "BriefDescription": "OFB full cycles"
+ },
+ {
+ "PublicDescription": "Load satisified from store forwarded data",
+ "EventCode": "0xD605",
+ "EventName": "LD_FROM_ST_FWD",
+ "BriefDescription": "Load satisified from store forwarded data"
+ },
+ {
+ "PublicDescription": "L1 prefetcher, load prefetch requests generated",
+ "EventCode": "0xD606",
+ "EventName": "L1_PFETCH_LD_GEN",
+ "BriefDescription": "L1 prefetcher, load prefetch requests generated"
+ },
+ {
+ "PublicDescription": "L1 prefetcher, load prefetch fills into the L1 cache",
+ "EventCode": "0xD607",
+ "EventName": "L1_PFETCH_LD_FILL",
+ "BriefDescription": "L1 prefetcher, load prefetch fills into the L1 cache"
+ },
+ {
+ "PublicDescription": "L1 prefetcher, load prefetch to L2 generated",
+ "EventCode": "0xD608",
+ "EventName": "L1_PFETCH_L2_REQ",
+ "BriefDescription": "L1 prefetcher, load prefetch to L2 generated"
+ },
+ {
+ "PublicDescription": "L1 prefetcher, distance was reset",
+ "EventCode": "0xD609",
+ "EventName": "L1_PFETCH_DIST_RST",
+ "BriefDescription": "L1 prefetcher, distance was reset"
+ },
+ {
+ "PublicDescription": "L1 prefetcher, distance was increased",
+ "EventCode": "0xD60A",
+ "EventName": "L1_PFETCH_DIST_INC",
+ "BriefDescription": "L1 prefetcher, distance was increased"
+ },
+ {
+ "PublicDescription": "L1 prefetcher, table entry is trained",
+ "EventCode": "0xD60B",
+ "EventName": "L1_PFETCH_ENTRY_TRAINED",
+ "BriefDescription": "L1 prefetcher, table entry is trained"
+ },
+ {
+ "PublicDescription": "Store retirement pipe stall",
+ "EventCode": "0xD60C",
+ "EventName": "LSU_ST_RETIRE_STALL",
+ "BriefDescription": "Store retirement pipe stall"
+ },
+ {
+ "PublicDescription": "LSU detected a Watchpoint data match",
+ "EventCode": "0xD60D",
+ "EventName": "WATCHPOINT_MATCH",
+ "BriefDescription": "LSU detected a Watchpoint data match"
+ },
+ {
+ "PublicDescription": "L2 pipeline replay",
+ "EventCode": "0xD700",
+ "EventName": "L2C_PIPE_REPLAY",
+ "BriefDescription": "L2 pipeline replay"
+ },
+ {
+ "PublicDescription": "L2 refill from I-side miss",
+ "EventCode": "0xD701",
+ "EventName": "L2C_INST_REFILL",
+ "BriefDescription": "L2 refill from I-side miss"
+ },
+ {
+ "PublicDescription": "L2 refill from D-side miss",
+ "EventCode": "0xD702",
+ "EventName": "L2C_DATA_REFILL",
+ "BriefDescription": "L2 refill from D-side miss"
+ },
+ {
+ "PublicDescription": "L2 prefetcher, load prefetch requests generated",
+ "EventCode": "0xD703",
+ "EventName": "L2_PREFETCH_REQ",
+ "BriefDescription": "L2 prefetcher, load prefetch requests generated"
+ },
+ {
+ "PublicDescription": "L2D OTB allocate",
+ "EventCode": "0xD800",
+ "EventName": "MMU_D_OTB_ALLOC",
+ "BriefDescription": "L2D OTB allocate"
+ },
+ {
+ "PublicDescription": "DTLB Translation cache hit on S1L2 walk cache entry",
+ "EventCode": "0xD801",
+ "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK",
+ "BriefDescription": "DTLB Translation cache hit on S1L2 walk cache entry"
+ },
+ {
+ "PublicDescription": "DTLB Translation cache hit on S1L1 walk cache entry",
+ "EventCode": "0xD802",
+ "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK",
+ "BriefDescription": "DTLB Translation cache hit on S1L1 walk cache entry"
+ },
+ {
+ "PublicDescription": "DTLB Translation cache hit on S1L0 walk cache entry",
+ "EventCode": "0xD803",
+ "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK",
+ "BriefDescription": "DTLB Translation cache hit on S1L0 walk cache entry"
+ },
+ {
+ "PublicDescription": "DTLB Translation cache hit on S2L2 walk cache entry",
+ "EventCode": "0xD804",
+ "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK",
+ "BriefDescription": "DTLB Translation cache hit on S2L2 walk cache entry"
+ },
+ {
+ "PublicDescription": "DTLB Translation cache hit on S2L1 walk cache entry",
+ "EventCode": "0xD805",
+ "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK",
+ "BriefDescription": "DTLB Translation cache hit on S2L1 walk cache entry"
+ },
+ {
+ "PublicDescription": "DTLB Translation cache hit on S2L0 walk cache entry",
+ "EventCode": "0xD806",
+ "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK",
+ "BriefDescription": "DTLB Translation cache hit on S2L0 walk cache entry"
+ },
+ {
+ "PublicDescription": "D-side S1 Page walk cache lookup",
+ "EventCode": "0xD807",
+ "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP",
+ "BriefDescription": "D-side S1 Page walk cache lookup"
+ },
+ {
+ "PublicDescription": "D-side S1 Page walk cache refill",
+ "EventCode": "0xD808",
+ "EventName": "MMU_D_S1_WALK_CACHE_REFILL",
+ "BriefDescription": "D-side S1 Page walk cache refill"
+ },
+ {
+ "PublicDescription": "D-side S2 Page walk cache lookup",
+ "EventCode": "0xD809",
+ "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP",
+ "BriefDescription": "D-side S2 Page walk cache lookup"
+ },
+ {
+ "PublicDescription": "D-side S2 Page walk cache refill",
+ "EventCode": "0xD80A",
+ "EventName": "MMU_D_S2_WALK_CACHE_REFILL",
+ "BriefDescription": "D-side S2 Page walk cache refill"
+ },
+ {
+ "PublicDescription": "D-side Stage1 tablewalk fault",
+ "EventCode": "0xD80B",
+ "EventName": "MMU_D_S1_WALK_FAULT",
+ "BriefDescription": "D-side Stage1 tablewalk fault"
+ },
+ {
+ "PublicDescription": "D-side Stage2 tablewalk fault",
+ "EventCode": "0xD80C",
+ "EventName": "MMU_D_S2_WALK_FAULT",
+ "BriefDescription": "D-side Stage2 tablewalk fault"
+ },
+ {
+ "PublicDescription": "D-side Tablewalk steps or descriptor fetches",
+ "EventCode": "0xD80D",
+ "EventName": "MMU_D_WALK_STEPS",
+ "BriefDescription": "D-side Tablewalk steps or descriptor fetches"
+ },
+ {
+ "PublicDescription": "L2I OTB allocate",
+ "EventCode": "0xD900",
+ "EventName": "MMU_I_OTB_ALLOC",
+ "BriefDescription": "L2I OTB allocate"
+ },
+ {
+ "PublicDescription": "ITLB Translation cache hit on S1L2 walk cache entry",
+ "EventCode": "0xD901",
+ "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK",
+ "BriefDescription": "ITLB Translation cache hit on S1L2 walk cache entry"
+ },
+ {
+ "PublicDescription": "ITLB Translation cache hit on S1L1 walk cache entry",
+ "EventCode": "0xD902",
+ "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK",
+ "BriefDescription": "ITLB Translation cache hit on S1L1 walk cache entry"
+ },
+ {
+ "PublicDescription": "ITLB Translation cache hit on S1L0 walk cache entry",
+ "EventCode": "0xD903",
+ "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK",
+ "BriefDescription": "ITLB Translation cache hit on S1L0 walk cache entry"
+ },
+ {
+ "PublicDescription": "ITLB Translation cache hit on S2L2 walk cache entry",
+ "EventCode": "0xD904",
+ "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK",
+ "BriefDescription": "ITLB Translation cache hit on S2L2 walk cache entry"
+ },
+ {
+ "PublicDescription": "ITLB Translation cache hit on S2L1 walk cache entry",
+ "EventCode": "0xD905",
+ "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK",
+ "BriefDescription": "ITLB Translation cache hit on S2L1 walk cache entry"
+ },
+ {
+ "PublicDescription": "ITLB Translation cache hit on S2L0 walk cache entry",
+ "EventCode": "0xD906",
+ "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK",
+ "BriefDescription": "ITLB Translation cache hit on S2L0 walk cache entry"
+ },
+ {
+ "PublicDescription": "I-side S1 Page walk cache lookup",
+ "EventCode": "0xD907",
+ "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP",
+ "BriefDescription": "I-side S1 Page walk cache lookup"
+ },
+ {
+ "PublicDescription": "I-side S1 Page walk cache refill",
+ "EventCode": "0xD908",
+ "EventName": "MMU_I_S1_WALK_CACHE_REFILL",
+ "BriefDescription": "I-side S1 Page walk cache refill"
+ },
+ {
+ "PublicDescription": "I-side S2 Page walk cache lookup",
+ "EventCode": "0xD909",
+ "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP",
+ "BriefDescription": "I-side S2 Page walk cache lookup"
+ },
+ {
+ "PublicDescription": "I-side S2 Page walk cache refill",
+ "EventCode": "0xD90A",
+ "EventName": "MMU_I_S2_WALK_CACHE_REFILL",
+ "BriefDescription": "I-side S2 Page walk cache refill"
+ },
+ {
+ "PublicDescription": "I-side Stage1 tablewalk fault",
+ "EventCode": "0xD90B",
+ "EventName": "MMU_I_S1_WALK_FAULT",
+ "BriefDescription": "I-side Stage1 tablewalk fault"
+ },
+ {
+ "PublicDescription": "I-side Stage2 tablewalk fault",
+ "EventCode": "0xD90C",
+ "EventName": "MMU_I_S2_WALK_FAULT",
+ "BriefDescription": "I-side Stage2 tablewalk fault"
+ },
+ {
+ "PublicDescription": "I-side Tablewalk steps or descriptor fetches",
+ "EventCode": "0xD90D",
+ "EventName": "MMU_I_WALK_STEPS",
+ "BriefDescription": "I-side Tablewalk steps or descriptor fetches"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json
new file mode 100644
index 000000000000..ada052e19632
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/exception.json
@@ -0,0 +1,44 @@
+[
+ {
+ "ArchStdEvent": "EXC_UNDEF"
+ },
+ {
+ "ArchStdEvent": "EXC_SVC"
+ },
+ {
+ "ArchStdEvent": "EXC_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ"
+ },
+ {
+ "ArchStdEvent": "EXC_HVC"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_OTHER"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_FIQ"
+ },
+ {
+ "ArchStdEvent": "EXC_TAKEN"
+ },
+ {
+ "ArchStdEvent": "EXC_RETURN"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
new file mode 100644
index 000000000000..18d1f2f76a23
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/instruction.json
@@ -0,0 +1,89 @@
+[
+ {
+ "ArchStdEvent": "SW_INCR"
+ },
+ {
+ "ArchStdEvent": "ST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_SPEC"
+ },
+ {
+ "ArchStdEvent": "LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDST_SPEC"
+ },
+ {
+ "ArchStdEvent": "DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SPEC"
+ },
+ {
+ "ArchStdEvent": "VFP_SPEC"
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_IMMED_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_RETIRED"
+ },
+ {
+ "ArchStdEvent": "CRYPTO_SPEC"
+ },
+ {
+ "ArchStdEvent": "ISB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DSB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DMB_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "RC_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "CID_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "TTBR_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_RETIRED"
+ },
+ {
+ "ArchStdEvent": "BR_MIS_PRED_RETIRED"
+ },
+ {
+ "ArchStdEvent": "OP_RETIRED"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, NOP",
+ "EventCode": "0x100",
+ "EventName": "NOP_SPEC",
+ "BriefDescription": "Speculatively executed, NOP"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json
new file mode 100644
index 000000000000..7ecffb989ae0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/intrinsic.json
@@ -0,0 +1,14 @@
+[
+ {
+ "ArchStdEvent": "LDREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_PASS_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_FAIL_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_SPEC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json
new file mode 100644
index 000000000000..0711782bfa6b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/memory.json
@@ -0,0 +1,44 @@
+[
+ {
+ "ArchStdEvent": "LD_RETIRED"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_WR"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LD_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "ST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS"
+ },
+ {
+ "ArchStdEvent": "MEMORY_ERROR"
+ },
+ {
+ "ArchStdEvent": "LDST_ALIGN_LAT"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
new file mode 100644
index 000000000000..f9fae15f7555
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
@@ -0,0 +1,23 @@
+[
+ {
+ "ArchStdEvent": "STALL_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_BACKEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_SLOT"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND_MEM"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json
new file mode 100644
index 000000000000..20f2165c85fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/spe.json
@@ -0,0 +1,14 @@
+[
+ {
+ "ArchStdEvent": "SAMPLE_POP"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FEED"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_FILTRATE"
+ },
+ {
+ "ArchStdEvent": "SAMPLE_COLLISION"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
index 6443a061e22a..6463531b9941 100644
--- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
@@ -3,28 +3,32 @@
"MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)",
"PublicDescription": "Frontend bound L1 topdown metric",
"BriefDescription": "Frontend bound L1 topdown metric",
- "MetricGroup": "TopDownL1",
+ "DefaultMetricgroupName": "TopDownL1",
+ "MetricGroup": "Default;TopDownL1",
"MetricName": "frontend_bound"
},
{
"MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)",
"PublicDescription": "Bad Speculation L1 topdown metric",
"BriefDescription": "Bad Speculation L1 topdown metric",
- "MetricGroup": "TopDownL1",
+ "DefaultMetricgroupName": "TopDownL1",
+ "MetricGroup": "Default;TopDownL1",
"MetricName": "bad_speculation"
},
{
"MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)",
"PublicDescription": "Retiring L1 topdown metric",
"BriefDescription": "Retiring L1 topdown metric",
- "MetricGroup": "TopDownL1",
+ "DefaultMetricgroupName": "TopDownL1",
+ "MetricGroup": "Default;TopDownL1",
"MetricName": "retiring"
},
{
"MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)",
"PublicDescription": "Backend Bound L1 topdown metric",
"BriefDescription": "Backend Bound L1 topdown metric",
- "MetricGroup": "TopDownL1",
+ "DefaultMetricgroupName": "TopDownL1",
+ "MetricGroup": "Default;TopDownL1",
"MetricName": "backend_bound"
},
{
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 9d400785d195..32674ddd2b63 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -41,3 +41,4 @@
0x00000000460f0010,v1,fujitsu/a64fx,core
0x00000000480fd010,v1,hisilicon/hip08,core
0x00000000500f0000,v1,ampere/emag,core
+0x00000000c00fac30,v1,ampere/ampereone,core
diff --git a/tools/perf/pmu-events/arch/arm64/sbsa.json b/tools/perf/pmu-events/arch/arm64/sbsa.json
index f678c37ea9c3..f90b338261ac 100644
--- a/tools/perf/pmu-events/arch/arm64/sbsa.json
+++ b/tools/perf/pmu-events/arch/arm64/sbsa.json
@@ -2,28 +2,32 @@
{
"MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)",
"BriefDescription": "Frontend bound L1 topdown metric",
- "MetricGroup": "TopdownL1",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricGroup": "Default;TopdownL1",
"MetricName": "frontend_bound",
"ScaleUnit": "100%"
},
{
"MetricExpr": "(1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
"BriefDescription": "Bad speculation L1 topdown metric",
- "MetricGroup": "TopdownL1",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricGroup": "Default;TopdownL1",
"MetricName": "bad_speculation",
"ScaleUnit": "100%"
},
{
"MetricExpr": "(op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
"BriefDescription": "Retiring L1 topdown metric",
- "MetricGroup": "TopdownL1",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricGroup": "Default;TopdownL1",
"MetricName": "retiring",
"ScaleUnit": "100%"
},
{
"MetricExpr": "stall_slot_backend / (#slots * cpu_cycles)",
"BriefDescription": "Backend Bound L1 topdown metric",
- "MetricGroup": "TopdownL1",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricGroup": "Default;TopdownL1",
"MetricName": "backend_bound",
"ScaleUnit": "100%"
}
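As a sanity check on the four expressions above: write S = #slots * cpu_cycles, F = stall_slot_frontend, B = stall_slot_backend, T = stall_slot, R = op_retired and O = op_spec. Assuming T = F + B (the usual reading of the stall_slot events, not something this patch itself states), the four metrics partition the slot budget:

\[
\frac{F}{S} + \frac{B}{S}
+ \frac{R}{O}\Bigl(1 - \frac{T}{S}\Bigr)
+ \Bigl(1 - \frac{R}{O}\Bigr)\Bigl(1 - \frac{T}{S}\Bigr)
= \frac{T}{S} + \Bigl(1 - \frac{T}{S}\Bigr) = 1
\]

so frontend_bound, backend_bound, retiring and bad_speculation sum to 100% once the ScaleUnit is applied, which is what makes the set coherent as a default topdown group.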
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
index 605be14f441c..839ae26945fb 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
@@ -1,53 +1,8 @@
[
{
- "EventCode": "0x1003C",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
- },
- {
- "EventCode": "0x1E054",
- "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
- },
- {
- "EventCode": "0x34054",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
- },
- {
- "EventCode": "0x34056",
- "EventName": "PM_EXEC_STALL_LOAD_FINISH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
- },
- {
- "EventCode": "0x3006C",
- "EventName": "PM_RUN_CYC_SMT2_MODE",
- "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
- },
- {
"EventCode": "0x300F4",
"EventName": "PM_RUN_INST_CMPL_CONC",
- "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
- },
- {
- "EventCode": "0x4C016",
- "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
- },
- {
- "EventCode": "0x4D014",
- "EventName": "PM_EXEC_STALL_LOAD",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x4D016",
- "EventName": "PM_EXEC_STALL_PTESYNC",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x401EA",
- "EventName": "PM_THRESH_EXC_128",
- "BriefDescription": "Threshold counter exceeded a value of 128."
+ "BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
},
{
"EventCode": "0x400F6",
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
index 54acb55e2c8c..e816cd10c129 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
@@ -1,7 +1,67 @@
[
{
- "EventCode": "0x4016E",
- "EventName": "PM_THRESH_NOT_MET",
- "BriefDescription": "Threshold counter did not meet threshold."
+ "EventCode": "0x100F4",
+ "EventName": "PM_FLOP_CMPL",
+ "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
+ },
+ {
+ "EventCode": "0x45050",
+ "EventName": "PM_1FLOP_CMPL",
+ "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ },
+ {
+ "EventCode": "0x45052",
+ "EventName": "PM_4FLOP_CMPL",
+ "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ },
+ {
+ "EventCode": "0x45054",
+ "EventName": "PM_FMA_CMPL",
+ "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
+ },
+ {
+ "EventCode": "0x45056",
+ "EventName": "PM_SCALAR_FLOP_CMPL",
+ "BriefDescription": "Scalar floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4505A",
+ "EventName": "PM_SP_FLOP_CMPL",
+ "BriefDescription": "Single Precision floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4505C",
+ "EventName": "PM_MATH_FLOP_CMPL",
+ "BriefDescription": "Math floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D052",
+ "EventName": "PM_2FLOP_CMPL",
+ "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+ },
+ {
+ "EventCode": "0x4D054",
+ "EventName": "PM_8FLOP_CMPL",
+ "BriefDescription": "Four Double Precision vector instruction completed."
+ },
+ {
+ "EventCode": "0x4D056",
+ "EventName": "PM_NON_FMA_FLOP_CMPL",
+ "BriefDescription": "Non FMA instruction completed."
+ },
+ {
+ "EventCode": "0x4D058",
+ "EventName": "PM_VECTOR_FLOP_CMPL",
+ "BriefDescription": "Vector floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D05A",
+ "EventName": "PM_NON_MATH_FLOP_CMPL",
+ "BriefDescription": "Non Math instruction completed."
+ },
+ {
+ "EventCode": "0x4D05C",
+ "EventName": "PM_DPP_FLOP_CMPL",
+ "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
index 558f9530f54e..dc0bb6c6338b 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
@@ -1,65 +1,10 @@
[
{
- "EventCode": "0x10004",
- "EventName": "PM_EXEC_STALL_TRANSLATION",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
- },
- {
- "EventCode": "0x10006",
- "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
- },
- {
- "EventCode": "0x10010",
- "EventName": "PM_PMC4_OVERFLOW",
- "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
- },
- {
- "EventCode": "0x10020",
- "EventName": "PM_PMC4_REWIND",
- "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
- },
- {
- "EventCode": "0x10038",
- "EventName": "PM_DISP_STALL_TRANSLATION",
- "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
- },
- {
- "EventCode": "0x1003A",
- "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
- },
- {
- "EventCode": "0x1D05E",
- "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
- },
- {
- "EventCode": "0x1E050",
- "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
- },
- {
"EventCode": "0x1F054",
"EventName": "PM_DTLB_HIT",
"BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
},
{
- "EventCode": "0x10064",
- "EventName": "PM_DISP_STALL_IC_L2",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
- },
- {
- "EventCode": "0x101E8",
- "EventName": "PM_THRESH_EXC_256",
- "BriefDescription": "Threshold counter exceeded a count of 256."
- },
- {
- "EventCode": "0x101EC",
- "EventName": "PM_THRESH_MET",
- "BriefDescription": "Threshold exceeded."
- },
- {
"EventCode": "0x100F2",
"EventName": "PM_1PLUS_PPC_CMPL",
"BriefDescription": "Cycles in which at least one instruction is completed by this thread."
@@ -67,57 +12,7 @@
{
"EventCode": "0x100F6",
"EventName": "PM_IERAT_MISS",
- "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
- },
- {
- "EventCode": "0x100F8",
- "EventName": "PM_DISP_STALL_CYC",
- "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
- },
- {
- "EventCode": "0x20006",
- "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
- },
- {
- "EventCode": "0x20114",
- "EventName": "PM_MRK_L2_RC_DISP",
- "BriefDescription": "Marked instruction RC dispatched in L2."
- },
- {
- "EventCode": "0x2C010",
- "EventName": "PM_EXEC_STALL_LSU",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
- },
- {
- "EventCode": "0x2C016",
- "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
- "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
- },
- {
- "EventCode": "0x2C01E",
- "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
- },
- {
- "EventCode": "0x2D01A",
- "EventName": "PM_DISP_STALL_IC_MISS",
- "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
- },
- {
- "EventCode": "0x2E018",
- "EventName": "PM_DISP_STALL_FETCH",
- "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
- },
- {
- "EventCode": "0x2E01A",
- "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
- },
- {
- "EventCode": "0x2C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event. This event only counts instruction demand access."
},
{
"EventCode": "0x24050",
@@ -135,11 +30,6 @@
"BriefDescription": "Branch Taken instruction completed."
},
{
- "EventCode": "0x30004",
- "EventName": "PM_DISP_STALL_FLUSH",
- "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
- },
- {
"EventCode": "0x3000A",
"EventName": "PM_DISP_STALL_ITLB_MISS",
"BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
@@ -150,59 +40,19 @@
"BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
},
{
- "EventCode": "0x30014",
- "EventName": "PM_EXEC_STALL_STORE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x30018",
- "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
- },
- {
- "EventCode": "0x30026",
- "EventName": "PM_EXEC_STALL_STORE_MISS",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
- },
- {
- "EventCode": "0x3012A",
- "EventName": "PM_MRK_L2_RC_DONE",
- "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
- },
- {
"EventCode": "0x3F046",
"EventName": "PM_ITLB_HIT_1G",
"BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x34058",
- "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
- "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
- },
- {
- "EventCode": "0x3D05C",
- "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
- },
- {
- "EventCode": "0x3E052",
- "EventName": "PM_DISP_STALL_IC_L3",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
- },
- {
"EventCode": "0x3E054",
"EventName": "PM_LD_MISS_L1",
- "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
- },
- {
- "EventCode": "0x301EA",
- "EventName": "PM_THRESH_EXC_1024",
- "BriefDescription": "Threshold counter exceeded a value of 1024."
+ "BriefDescription": "Load missed L1, counted at finish time. LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{
"EventCode": "0x300FA",
"EventName": "PM_INST_FROM_L3MISS",
- "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+ "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss."
},
{
"EventCode": "0x40006",
@@ -210,38 +60,8 @@
"BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
},
{
- "EventCode": "0x40116",
- "EventName": "PM_MRK_LARX_FIN",
- "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
- },
- {
- "EventCode": "0x4C010",
- "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
- },
- {
- "EventCode": "0x4D01E",
- "EventName": "PM_DISP_STALL_BR_MPRED",
- "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
- },
- {
- "EventCode": "0x4E010",
- "EventName": "PM_DISP_STALL_IC_L3MISS",
- "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
- },
- {
- "EventCode": "0x4E01A",
- "EventName": "PM_DISP_STALL_HELD_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
- },
- {
- "EventCode": "0x4003C",
- "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
- },
- {
"EventCode": "0x44056",
"EventName": "PM_VECTOR_ST_CMPL",
- "BriefDescription": "Vector store instructions completed."
+ "BriefDescription": "Vector store instruction completed."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
index 58b5dfe3a273..913b6515b870 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
@@ -1,15 +1,30 @@
[
{
- "EventCode": "0x1002C",
- "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
- "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
- },
- {
"EventCode": "0x10132",
"EventName": "PM_MRK_INST_ISSUED",
"BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
},
{
+ "EventCode": "0x10134",
+ "EventName": "PM_MRK_ST_DONE_L2",
+ "BriefDescription": "Marked store completed in L2."
+ },
+ {
+ "EventCode": "0x1C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ },
+ {
+ "EventCode": "0x1C144",
+ "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
+ "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
+ },
+ {
+ "EventCode": "0x1F150",
+ "EventName": "PM_MRK_ST_L2_CYC",
+ "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
+ },
+ {
"EventCode": "0x101E0",
"EventName": "PM_MRK_INST_DISP",
"BriefDescription": "The thread has dispatched a randomly sampled marked instruction."
@@ -20,14 +35,39 @@
"BriefDescription": "Marked Branch Taken instruction completed."
},
{
- "EventCode": "0x20112",
- "EventName": "PM_MRK_NTF_FIN",
- "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
+ "EventCode": "0x101E4",
+ "EventName": "PM_MRK_L1_ICACHE_MISS",
+ "BriefDescription": "Marked instruction suffered an instruction cache miss."
+ },
+ {
+ "EventCode": "0x101EA",
+ "EventName": "PM_MRK_L1_RELOAD_VALID",
+ "BriefDescription": "Marked demand reload."
+ },
+ {
+ "EventCode": "0x20114",
+ "EventName": "PM_MRK_L2_RC_DISP",
+ "BriefDescription": "Marked instruction RC dispatched in L2."
},
{
- "EventCode": "0x2C01C",
- "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
+ "EventCode": "0x2011C",
+ "EventName": "PM_MRK_NTF_CYC",
+ "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
+ },
+ {
+ "EventCode": "0x20130",
+ "EventName": "PM_MRK_INST_DECODED",
+ "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
+ },
+ {
+ "EventCode": "0x20132",
+ "EventName": "PM_MRK_DFU_ISSUE",
+ "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
+ },
+ {
+ "EventCode": "0x20134",
+ "EventName": "PM_MRK_FXU_ISSUE",
+ "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
},
{
"EventCode": "0x20138",
@@ -40,6 +80,16 @@
"BriefDescription": "Marked Branch instruction finished."
},
{
+ "EventCode": "0x2013C",
+ "EventName": "PM_MRK_FX_LSU_FIN",
+ "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+ },
+ {
+ "EventCode": "0x2C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ },
+ {
"EventCode": "0x2C144",
"EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
"BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]."
@@ -60,19 +110,49 @@
"BriefDescription": "A marked branch completed. All branches are included."
},
{
- "EventCode": "0x200FD",
- "EventName": "PM_L1_ICACHE_MISS",
- "BriefDescription": "Demand iCache Miss."
+ "EventCode": "0x2D154",
+ "EventName": "PM_MRK_DERAT_MISS_64K",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+ },
+ {
+ "EventCode": "0x201E0",
+ "EventName": "PM_MRK_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
+ },
+ {
+ "EventCode": "0x201E2",
+ "EventName": "PM_MRK_LD_MISS_L1",
+ "BriefDescription": "Marked demand data load miss counted at finish time."
},
{
- "EventCode": "0x30130",
- "EventName": "PM_MRK_INST_FIN",
- "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
+ "EventCode": "0x201E4",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
+ },
+ {
+ "EventCode": "0x3012A",
+ "EventName": "PM_MRK_L2_RC_DONE",
+ "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
+ },
+ {
+ "EventCode": "0x30132",
+ "EventName": "PM_MRK_VSU_FIN",
+ "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
},
{
"EventCode": "0x34146",
"EventName": "PM_MRK_LD_CMPL",
- "BriefDescription": "Marked loads completed."
+ "BriefDescription": "Marked load instruction completed."
+ },
+ {
+ "EventCode": "0x3C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+ },
+ {
+ "EventCode": "0x3C144",
+ "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
+ "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
},
{
"EventCode": "0x3E158",
@@ -82,12 +162,22 @@
{
"EventCode": "0x3E15A",
"EventName": "PM_MRK_ST_FIN",
- "BriefDescription": "The marked instruction was a store of any kind."
+ "BriefDescription": "Marked store instruction finished."
},
{
- "EventCode": "0x30068",
- "EventName": "PM_L1_ICACHE_RELOADED_PREF",
- "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
+ "EventCode": "0x3F150",
+ "EventName": "PM_MRK_ST_DRAIN_CYC",
+ "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
+ },
+ {
+ "EventCode": "0x30162",
+ "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
+ "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
+ },
+ {
+ "EventCode": "0x301E2",
+ "EventName": "PM_MRK_ST_CMPL",
+ "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
},
{
"EventCode": "0x301E4",
@@ -95,48 +185,58 @@
"BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
},
{
- "EventCode": "0x300F6",
- "EventName": "PM_LD_DEMAND_MISS_L1",
- "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
+ "EventCode": "0x40116",
+ "EventName": "PM_MRK_LARX_FIN",
+ "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
+ },
+ {
+ "EventCode": "0x40132",
+ "EventName": "PM_MRK_LSU_FIN",
+ "BriefDescription": "LSU marked instruction finish."
},
{
- "EventCode": "0x300FE",
- "EventName": "PM_DATA_FROM_L3MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+ "EventCode": "0x44146",
+ "EventName": "PM_MRK_STCX_CORE_CYC",
+ "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
},
{
- "EventCode": "0x40012",
- "EventName": "PM_L1_ICACHE_RELOADED_ALL",
- "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+ "EventCode": "0x4C142",
+ "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
+ "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x40134",
- "EventName": "PM_MRK_INST_TIMEO",
- "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
+ "EventCode": "0x4C144",
+ "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
+ "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
},
{
- "EventCode": "0x4505A",
- "EventName": "PM_SP_FLOP_CMPL",
- "BriefDescription": "Single Precision floating point instructions completed."
+ "EventCode": "0x4C15E",
+ "EventName": "PM_MRK_DTLB_MISS_64K",
+ "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x4D058",
- "EventName": "PM_VECTOR_FLOP_CMPL",
- "BriefDescription": "Vector floating point instructions completed."
+ "EventCode": "0x4E15E",
+ "EventName": "PM_MRK_INST_FLUSHED",
+ "BriefDescription": "The marked instruction was flushed."
},
{
- "EventCode": "0x4D05A",
- "EventName": "PM_NON_MATH_FLOP_CMPL",
- "BriefDescription": "Non Math instructions completed."
+ "EventCode": "0x40164",
+ "EventName": "PM_MRK_DERAT_MISS_2M",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
"EventCode": "0x401E0",
"EventName": "PM_MRK_INST_CMPL",
- "BriefDescription": "marked instruction completed."
+ "BriefDescription": "Marked instruction completed."
+ },
+ {
+ "EventCode": "0x401E6",
+ "EventName": "PM_MRK_INST_FROM_L3MISS",
+ "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
},
{
- "EventCode": "0x400FE",
- "EventName": "PM_DATA_FROM_MEMORY",
- "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
+ "EventCode": "0x401E8",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS",
+ "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
index 843b51f531e9..b95a547a704b 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
@@ -1,25 +1,10 @@
[
{
- "EventCode": "0x1000A",
- "EventName": "PM_PMC3_REWIND",
- "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
- },
- {
"EventCode": "0x1C040",
"EventName": "PM_XFER_FROM_SRC_PMC1",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x1C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
- },
- {
- "EventCode": "0x1C144",
- "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
- "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
- },
- {
"EventCode": "0x1C056",
"EventName": "PM_DERAT_MISS_4K",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -35,24 +20,9 @@
"BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x1E056",
- "EventName": "PM_EXEC_STALL_STORE_PIPE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
- },
- {
- "EventCode": "0x1F150",
- "EventName": "PM_MRK_ST_L2_CYC",
- "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
- },
- {
"EventCode": "0x10062",
"EventName": "PM_LD_L3MISS_PEND_CYC",
- "BriefDescription": "Cycles L3 miss was pending for this thread."
- },
- {
- "EventCode": "0x20010",
- "EventName": "PM_PMC1_OVERFLOW",
- "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+ "BriefDescription": "Cycles in which an L3 miss was pending for this thread."
},
{
"EventCode": "0x2001A",
@@ -80,36 +50,16 @@
"BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x2D154",
- "EventName": "PM_MRK_DERAT_MISS_64K",
- "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
- },
- {
"EventCode": "0x200F6",
"EventName": "PM_DERAT_MISS",
"BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
- "EventCode": "0x30016",
- "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
- },
- {
"EventCode": "0x3C040",
"EventName": "PM_XFER_FROM_SRC_PMC3",
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x3C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
- },
- {
- "EventCode": "0x3C144",
- "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
- "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
- },
- {
"EventCode": "0x3C054",
"EventName": "PM_DERAT_MISS_16M",
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -125,24 +75,14 @@
"BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "0x301E2",
- "EventName": "PM_MRK_ST_CMPL",
- "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
- },
- {
"EventCode": "0x300FC",
"EventName": "PM_DTLB_MISS",
- "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
- },
- {
- "EventCode": "0x4D02C",
- "EventName": "PM_PMC1_REWIND",
- "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+ "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
},
{
"EventCode": "0x4003E",
"EventName": "PM_LD_CMPL",
- "BriefDescription": "Loads completed."
+ "BriefDescription": "Load instruction completed."
},
{
"EventCode": "0x4C040",
@@ -150,16 +90,6 @@
"BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
},
{
- "EventCode": "0x4C142",
- "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
- "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
- },
- {
- "EventCode": "0x4C144",
- "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
- "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
- },
- {
"EventCode": "0x4C056",
"EventName": "PM_DTLB_MISS_16M",
"BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
@@ -168,20 +98,5 @@
"EventCode": "0x4C05A",
"EventName": "PM_DTLB_MISS_1G",
"BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
- },
- {
- "EventCode": "0x4C15E",
- "EventName": "PM_MRK_DTLB_MISS_64K",
- "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
- },
- {
- "EventCode": "0x4D056",
- "EventName": "PM_NON_FMA_FLOP_CMPL",
- "BriefDescription": "Non FMA instruction completed."
- },
- {
- "EventCode": "0x40164",
- "EventName": "PM_MRK_DERAT_MISS_2M",
- "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
index 6f53583a0c62..182369076d95 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
@@ -16,133 +16,133 @@
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
"MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI;CPI_STALL_RATIO",
- "MetricName": "DISPATCHED_CPI"
+ "MetricName": "DISPATCH_STALL_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush",
"MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_FLUSH_CPI"
+ "MetricName": "DISPATCH_STALL_FLUSH_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss",
"MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_TRANSLATION_CPI"
+ "MetricName": "DISPATCH_STALL_TRANSLATION_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss",
"MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI"
+ "MetricName": "DISPATCH_STALL_IERAT_ONLY_MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss",
"MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_ITLB_MISS_CPI"
+ "MetricName": "DISPATCH_STALL_ITLB_MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss",
"MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_MISS_CPI"
+ "MetricName": "DISPATCH_STALL_IC_MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2",
"MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_L2_CPI"
+ "MetricName": "DISPATCH_STALL_IC_L2_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3",
"MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_L3_CPI"
+ "MetricName": "DISPATCH_STALL_IC_L3_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3",
"MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_IC_L3MISS_CPI"
+ "MetricName": "DISPATCH_STALL_IC_L3MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_ICMISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L2_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3MISS_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict",
"MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_BR_MPRED_CPI"
+ "MetricName": "DISPATCH_STALL_BR_MPRED_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason",
"MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch",
"MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISP_HELD_STALL_SYNC_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_SYNC_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard",
"MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_SCOREBOARD_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full",
"MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_ISSQ_FULL_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full",
"MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_RENAME_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_RENAME_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full",
"MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_STF_MAPPER_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full",
"MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_XVFC_MAPPER_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason",
"MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_OTHER_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_OTHER_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
@@ -352,13 +352,13 @@
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
"MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_FETCH_CPI"
+ "MetricName": "DISPATCH_STALL_FETCH_CPI"
},
{
"BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management",
"MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "CPI",
- "MetricName": "DISPATCHED_HELD_HALT_CPI"
+ "MetricName": "DISPATCH_STALL_HELD_HALT_CPI"
},
{
"BriefDescription": "Percentage of flushes per completed instruction",
@@ -454,12 +454,6 @@
"MetricName": "LOADS_PER_INST"
},
{
- "BriefDescription": "Average number of finished stores per completed instruction",
- "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
- "MetricGroup": "General",
- "MetricName": "STORES_PER_INST"
- },
- {
"BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction",
"MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
"MetricGroup": "dL1_Reloads",
@@ -566,7 +560,7 @@
"BriefDescription": "Average number of STCX instructions finshed per completed instruction",
"MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL",
"MetricGroup": "General",
- "MetricName": "STXC_PER_INST"
+ "MetricName": "STCX_PER_INST"
},
{
"BriefDescription": "Average number of LARX instructions finshed per completed instruction",
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
index a771e4b6bec5..f09c00c89322 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
@@ -1,28 +1,13 @@
[
{
- "EventCode": "0x10016",
- "EventName": "PM_VSU0_ISSUE",
- "BriefDescription": "VSU instructions issued to VSU pipe 0."
- },
- {
- "EventCode": "0x1001C",
- "EventName": "PM_ULTRAVISOR_INST_CMPL",
- "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
- },
- {
- "EventCode": "0x100F0",
- "EventName": "PM_CYC",
- "BriefDescription": "Processor cycles."
- },
- {
- "EventCode": "0x10134",
- "EventName": "PM_MRK_ST_DONE_L2",
- "BriefDescription": "Marked stores completed in L2 (RC machine done)."
+ "EventCode": "0x1002C",
+ "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
+ "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
},
{
"EventCode": "0x1505E",
"EventName": "PM_LD_HIT_L1",
- "BriefDescription": "Loads that finished without experiencing an L1 miss."
+ "BriefDescription": "Load finished without experiencing an L1 miss."
},
{
"EventCode": "0x1F056",
@@ -30,49 +15,19 @@
"BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
},
{
- "EventCode": "0x1F15C",
- "EventName": "PM_MRK_STCX_L2_CYC",
- "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
- },
- {
"EventCode": "0x10066",
"EventName": "PM_ADJUNCT_CYC",
"BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
},
{
- "EventCode": "0x101E4",
- "EventName": "PM_MRK_L1_ICACHE_MISS",
- "BriefDescription": "Marked Instruction suffered an icache Miss."
- },
- {
- "EventCode": "0x101EA",
- "EventName": "PM_MRK_L1_RELOAD_VALID",
- "BriefDescription": "Marked demand reload."
- },
- {
- "EventCode": "0x100F4",
- "EventName": "PM_FLOP_CMPL",
- "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
- },
- {
- "EventCode": "0x100FA",
- "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
- "BriefDescription": "Cycles when at least one thread has the run latch set."
- },
- {
"EventCode": "0x100FC",
"EventName": "PM_LD_REF_L1",
"BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
},
{
- "EventCode": "0x2000C",
- "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
- "BriefDescription": "Cycles when the run latch is set for all threads."
- },
- {
"EventCode": "0x2E010",
"EventName": "PM_ADJUNCT_INST_CMPL",
- "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
+ "BriefDescription": "PowerPC instruction completed while the thread was in Adjunct state."
},
{
"EventCode": "0x2E014",
@@ -80,26 +35,6 @@
"BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
},
{
- "EventCode": "0x20130",
- "EventName": "PM_MRK_INST_DECODED",
- "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
- },
- {
- "EventCode": "0x20132",
- "EventName": "PM_MRK_DFU_ISSUE",
- "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
- },
- {
- "EventCode": "0x20134",
- "EventName": "PM_MRK_FXU_ISSUE",
- "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
- },
- {
- "EventCode": "0x2505C",
- "EventName": "PM_VSU_ISSUE",
- "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
- },
- {
"EventCode": "0x2F054",
"EventName": "PM_DISP_SS1_2_INSTR_CYC",
"BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
@@ -110,39 +45,14 @@
"BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
},
{
- "EventCode": "0x2006C",
- "EventName": "PM_RUN_CYC_SMT4_MODE",
- "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
- },
- {
- "EventCode": "0x201E0",
- "EventName": "PM_MRK_DATA_FROM_MEMORY",
- "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
- },
- {
- "EventCode": "0x201E4",
- "EventName": "PM_MRK_DATA_FROM_L3MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
- },
- {
- "EventCode": "0x201E8",
- "EventName": "PM_THRESH_EXC_512",
- "BriefDescription": "Threshold counter exceeded a value of 512."
- },
- {
"EventCode": "0x200F2",
"EventName": "PM_INST_DISP",
- "BriefDescription": "PowerPC instructions dispatched."
- },
- {
- "EventCode": "0x30132",
- "EventName": "PM_MRK_VSU_FIN",
- "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
+ "BriefDescription": "PowerPC instruction dispatched."
},
{
- "EventCode": "0x30038",
- "EventName": "PM_EXEC_STALL_DMISS_LMEM",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
+ "EventCode": "0x200FD",
+ "EventName": "PM_L1_ICACHE_MISS",
+ "BriefDescription": "Demand instruction cache miss."
},
{
"EventCode": "0x3F04A",
@@ -152,12 +62,7 @@
{
"EventCode": "0x3405A",
"EventName": "PM_PRIVILEGED_INST_CMPL",
- "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
- },
- {
- "EventCode": "0x3F150",
- "EventName": "PM_MRK_ST_DRAIN_CYC",
- "BriefDescription": "cycles to drain st from core to L2."
+ "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
},
{
"EventCode": "0x3F054",
@@ -170,74 +75,29 @@
"BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
},
{
- "EventCode": "0x30162",
- "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
- "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
- },
- {
- "EventCode": "0x40114",
- "EventName": "PM_MRK_START_PROBE_NOP_DISP",
- "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
- },
- {
- "EventCode": "0x4001C",
- "EventName": "PM_VSU_FIN",
- "BriefDescription": "VSU instructions finished."
- },
- {
- "EventCode": "0x4C01A",
- "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
- },
- {
- "EventCode": "0x4D012",
- "EventName": "PM_PMC3_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
+ "EventCode": "0x30068",
+ "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+ "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
},
{
- "EventCode": "0x4D022",
- "EventName": "PM_HYPERVISOR_INST_CMPL",
- "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
+ "EventCode": "0x300F6",
+ "EventName": "PM_LD_DEMAND_MISS_L1",
+ "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
},
{
- "EventCode": "0x4D026",
- "EventName": "PM_ULTRAVISOR_CYC",
- "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
+ "EventCode": "0x300FE",
+ "EventName": "PM_DATA_FROM_L3MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
},
{
- "EventCode": "0x4D028",
- "EventName": "PM_PRIVILEGED_CYC",
- "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
- },
- {
- "EventCode": "0x40030",
- "EventName": "PM_INST_FIN",
- "BriefDescription": "Instructions finished."
- },
- {
- "EventCode": "0x44146",
- "EventName": "PM_MRK_STCX_CORE_CYC",
- "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
+ "EventCode": "0x40012",
+ "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+ "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
},
{
"EventCode": "0x44054",
"EventName": "PM_VECTOR_LD_CMPL",
- "BriefDescription": "Vector load instructions completed."
- },
- {
- "EventCode": "0x45054",
- "EventName": "PM_FMA_CMPL",
- "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
- },
- {
- "EventCode": "0x45056",
- "EventName": "PM_SCALAR_FLOP_CMPL",
- "BriefDescription": "Scalar floating point instructions completed."
- },
- {
- "EventCode": "0x4505C",
- "EventName": "PM_MATH_FLOP_CMPL",
- "BriefDescription": "Math floating point instructions completed."
+ "BriefDescription": "Vector load instruction completed."
},
{
"EventCode": "0x4D05E",
@@ -245,28 +105,13 @@
"BriefDescription": "A branch completed. All branches are included."
},
{
- "EventCode": "0x4E15E",
- "EventName": "PM_MRK_INST_FLUSHED",
- "BriefDescription": "The marked instruction was flushed."
- },
- {
- "EventCode": "0x401E6",
- "EventName": "PM_MRK_INST_FROM_L3MISS",
- "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
- },
- {
- "EventCode": "0x401E8",
- "EventName": "PM_MRK_DATA_FROM_L2MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
- },
- {
"EventCode": "0x400F0",
"EventName": "PM_LD_DEMAND_MISS_L1_FIN",
- "BriefDescription": "Load Missed L1, counted at finish time."
+ "BriefDescription": "Load missed L1, counted at finish time."
},
{
- "EventCode": "0x500FA",
- "EventName": "PM_RUN_INST_CMPL",
- "BriefDescription": "Completed PowerPC instructions gated by the run latch."
+ "EventCode": "0x400FE",
+ "EventName": "PM_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
}
]
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
index b8aded6045fa..a8272a2f0517 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
@@ -1,8 +1,13 @@
[
{
- "EventCode": "0x100FE",
- "EventName": "PM_INST_CMPL",
- "BriefDescription": "PowerPC instructions completed."
+ "EventCode": "0x10004",
+ "EventName": "PM_EXEC_STALL_TRANSLATION",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
+ },
+ {
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
},
{
"EventCode": "0x1000C",
@@ -12,7 +17,7 @@
{
"EventCode": "0x1000E",
"EventName": "PM_MMA_ISSUED",
- "BriefDescription": "MMA instructions issued."
+ "BriefDescription": "MMA instruction issued."
},
{
"EventCode": "0x10012",
@@ -30,14 +35,19 @@
"BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
},
{
- "EventCode": "0x10022",
- "EventName": "PM_PMC2_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
+ "EventCode": "0x10038",
+ "EventName": "PM_DISP_STALL_TRANSLATION",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
},
{
- "EventCode": "0x10024",
- "EventName": "PM_PMC5_OVERFLOW",
- "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
+ "EventCode": "0x1003A",
+ "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
+ },
+ {
+ "EventCode": "0x1003C",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
},
{
"EventCode": "0x10058",
@@ -55,11 +65,36 @@
"BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
},
{
+ "EventCode": "0x1D05E",
+ "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
+ },
+ {
+ "EventCode": "0x1E050",
+ "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+ },
+ {
+ "EventCode": "0x1E054",
+ "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+ },
+ {
+ "EventCode": "0x1E056",
+ "EventName": "PM_EXEC_STALL_STORE_PIPE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
+ },
+ {
"EventCode": "0x1E05A",
"EventName": "PM_CMPL_STALL_LWSYNC",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
},
{
+ "EventCode": "0x10064",
+ "EventName": "PM_DISP_STALL_IC_L2",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+ },
+ {
"EventCode": "0x10068",
"EventName": "PM_BR_FIN",
"BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional."
@@ -70,9 +105,9 @@
"BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
},
{
- "EventCode": "0x1006C",
- "EventName": "PM_RUN_CYC_ST_MODE",
- "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
+ "EventCode": "0x100F8",
+ "EventName": "PM_DISP_STALL_CYC",
+ "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
},
{
"EventCode": "0x20004",
@@ -80,9 +115,9 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
},
{
- "EventCode": "0x2000A",
- "EventName": "PM_HYPERVISOR_CYC",
- "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
},
{
"EventCode": "0x2000E",
@@ -90,24 +125,59 @@
"BriefDescription": "LSU Finished an internal operation in LD1 port."
},
{
+ "EventCode": "0x2C010",
+ "EventName": "PM_EXEC_STALL_LSU",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
+ },
+ {
"EventCode": "0x2C014",
"EventName": "PM_CMPL_STALL_SPECIAL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
},
{
+ "EventCode": "0x2C016",
+ "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
+ },
+ {
"EventCode": "0x2C018",
"EventName": "PM_EXEC_STALL_DMISS_L3MISS",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
},
{
+ "EventCode": "0x2C01C",
+ "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
+ },
+ {
+ "EventCode": "0x2C01E",
+ "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
+ },
+ {
"EventCode": "0x2D010",
"EventName": "PM_LSU_ST1_FIN",
"BriefDescription": "LSU Finished an internal operation in ST1 port."
},
{
+ "EventCode": "0x10016",
+ "EventName": "PM_VSU0_ISSUE",
+ "BriefDescription": "VSU instruction issued to VSU pipe 0."
+ },
+ {
"EventCode": "0x2D012",
"EventName": "PM_VSU1_ISSUE",
- "BriefDescription": "VSU instructions issued to VSU pipe 1."
+ "BriefDescription": "VSU instruction issued to VSU pipe 1."
+ },
+ {
+ "EventCode": "0x2505C",
+ "EventName": "PM_VSU_ISSUE",
+ "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
+ },
+ {
+ "EventCode": "0x4001C",
+ "EventName": "PM_VSU_FIN",
+ "BriefDescription": "VSU instruction finished."
},
{
"EventCode": "0x2D018",
@@ -115,19 +185,34 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
},
{
+ "EventCode": "0x2D01A",
+ "EventName": "PM_DISP_STALL_IC_MISS",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
+ },
+ {
"EventCode": "0x2D01C",
"EventName": "PM_CMPL_STALL_STCX",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
},
{
- "EventCode": "0x2E01E",
- "EventName": "PM_EXEC_STALL_NTC_FLUSH",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
+ "EventCode": "0x2E018",
+ "EventName": "PM_DISP_STALL_FETCH",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
+ },
+ {
+ "EventCode": "0x2E01A",
+ "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
},
{
- "EventCode": "0x2013C",
- "EventName": "PM_MRK_FX_LSU_FIN",
- "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+ "EventCode": "0x2E01C",
+ "EventName": "PM_EXEC_STALL_TLBIE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
+ },
+ {
+ "EventCode": "0x2E01E",
+ "EventName": "PM_EXEC_STALL_NTC_FLUSH",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous next-to-finish (NTF) instruction is still completing and the new NTF instruction is stalled at dispatch."
},
{
"EventCode": "0x2405A",
@@ -135,14 +220,9 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
},
{
- "EventCode": "0x201E2",
- "EventName": "PM_MRK_LD_MISS_L1",
- "BriefDescription": "Marked DL1 Demand Miss counted at finish time."
- },
- {
- "EventCode": "0x200F4",
- "EventName": "PM_RUN_CYC",
- "BriefDescription": "Processor cycles gated by the run latch."
+ "EventCode": "0x30004",
+ "EventName": "PM_DISP_STALL_FLUSH",
+ "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
},
{
"EventCode": "0x30008",
@@ -150,29 +230,34 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
},
{
- "EventCode": "0x3001A",
- "EventName": "PM_LSU_ST2_FIN",
- "BriefDescription": "LSU Finished an internal operation in ST2 port."
+ "EventCode": "0x30014",
+ "EventName": "PM_EXEC_STALL_STORE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
},
{
- "EventCode": "0x30020",
- "EventName": "PM_PMC2_REWIND",
- "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
+ "EventCode": "0x30016",
+ "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
},
{
- "EventCode": "0x30022",
- "EventName": "PM_PMC4_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
+ "EventCode": "0x30018",
+ "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
+ },
+ {
+ "EventCode": "0x3001A",
+ "EventName": "PM_LSU_ST2_FIN",
+ "BriefDescription": "LSU Finished an internal operation in ST2 port."
},
{
- "EventCode": "0x30024",
- "EventName": "PM_PMC6_OVERFLOW",
- "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
+ "EventCode": "0x30026",
+ "EventName": "PM_EXEC_STALL_STORE_MISS",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
},
{
"EventCode": "0x30028",
"EventName": "PM_CMPL_STALL_MEM_ECC",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a STCX waiting for its result or a load waiting for non-critical sectors of data and ECC."
},
{
"EventCode": "0x30036",
@@ -180,6 +265,11 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
},
{
+ "EventCode": "0x30038",
+ "EventName": "PM_EXEC_STALL_DMISS_LMEM",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
+ },
+ {
"EventCode": "0x3003A",
"EventName": "PM_CMPL_STALL_EXCEPTION",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete."
@@ -187,17 +277,37 @@
{
"EventCode": "0x3F044",
"EventName": "PM_VSU2_ISSUE",
- "BriefDescription": "VSU instructions issued to VSU pipe 2."
+ "BriefDescription": "VSU instruction issued to VSU pipe 2."
},
{
"EventCode": "0x30058",
"EventName": "PM_TLBIE_FIN",
- "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+ "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+ },
+ {
+ "EventCode": "0x34054",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+ },
+ {
+ "EventCode": "0x34056",
+ "EventName": "PM_EXEC_STALL_LOAD_FINISH",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
+ },
+ {
+ "EventCode": "0x34058",
+ "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
+ "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
+ },
+ {
+ "EventCode": "0x3D05C",
+ "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
},
{
- "EventCode": "0x3D058",
- "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
- "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
+ "EventCode": "0x3E052",
+ "EventName": "PM_DISP_STALL_IC_L3",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
},
{
"EventCode": "0x30066",
@@ -215,9 +325,9 @@
"BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
},
{
- "EventCode": "0x40010",
- "EventName": "PM_PMC3_OVERFLOW",
- "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
+ "EventCode": "0x4C010",
+ "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
},
{
"EventCode": "0x4C012",
@@ -225,16 +335,36 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve."
},
{
+ "EventCode": "0x4C016",
+ "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
+ },
+ {
"EventCode": "0x4C018",
"EventName": "PM_CMPL_STALL",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
},
{
+ "EventCode": "0x4C01A",
+ "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
+ },
+ {
"EventCode": "0x4C01E",
"EventName": "PM_LSU_ST3_FIN",
"BriefDescription": "LSU Finished an internal operation in ST3 port."
},
{
+ "EventCode": "0x4D014",
+ "EventName": "PM_EXEC_STALL_LOAD",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
+ },
+ {
+ "EventCode": "0x4D016",
+ "EventName": "PM_EXEC_STALL_PTESYNC",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
+ },
+ {
"EventCode": "0x4D018",
"EventName": "PM_EXEC_STALL_BRU",
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit."
@@ -250,9 +380,24 @@
"BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest."
},
{
+ "EventCode": "0x4D01E",
+ "EventName": "PM_DISP_STALL_BR_MPRED",
+ "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
+ },
+ {
+ "EventCode": "0x4E010",
+ "EventName": "PM_DISP_STALL_IC_L3MISS",
+ "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
+ },
+ {
"EventCode": "0x4E012",
"EventName": "PM_EXEC_STALL_UNKNOWN",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the next-to-finish (NTF) instruction finishes and completions came too close together."
+ },
+ {
+ "EventCode": "0x4E01A",
+ "EventName": "PM_DISP_STALL_HELD_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
},
{
"EventCode": "0x4D020",
@@ -260,24 +405,14 @@
"BriefDescription": "VSU instruction was issued to VSU pipe 3."
},
{
- "EventCode": "0x40132",
- "EventName": "PM_MRK_LSU_FIN",
- "BriefDescription": "LSU marked instruction finish."
+ "EventCode": "0x4003C",
+ "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
},
{
"EventCode": "0x45058",
"EventName": "PM_IC_MISS_CMPL",
- "BriefDescription": "Non-speculative icache miss, counted at completion."
- },
- {
- "EventCode": "0x4D050",
- "EventName": "PM_VSU_NON_FLOP_CMPL",
- "BriefDescription": "Non-floating point VSU instructions completed."
- },
- {
- "EventCode": "0x4D052",
- "EventName": "PM_2FLOP_CMPL",
- "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+ "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
},
{
"EventCode": "0x400F2",
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
index b5d1bd39cfb2..0a2bf56ee7c1 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
@@ -1,22 +1,197 @@
[
{
+ "EventCode": "0x100FE",
+ "EventName": "PM_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed."
+ },
+ {
+ "EventCode": "0x1000A",
+ "EventName": "PM_PMC3_REWIND",
+ "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
+ },
+ {
+ "EventCode": "0x10010",
+ "EventName": "PM_PMC4_OVERFLOW",
+ "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x1001C",
+ "EventName": "PM_ULTRAVISOR_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
+ },
+ {
+ "EventCode": "0x100F0",
+ "EventName": "PM_CYC",
+ "BriefDescription": "Processor cycles."
+ },
+ {
+ "EventCode": "0x10020",
+ "EventName": "PM_PMC4_REWIND",
+ "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
+ },
+ {
+ "EventCode": "0x10022",
+ "EventName": "PM_PMC2_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
+ },
+ {
+ "EventCode": "0x10024",
+ "EventName": "PM_PMC5_OVERFLOW",
+ "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x1F15E",
+ "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
+ "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
+ },
+ {
+ "EventCode": "0x1006C",
+ "EventName": "PM_RUN_CYC_ST_MODE",
+ "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
+ },
+ {
+ "EventCode": "0x101E8",
+ "EventName": "PM_THRESH_EXC_256",
+ "BriefDescription": "Threshold counter exceeded a count of 256."
+ },
+ {
+ "EventCode": "0x101EC",
+ "EventName": "PM_THRESH_MET",
+ "BriefDescription": "Threshold exceeded."
+ },
+ {
+ "EventCode": "0x100FA",
+ "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
+ "BriefDescription": "Cycles when at least one thread has the run latch set."
+ },
+ {
+ "EventCode": "0x2000A",
+ "EventName": "PM_HYPERVISOR_CYC",
+ "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
+ },
+ {
+ "EventCode": "0x2000C",
+ "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
+ "BriefDescription": "Cycles when the run latch is set for all threads."
+ },
+ {
+ "EventCode": "0x20010",
+ "EventName": "PM_PMC1_OVERFLOW",
+ "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x2006C",
+ "EventName": "PM_RUN_CYC_SMT4_MODE",
+ "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
+ },
+ {
+ "EventCode": "0x201E6",
+ "EventName": "PM_THRESH_EXC_32",
+ "BriefDescription": "Threshold counter exceeded a value of 32."
+ },
+ {
+ "EventCode": "0x201E8",
+ "EventName": "PM_THRESH_EXC_512",
+ "BriefDescription": "Threshold counter exceeded a value of 512."
+ },
+ {
+ "EventCode": "0x200F4",
+ "EventName": "PM_RUN_CYC",
+ "BriefDescription": "Processor cycles gated by the run latch."
+ },
+ {
+ "EventCode": "0x30010",
+ "EventName": "PM_PMC2_OVERFLOW",
+ "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x30020",
+ "EventName": "PM_PMC2_REWIND",
+ "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
+ },
+ {
+ "EventCode": "0x30022",
+ "EventName": "PM_PMC4_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
+ },
+ {
+ "EventCode": "0x30024",
+ "EventName": "PM_PMC6_OVERFLOW",
+ "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x3006C",
+ "EventName": "PM_RUN_CYC_SMT2_MODE",
+ "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
+ },
+ {
"EventCode": "0x301E8",
"EventName": "PM_THRESH_EXC_64",
"BriefDescription": "Threshold counter exceeded a value of 64."
},
{
- "EventCode": "0x45050",
- "EventName": "PM_1FLOP_CMPL",
- "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ "EventCode": "0x301EA",
+ "EventName": "PM_THRESH_EXC_1024",
+ "BriefDescription": "Threshold counter exceeded a value of 1024."
+ },
+ {
+ "EventCode": "0x40010",
+ "EventName": "PM_PMC3_OVERFLOW",
+ "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
+ },
+ {
+ "EventCode": "0x40114",
+ "EventName": "PM_MRK_START_PROBE_NOP_DISP",
+ "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
+ },
+ {
+ "EventCode": "0x4D010",
+ "EventName": "PM_PMC1_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
+ },
+ {
+ "EventCode": "0x4D012",
+ "EventName": "PM_PMC3_SAVED",
+ "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
+ },
+ {
+ "EventCode": "0x4D022",
+ "EventName": "PM_HYPERVISOR_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
+ },
+ {
+ "EventCode": "0x4D026",
+ "EventName": "PM_ULTRAVISOR_CYC",
+ "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
+ },
+ {
+ "EventCode": "0x4D028",
+ "EventName": "PM_PRIVILEGED_CYC",
+ "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
+ },
+ {
+ "EventCode": "0x4D02C",
+ "EventName": "PM_PMC1_REWIND",
+ "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+ },
+ {
+ "EventCode": "0x40030",
+ "EventName": "PM_INST_FIN",
+ "BriefDescription": "Instruction finished."
+ },
+ {
+ "EventCode": "0x40134",
+ "EventName": "PM_MRK_INST_TIMEO",
+ "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
},
{
- "EventCode": "0x45052",
- "EventName": "PM_4FLOP_CMPL",
- "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ "EventCode": "0x401EA",
+ "EventName": "PM_THRESH_EXC_128",
+ "BriefDescription": "Threshold counter exceeded a value of 128."
},
{
- "EventCode": "0x4D054",
- "EventName": "PM_8FLOP_CMPL",
- "BriefDescription": "Four Double Precision vector instructions completed."
+ "EventCode": "0x400FA",
+ "EventName": "PM_RUN_INST_CMPL",
+ "BriefDescription": "PowerPC instruction completed while the run latch is set."
}
]
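pmc.json now gathers the PMC housekeeping events (overflow, saved, rewind) together with the run-latch and threshold events. The run-latch counters compose into the usual derived ratios; the helper below is a hypothetical illustration (the event-to-argument mapping is spelled out in the docstring, and obtaining the raw counts, e.g. via perf stat, is left to the caller):

# Sketch: derived ratios from the run-latch counters collected in pmc.json.
def run_latch_stats(pm_run_cyc: int, pm_run_inst_cmpl: int, pm_cyc: int) -> dict:
    """pm_run_cyc       -- PM_RUN_CYC (0x200F4)
    pm_run_inst_cmpl -- PM_RUN_INST_CMPL (0x400FA)
    pm_cyc           -- PM_CYC (0x100F0)
    """
    return {
        # Cycles per instruction while the run latch is set.
        "run_cpi": pm_run_cyc / pm_run_inst_cmpl if pm_run_inst_cmpl else float("inf"),
        # Fraction of all cycles during which the run latch was set.
        "run_latch_ratio": pm_run_cyc / pm_cyc if pm_cyc else 0.0,
    }

print(run_latch_stats(pm_run_cyc=2_000_000, pm_run_inst_cmpl=1_000_000, pm_cyc=2_500_000))
# -> {'run_cpi': 2.0, 'run_latch_ratio': 0.8}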
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
index db3766dca07c..170c9aeb30d8 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
@@ -1,35 +1,10 @@
[
{
- "EventCode": "0x1F15E",
- "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
- "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
- },
- {
- "EventCode": "0x20016",
- "EventName": "PM_ST_FIN",
- "BriefDescription": "Store finish count. Includes speculative activity."
- },
- {
"EventCode": "0x20018",
"EventName": "PM_ST_FWD",
"BriefDescription": "Store forwards that finished."
},
{
- "EventCode": "0x2011C",
- "EventName": "PM_MRK_NTF_CYC",
- "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
- },
- {
- "EventCode": "0x2E01C",
- "EventName": "PM_EXEC_STALL_TLBIE",
- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
- },
- {
- "EventCode": "0x201E6",
- "EventName": "PM_THRESH_EXC_32",
- "BriefDescription": "Threshold counter exceeded a value of 32."
- },
- {
"EventCode": "0x200F0",
"EventName": "PM_ST_CMPL",
"BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)."
@@ -37,21 +12,6 @@
{
"EventCode": "0x200FE",
"EventName": "PM_DATA_FROM_L2MISS",
- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
- },
- {
- "EventCode": "0x30010",
- "EventName": "PM_PMC2_OVERFLOW",
- "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
- },
- {
- "EventCode": "0x4D010",
- "EventName": "PM_PMC1_SAVED",
- "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
- },
- {
- "EventCode": "0x4D05C",
- "EventName": "PM_DPP_FLOP_CMPL",
- "BriefDescription": "Double-Precision or Quad-Precision instructions completed."
+ "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
}
]
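The alderlake changes that follow fold the flat tma_info_* helper metrics into grouped names (tma_info_core_*, tma_info_system_*, tma_info_inst_mix_*, and so on) and fix the UNCORE_FREQ expression, which still referenced the old tma_info_socket_clks name. A rename of this kind is easy to leave half-done, so a small consistency check helps; a minimal sketch, assuming the file path below inside a kernel tree checkout:

#!/usr/bin/env python3
# Sketch: verify that every tma_info_* name referenced by a MetricExpr in
# adl-metrics.json resolves to a defined MetricName after the rename; a
# stale reference such as the old tma_info_socket_clks would show up here.
import json
import re
from pathlib import Path

# Assumed path inside a kernel tree checkout.
METRICS = Path("tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json")

entries = json.loads(METRICS.read_text())
defined = {e["MetricName"] for e in entries if "MetricName" in e}
referenced = set()
for e in entries:
    referenced.update(re.findall(r"\btma_info_\w+", e.get("MetricExpr", "")))

print("unresolved tma_info references:", sorted(referenced - defined) or "none")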
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 1f9047553942..daf9458f0b77 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -71,7 +71,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -92,35 +92,35 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_alloc_restriction",
"MetricThreshold": "tma_alloc_restriction > 0.1",
@@ -129,40 +129,43 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "tma_backend_bound",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound_aux",
"MetricThreshold": "tma_backend_bound_aux > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
- "MetricExpr": "(tma_info_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "(tma_info_core_slots - (cpu_atom@TOPDOWN_FE_BOUND.ALL@ + cpu_atom@TOPDOWN_BE_BOUND.ALL@ + cpu_atom@TOPDOWN_RETIRING.ALL@)) / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of uops that are not from the microsequencer.",
- "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_slots",
+ "MetricExpr": "(cpu_atom@TOPDOWN_RETIRING.ALL@ - cpu_atom@UOPS_RETIRED.MS@) / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_base",
"MetricThreshold": "tma_base > 0.6",
@@ -172,7 +175,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_detect",
"MetricThreshold": "tma_branch_detect > 0.05",
@@ -182,7 +185,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
@@ -192,7 +195,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteer",
"MetricThreshold": "tma_branch_resteer > 0.05",
@@ -201,7 +204,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_cisc",
"MetricThreshold": "tma_cisc > 0.05",
@@ -220,7 +223,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
- "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_decode",
"MetricThreshold": "tma_decode > 0.05",
@@ -229,7 +232,7 @@
},
{
"BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation.",
- "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)",
+ "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.DISAMBIGUATION@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
"MetricName": "tma_disambiguation",
"MetricThreshold": "tma_disambiguation > 0.02",
@@ -239,7 +242,7 @@
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1",
@@ -248,7 +251,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_fast_nuke",
"MetricThreshold": "tma_fast_nuke > 0.05",
@@ -257,7 +260,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
@@ -267,7 +270,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
@@ -277,7 +280,7 @@
},
{
"BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists.",
- "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)",
+ "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.FP_ASSIST@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
"MetricName": "tma_fp_assist",
"MetricThreshold": "tma_fp_assist > 0.02",
@@ -286,7 +289,7 @@
},
{
"BriefDescription": "Counts the number of floating point divide operations per uop.",
- "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_fpdiv_uops",
"MetricThreshold": "tma_fpdiv_uops > 0.2",
@@ -295,17 +298,18 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05",
@@ -313,246 +317,220 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
- "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricName": "tma_info_address_alias_blocks",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Ratio of all branches which mispredict",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": " ",
- "MetricName": "tma_info_branch_mispredict_ratio",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_branch_mispredict_to_unknown_branch_ratio",
- "Unit": "cpu_atom"
- },
- {
"BriefDescription": "",
- "MetricExpr": "CPU_CLK_UNHALTED.CORE",
- "MetricGroup": " ",
- "MetricName": "tma_info_clks",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@",
+ "MetricName": "tma_info_core_clks",
"Unit": "cpu_atom"
},
{
"BriefDescription": "",
- "MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
- "MetricGroup": " ",
- "MetricName": "tma_info_clks_p",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE_P@",
+ "MetricName": "tma_info_core_clks_p",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Cycles Per Instruction",
- "MetricExpr": "tma_info_clks / INST_RETIRED.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_cpi",
+ "MetricExpr": "tma_info_core_clks / INST_RETIRED.ANY",
+ "MetricName": "tma_info_core_cpi",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": " ",
- "MetricName": "tma_info_cpu_utilization",
+ "BriefDescription": "Instructions Per Cycle",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricName": "tma_info_core_ipc",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Cycle cost per DRAM hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
- "MetricGroup": " ",
- "MetricName": "tma_info_cycles_per_demand_load_dram_hit",
+ "BriefDescription": "",
+ "MetricExpr": "5 * tma_info_core_clks",
+ "MetricName": "tma_info_core_slots",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Cycle cost per L2 hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
- "MetricGroup": " ",
- "MetricName": "tma_info_cycles_per_demand_load_l2_hit",
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+ "MetricName": "tma_info_core_upi",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Cycle cost per LLC hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
- "MetricGroup": " ",
- "MetricName": "tma_info_cycles_per_demand_load_l3_hit",
+ "BriefDescription": "Percent of instruction miss cost that hit in DRAM",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@",
+ "MetricName": "tma_info_frontend_inst_miss_cost_dramhit_percent",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percentage of all uops which are FPDiv uops",
- "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_fpdiv_uop_ratio",
+ "BriefDescription": "Percent of instruction miss cost that hit in the L2",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@",
+ "MetricName": "tma_info_frontend_inst_miss_cost_l2hit_percent",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percentage of all uops which are IDiv uops",
- "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_idiv_uop_ratio",
+ "BriefDescription": "Percent of instruction miss cost that hit in the L3",
+ "MetricExpr": "100 * cpu_atom@MEM_BOUND_STALLS.IFETCH_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.IFETCH@",
+ "MetricName": "tma_info_frontend_inst_miss_cost_l3hit_percent",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percent of instruction miss cost that hit in DRAM",
- "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / MEM_BOUND_STALLS.IFETCH",
- "MetricGroup": " ",
- "MetricName": "tma_info_inst_miss_cost_dramhit_percent",
+ "BriefDescription": "Ratio of all branches which mispredict",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricName": "tma_info_inst_mix_branch_mispredict_ratio",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percent of instruction miss cost that hit in the L2",
- "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH",
- "MetricGroup": " ",
- "MetricName": "tma_info_inst_miss_cost_l2hit_percent",
+ "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+ "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percent of instruction miss cost that hit in the L3",
- "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH",
- "MetricGroup": " ",
- "MetricName": "tma_info_inst_miss_cost_l3hit_percent",
+ "BriefDescription": "Percentage of all uops which are FPDiv uops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.FPDIV@ / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_fpdiv_uop_ratio",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipbranch",
+ "BriefDescription": "Percentage of all uops which are IDiv uops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.IDIV@ / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_idiv_uop_ratio",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Instructions Per Cycle",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipc",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricName": "tma_info_inst_mix_ipbranch",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipcall",
+ "MetricName": "tma_info_inst_mix_ipcall",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Far Branch",
- "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipfarbranch",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
+ "MetricName": "tma_info_inst_mix_ipfarbranch",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Load",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipload",
+ "MetricName": "tma_info_inst_mix_ipload",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
- "MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)",
- "MetricName": "tma_info_ipmisp_cond_ntaken",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
+ "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricName": "tma_info_ipmisp_cond_taken",
+ "MetricName": "tma_info_inst_mix_ipmisp_cond_taken",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
- "MetricName": "tma_info_ipmisp_indirect",
+ "MetricName": "tma_info_inst_mix_ipmisp_indirect",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired return Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
- "MetricName": "tma_info_ipmisp_ret",
+ "MetricName": "tma_info_inst_mix_ipmisp_ret",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipmispredict",
+ "MetricName": "tma_info_inst_mix_ipmispredict",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Store",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipstore",
+ "MetricName": "tma_info_inst_mix_ipstore",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
- "MetricGroup": " ",
- "MetricName": "tma_info_kernel_utilization",
+ "BriefDescription": "Percentage of all uops which are ucode ops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.MS@ / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_microcode_uop_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are x87 uops",
+ "MetricExpr": "100 * cpu_atom@UOPS_RETIRED.X87@ / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_x87_uop_ratio",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
+ "MetricExpr": "100 * cpu_atom@LD_BLOCKS.4K_ALIAS@ / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "tma_info_l1_bound_address_alias_blocks",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Percentage of total non-speculative loads that are splits",
- "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricName": "tma_info_load_splits",
+ "MetricExpr": "100 * cpu_atom@MEM_UOPS_RETIRED.SPLIT_LOADS@ / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "tma_info_l1_bound_load_splits",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "load ops retired per 1000 instruction",
- "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_memloadpki",
+ "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
+ "MetricExpr": "100 * cpu_atom@LD_BLOCKS.DATA_UNKNOWN@ / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "tma_info_l1_bound_store_fwd_blocks",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percentage of all uops which are ucode ops",
- "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_microcode_uop_ratio",
+ "BriefDescription": "Cycle cost per DRAM hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "",
- "MetricExpr": "5 * tma_info_clks",
- "MetricGroup": " ",
- "MetricName": "tma_info_slots",
+ "BriefDescription": "Cycle cost per L2 hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
- "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricName": "tma_info_store_fwd_blocks",
+ "BriefDescription": "Cycle cost per LLC hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
- "MetricGroup": " ",
- "MetricName": "tma_info_turbo_utilization",
+ "BriefDescription": "load ops retired per 1000 instruction",
+ "MetricExpr": "1e3 * cpu_atom@MEM_UOPS_RETIRED.ALL_LOADS@ / INST_RETIRED.ANY",
+ "MetricName": "tma_info_memory_memloadpki",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Uops Per Instruction",
- "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_upi",
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Percentage of all uops which are x87 uops",
- "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_x87_uop_ratio",
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
+ "MetricGroup": "Summary",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_core_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05",
@@ -561,7 +539,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
- "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1",
@@ -571,7 +549,7 @@
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.1",
@@ -580,7 +558,7 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.1",
@@ -589,7 +567,7 @@
},
{
"BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full",
- "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL",
+ "MetricExpr": "tma_mem_scheduler * cpu_atom@MEM_SCHEDULER_BLOCK.LD_BUF@ / MEM_SCHEDULER_BLOCK.ALL",
"MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group",
"MetricName": "tma_ld_buffer",
"MetricThreshold": "tma_ld_buffer > 0.05",
@@ -598,7 +576,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
@@ -608,7 +586,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_mem_scheduler",
"MetricThreshold": "tma_mem_scheduler > 0.1",
@@ -617,7 +595,7 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
- "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_clks + tma_store_bound)",
+ "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
@@ -627,7 +605,7 @@
},
{
"BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering.",
- "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)",
+ "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.MEMORY_ORDERING@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
"MetricName": "tma_memory_ordering",
"MetricThreshold": "tma_memory_ordering > 0.02",
@@ -636,7 +614,7 @@
},
{
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
- "MetricExpr": "tma_microcode_sequencer",
+ "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
@@ -647,7 +625,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_non_mem_scheduler",
"MetricThreshold": "tma_non_mem_scheduler > 0.1",
@@ -656,7 +634,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_nuke",
"MetricThreshold": "tma_nuke > 0.05",
@@ -665,7 +643,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
- "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_other_fb",
"MetricThreshold": "tma_other_fb > 0.05",
@@ -674,7 +652,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
- "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_other_l1",
"MetricThreshold": "tma_other_l1 > 0.05",
@@ -692,7 +670,7 @@
},
{
"BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.",
- "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_slots",
+ "MetricExpr": "(cpu_atom@TOPDOWN_RETIRING.ALL@ - cpu_atom@UOPS_RETIRED.MS@ - cpu_atom@UOPS_RETIRED.FPDIV@) / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_other_ret",
"MetricThreshold": "tma_other_ret > 0.3",
@@ -701,7 +679,7 @@
},
{
"BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults.",
- "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)",
+ "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.PAGE_FAULT@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
"MetricName": "tma_page_fault",
"MetricThreshold": "tma_page_fault > 0.02",
@@ -710,7 +688,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
- "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_predecode",
"MetricThreshold": "tma_predecode > 0.05",
@@ -719,7 +697,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_register",
"MetricThreshold": "tma_register > 0.1",
@@ -728,7 +706,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_reorder_buffer",
"MetricThreshold": "tma_reorder_buffer > 0.1",
@@ -748,17 +726,18 @@
},
{
"BriefDescription": "Counts the numer of issue slots that result in retirement slots.",
- "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative",
- "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL",
+ "MetricExpr": "tma_mem_scheduler * cpu_atom@MEM_SCHEDULER_BLOCK.RSV@ / MEM_SCHEDULER_BLOCK.ALL",
"MetricGroup": "TopdownL4;tma_L4_group;tma_mem_scheduler_group",
"MetricName": "tma_rsv",
"MetricThreshold": "tma_rsv > 0.05",
@@ -767,7 +746,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_serialization",
"MetricThreshold": "tma_serialization > 0.1",
@@ -776,7 +755,7 @@
},
{
"BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC.",
- "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)",
+ "MetricExpr": "tma_nuke * (cpu_atom@MACHINE_CLEARS.SMC@ / cpu_atom@MACHINE_CLEARS.SLOW@)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_nuke_group",
"MetricName": "tma_smc",
"MetricThreshold": "tma_smc > 0.02",
@@ -794,7 +773,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
- "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_hit",
"MetricThreshold": "tma_stlb_hit > 0.05",
@@ -803,7 +782,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
- "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_miss",
"MetricThreshold": "tma_stlb_miss > 0.05",
@@ -812,7 +791,7 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.",
- "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)",
+ "MetricExpr": "tma_mem_scheduler * (cpu_atom@MEM_SCHEDULER_BLOCK.ST_BUF@ / cpu_atom@MEM_SCHEDULER_BLOCK.ALL@)",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.1",
@@ -821,7 +800,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
- "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.05",
@@ -830,7 +809,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5_11 + UOPS_DISPATCHED.PORT_6) / (5 * tma_info_core_clks)",
+ "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -839,7 +818,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * cpu_core@ASSISTS.ANY\\,umask\\=0x1B@ / tma_info_slots",
+ "MetricExpr": "100 * cpu_core@ASSISTS.ANY\\,umask\\=0x1B@ / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -849,7 +828,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops as a result of handing SSE to AVX* or AVX* to SSE transition Assists.",
- "MetricExpr": "63 * ASSISTS.SSE_AVX_MIX / tma_info_slots",
+ "MetricExpr": "63 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_thread_slots",
"MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
"MetricName": "tma_avx_assists",
"MetricThreshold": "tma_avx_assists > 0.1",
@@ -858,40 +837,42 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -911,7 +892,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -922,7 +903,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(25 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 24 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -944,7 +925,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "24 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -954,17 +935,17 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu_core@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu_core@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -975,7 +956,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks",
+ "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -985,47 +966,47 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu_core@IDQ.DSB_CYCLES_ANY@ - cpu_core@IDQ.DSB_CYCLES_OK@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "min(7 * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(7 * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@, max(cpu_core@CYCLE_ACTIVITY.CYCLES_MEM_ANY@ - cpu_core@MEMORY_ACTIVITY.CYCLES_L1D_MISS@, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(7 * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "28 * tma_info_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "28 * tma_info_system_average_frequency * cpu_core@OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM@ / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1035,11 +1016,11 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1048,15 +1029,15 @@
"MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
+ "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) - cpu_core@INT_MISC.UOP_DROPPING@ / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -1077,6 +1058,7 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -1087,7 +1069,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
- "MetricExpr": "30 * ASSISTS.FP / tma_info_slots",
+ "MetricExpr": "30 * cpu_core@ASSISTS.FP@ / tma_info_thread_slots",
"MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
"MetricName": "tma_fp_assists",
"MetricThreshold": "tma_fp_assists > 0.1",
@@ -1097,7 +1079,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1107,7 +1089,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1117,7 +1099,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1127,7 +1109,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1137,18 +1119,19 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
- "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) - cpu_core@INT_MISC.UOP_DROPPING@ / tma_info_thread_slots",
+ "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.MACRO_FUSED / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.MACRO_FUSED@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fused_instructions",
"MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
@@ -1158,7 +1141,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1169,7 +1152,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / tma_info_clks",
+ "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1178,247 +1161,300 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
- "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch",
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers",
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code",
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "cpu_core@BR_MISP_RETIRED.INDIRECT_CALL\\,umask\\=0x80@ / BR_MISP_RETIRED.INDIRECT",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret",
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks",
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200",
"Unit": "cpu_core"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki",
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of branches that are non-taken conditionals",
- "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt",
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of branches that are taken conditionals",
- "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk",
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: ",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5",
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
+ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks",
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((cpu_core@BR_INST_RETIRED.COND@ + 3 * cpu_core@BR_INST_RETIRED.NEAR_CALL@ + (cpu_core@BR_INST_RETIRED.NEAR_TAKEN@ - cpu_core@BR_INST_RETIRED.COND_TAKEN@ - 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
- "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc",
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi",
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization",
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp",
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full",
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
- "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 6 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_CALL@ + cpu_core@BR_INST_RETIRED.NEAR_RETURN@) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "BriefDescription": "Fraction of branches that are non-taken conditionals",
+ "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "tma_info_branches_cond_nt",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
- "MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost",
+ "BriefDescription": "Fraction of branches that are taken conditionals",
+ "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "tma_info_branches_cond_tk",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute",
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_TAKEN@ - cpu_core@BR_INST_RETIRED.COND_TAKEN@ - 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump",
"Unit": "cpu_core"
},
{
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki",
+ "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.DISTRIBUTED@",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_core_core_clks",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average number of Uops issued by front-end when it issued something",
- "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
- "MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc",
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
+ "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_core_coreipc",
"Unit": "cpu_core"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_clks",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
"MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc",
+ "MetricName": "tma_info_core_flopc",
"Unit": "cpu_core"
},
{
"BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
+ "MetricName": "tma_info_core_fp_arith_utilization",
"PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common).",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine.",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: ",
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+ "MetricGroup": "DSBmiss",
+ "MetricName": "tma_info_frontend_dsb_switch_cost",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average number of Uops issued by front-end when it issued something",
+ "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "tma_info_frontend_fetch_upc",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L1 instruction cache misses",
"MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
"MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency",
+ "MetricName": "tma_info_frontend_icache_miss_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp",
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20",
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * cpu_core@FRONTEND_RETIRED.L2_MISS@ / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.CODE_RD_MISS@ / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricGroup": "Fed;LSD",
+ "MetricName": "tma_info_frontend_lsd_coverage",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch",
"Unit": "cpu_core"
},
{
"BriefDescription": "Total number of retired Instructions",
- "MetricExpr": "INST_RETIRED.ANY",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST",
"Unit": "cpu_core"
},
@@ -1426,26 +1462,26 @@
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW.",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
"Unit": "cpu_core"
},
@@ -1453,8 +1489,8 @@
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
"Unit": "cpu_core"
},
@@ -1462,490 +1498,445 @@
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Instructions per a microcode Assist invocation",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
- "MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_ipassist",
- "MetricThreshold": "tma_info_ipassist < 100e3",
- "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc",
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6",
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10",
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_ntaken",
- "MetricThreshold": "tma_info_ipmisp_cond_ntaken < 200",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_taken",
- "MetricThreshold": "tma_info_ipmisp_cond_taken < 200",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "cpu_core@BR_MISP_RETIRED.INDIRECT_CALL\\,umask\\=0x80@ / BR_MISP_RETIRED.INDIRECT",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_ret",
- "MetricThreshold": "tma_info_ipmisp_ret < 500",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200",
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8",
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100",
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 13",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch",
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 13",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump",
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi",
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@L2_LINES_IN.ALL@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05",
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@OFFCORE_REQUESTS.ALL_REQUESTS@ / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * cpu_core@LONGEST_LAT_CACHE.MISS@ / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw",
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t",
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki",
"Unit": "cpu_core"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki",
+ "MetricName": "tma_info_memory_l1mpki",
"Unit": "cpu_core"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
- "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.ALL_DEMAND_DATA_RD@ / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t",
+ "MetricName": "tma_info_memory_l1mpki_load",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
- "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * (cpu_core@L2_RQSTS.REFERENCES@ - cpu_core@L2_RQSTS.MISS@) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all",
+ "MetricName": "tma_info_memory_l2hpki_all",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
- "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_HIT@ / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load",
+ "MetricName": "tma_info_memory_l2hpki_load",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L2_MISS@ / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki",
+ "MetricName": "tma_info_memory_l2mpki",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
- "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.MISS@ / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all",
+ "MetricName": "tma_info_memory_l2mpki_all",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
- "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_MISS@ / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load",
+ "MetricName": "tma_info_memory_l2mpki_load",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t",
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L3_MISS@ / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw",
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t",
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
"Unit": "cpu_core"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki",
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency",
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp",
+ "MetricName": "tma_info_memory_oro_load_l2_mlp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l3_miss_latency",
+ "MetricName": "tma_info_memory_oro_load_l3_miss_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency",
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * cpu_core@ITLB_MISSES.WALK_COMPLETED@ / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki",
"Unit": "cpu_core"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricExpr": "1e3 * cpu_core@DTLB_LOAD_MISSES.WALK_COMPLETED@ / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki",
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
- "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
- "MetricGroup": "Fed;LSD",
- "MetricName": "tma_info_lsd_coverage",
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(cpu_core@ITLB_MISSES.WALK_PENDING@ + cpu_core@DTLB_LOAD_MISSES.WALK_PENDING@ + cpu_core@DTLB_STORE_MISSES.WALK_PENDING@) / (4 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
- "MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
- "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * cpu_core@DTLB_STORE_MISSES.WALK_COMPLETED@ / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
- "MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
- "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
- "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency",
+ "BriefDescription": "Instructions per a microcode Assist invocation",
+ "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
+ "MetricGroup": "Pipeline;Ret;Retire",
+ "MetricName": "tma_info_pipeline_ipassist",
+ "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+ "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store",
+ "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
+ "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_strings_cycles",
+ "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency",
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
- "MetricExpr": "1 - (tma_info_cond_nt + tma_info_cond_tk + tma_info_callret + tma_info_jump)",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_other_branches",
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine.",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (4 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5",
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "tma_retiring * tma_info_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire",
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / cpu_core@INST_RETIRED.ANY_P@k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "TOPDOWN.SLOTS",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots",
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
- "MetricExpr": "(tma_info_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
- "MetricGroup": "SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots_utilization",
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+ "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_request_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
+ "MetricExpr": "(1 - cpu_core@CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE@ / cpu_core@CPU_CLK_UNHALTED.REF_DISTRIBUTED@ if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization",
+ "MetricName": "tma_info_system_smt_2t_utilization",
"Unit": "cpu_core"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "UNC_CLOCK.SOCKET",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks",
+ "MetricName": "tma_info_system_socket_clks",
"Unit": "cpu_core"
},
{
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki",
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
- "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_strings_cycles",
- "MetricThreshold": "tma_info_strings_cycles > 0.1",
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
- "MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization",
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "cpu_core@TOPDOWN.SLOTS@",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (cpu_core@TOPDOWN.SLOTS@ / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops Per Instruction",
- "MetricExpr": "tma_retiring * tma_info_slots / INST_RETIRED.ANY",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05",
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "tma_retiring * tma_info_slots / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 9",
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 9",
"Unit": "cpu_core"
},
{
@@ -1960,7 +1951,7 @@
},
{
"BriefDescription": "This metric represents 128-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
- "MetricExpr": "(INT_VEC_RETIRED.ADD_128 + INT_VEC_RETIRED.VNNI_128) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
"MetricName": "tma_int_vector_128b",
"MetricThreshold": "tma_int_vector_128b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1970,7 +1961,7 @@
},
{
"BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
- "MetricExpr": "(INT_VEC_RETIRED.ADD_256 + INT_VEC_RETIRED.MUL_256 + INT_VEC_RETIRED.VNNI_256) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
"MetricName": "tma_int_vector_256b",
"MetricThreshold": "tma_int_vector_256b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1980,7 +1971,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_TAG.STALLS / tma_info_clks",
+ "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1990,7 +1981,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((EXE_ACTIVITY.BOUND_ON_LOADS - MEMORY_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2001,7 +1992,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_clks",
+ "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2011,7 +2002,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2021,21 +2012,21 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "9 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "9 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "DECODE.LCP / tma_info_clks",
+ "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2052,7 +2043,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_clks)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -2071,7 +2062,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2081,7 +2072,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2091,10 +2082,10 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu_core@LSD.CYCLES_ACTIVE@ - cpu_core@LSD.CYCLES_OK@) / tma_info_core_core_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
- "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)",
+ "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit. LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -2112,27 +2103,27 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD@) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
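Read together, the two DRAM metrics above partition stall cycles rather than overlap: tma_mem_bandwidth claims cycles with four or more outstanding demand data reads (the cmask=4 term), and tma_mem_latency is defined as the remainder,

	tma_mem_latency = min(CLKS, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / CLKS
	                  - tma_mem_bandwidth

so a cycle saturated on bandwidth is never also attributed to latency.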
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_clks + tma_store_bound)",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2143,7 +2134,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_clks",
+ "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
"MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -2152,7 +2143,8 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
- "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_slots)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
"MetricThreshold": "tma_memory_operations > 0.1 & tma_light_operations > 0.6",
@@ -2161,7 +2153,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -2171,27 +2163,27 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu_core@IDQ.MITE_CYCLES_ANY@ - cpu_core@IDQ.MITE_CYCLES_OK@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
- "MetricExpr": "160 * ASSISTS.SSE_AVX_MIX / tma_info_clks",
+ "MetricExpr": "160 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_thread_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
"MetricName": "tma_mixing_vectors",
"MetricThreshold": "tma_mixing_vectors > 0.05",
@@ -2201,7 +2193,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_slots / UOPS_ISSUED.ANY) / tma_info_clks",
+ "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_thread_slots / cpu_core@UOPS_ISSUED.ANY@) / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -2211,7 +2203,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
- "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - INST_RETIRED.MACRO_FUSED) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * (cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ - cpu_core@INST_RETIRED.MACRO_FUSED@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_non_fused_branches",
"MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
@@ -2221,7 +2213,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.NOP@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_nop_instructions",
"MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
@@ -2231,6 +2223,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2241,7 +2234,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults",
- "MetricExpr": "99 * ASSISTS.PAGE_FAULT / tma_info_slots",
+ "MetricExpr": "99 * cpu_core@ASSISTS.PAGE_FAULT@ / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_assists_group",
"MetricName": "tma_page_faults",
"MetricThreshold": "tma_page_faults > 0.05",
@@ -2251,7 +2244,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -2261,7 +2254,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -2271,7 +2264,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -2281,7 +2274,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_clks)",
+ "MetricExpr": "((cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -2291,7 +2284,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_clks",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2301,7 +2294,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2311,7 +2304,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2321,7 +2314,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_clks",
+ "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2331,18 +2324,19 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
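With tma_retiring (and its level-1 siblings) now also tagged with the Default/TopdownL1 groups, the level-1 breakdown is selectable by group name; a minimal check, assuming a perf build that includes these files:

	# The four level-1 categories (retiring, bad speculation,
	# frontend bound, backend bound) should sum to roughly 100%.
	perf stat -M TopdownL1 -a -- sleep 1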
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -2352,7 +2346,7 @@
},
{
"BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
- "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
"MetricName": "tma_shuffles",
"MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -2361,7 +2355,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_clks",
+ "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -2371,7 +2365,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu_core@LD_BLOCKS.NO_SR@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2381,7 +2375,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2391,17 +2385,17 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(XQ.FULL_CYCLES + L1D_PEND_MISS.L2_STALLS) / tma_info_clks",
+ "MetricExpr": "(cpu_core@XQ.FULL_CYCLES@ + cpu_core@L1D_PEND_MISS.L2_STALLS@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2411,7 +2405,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2421,7 +2415,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
- "MetricExpr": "(MEM_STORE_RETIRED.L2_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(cpu_core@MEM_STORE_RETIRED.L2_HIT@ * 10 * (1 - cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@) + (1 - cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@) * min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2431,7 +2425,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_4_9@ + cpu_core@UOPS_DISPATCHED.PORT_7_8@) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -2450,7 +2444,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2459,7 +2453,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
- "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_clks",
+ "MetricExpr": "9 * cpu_core@OCR.STREAMING_WR.ANY_RESPONSE@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
"MetricName": "tma_streaming_stores",
"MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2469,7 +2463,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_clks",
+ "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -2479,7 +2473,7 @@
},
{
"BriefDescription": "This metric serves as an approximation of legacy x87 usage",
- "MetricExpr": "tma_retiring * UOPS_EXECUTED.X87 / UOPS_EXECUTED.THREAD",
+ "MetricExpr": "tma_retiring * cpu_core@UOPS_EXECUTED.X87@ / UOPS_EXECUTED.THREAD",
"MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_x87_use",
"MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
index 51770416bcc2..b3d7f8fb50df 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
@@ -1018,6 +1018,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
+ "EventCode": "0x2c",
+ "EventName": "SQ_MISC.BUS_LOCK",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Number of PREFETCHNTA instructions executed.",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
index 55827b276e6e..73d92d5c9f9d 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
@@ -93,19 +93,21 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS",
+ "BriefDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding.",
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
},
{
- "BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS",
+ "BriefDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding.",
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/metricgroups.json b/tools/perf/pmu-events/arch/x86/alderlake/metricgroups.json
new file mode 100644
index 000000000000..516eb0f93f02
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlake/metricgroups.json
@@ -0,0 +1,122 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IntVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_assists_group": "Metrics contributing to tma_assists category",
+ "tma_backend_bound_aux_group": "Metrics contributing to tma_backend_bound_aux category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_base_group": "Metrics contributing to tma_base category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_int_operations_group": "Metrics contributing to tma_int_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_mem_scheduler_group": "Metrics contributing to tma_mem_scheduler category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_nuke_group": "Metrics contributing to tma_nuke category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
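Each key in this new file becomes a selectable metric-group name; assuming a perf build carrying these files, the groups can be enumerated and used directly:

	perf list metricgroup                  # show the group names defined above
	perf stat -M MemoryBW -a -- sleep 1    # run every metric tagged MemoryBW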
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index 0402adbf7d92..0f1628d698da 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -86,7 +86,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_alloc_restriction",
"MetricThreshold": "tma_alloc_restriction > 0.1",
@@ -94,37 +94,40 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "tma_backend_bound",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound_aux",
"MetricThreshold": "tma_backend_bound_aux > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
- "MetricExpr": "(tma_info_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "(tma_info_core_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
"ScaleUnit": "100%"
},
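Note the E-core derivation here: unlike the P-core, which reads a dedicated topdown\-bad\-spec register, bad speculation is computed as the residual

	tma_bad_speculation = (slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / slots

so the four level-1 categories sum to exactly 100% by construction.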
{
"BriefDescription": "Counts the number of uops that are not from the microsequencer.",
- "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_slots",
+ "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_base",
"MetricThreshold": "tma_base > 0.6",
@@ -133,7 +136,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_detect",
"MetricThreshold": "tma_branch_detect > 0.05",
@@ -142,7 +145,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
@@ -151,7 +154,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteer",
"MetricThreshold": "tma_branch_resteer > 0.05",
@@ -159,7 +162,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_cisc",
"MetricThreshold": "tma_cisc > 0.05",
@@ -176,7 +179,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
- "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_decode",
"MetricThreshold": "tma_decode > 0.05",
@@ -193,7 +196,7 @@
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1",
@@ -201,7 +204,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_fast_nuke",
"MetricThreshold": "tma_fast_nuke > 0.05",
@@ -209,7 +212,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
@@ -218,7 +221,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
@@ -235,7 +238,7 @@
},
{
"BriefDescription": "Counts the number of floating point divide operations per uop.",
- "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_fpdiv_uops",
"MetricThreshold": "tma_fpdiv_uops > 0.2",
@@ -243,227 +246,202 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
- "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricName": "tma_info_address_alias_blocks"
- },
- {
- "BriefDescription": "Ratio of all branches which mispredict",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": " ",
- "MetricName": "tma_info_branch_mispredict_ratio"
- },
- {
- "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_branch_mispredict_to_unknown_branch_ratio"
- },
- {
"BriefDescription": "",
"MetricExpr": "CPU_CLK_UNHALTED.CORE",
- "MetricGroup": " ",
- "MetricName": "tma_info_clks"
+ "MetricName": "tma_info_core_clks"
},
{
"BriefDescription": "",
"MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
- "MetricGroup": " ",
- "MetricName": "tma_info_clks_p"
+ "MetricName": "tma_info_core_clks_p"
},
{
"BriefDescription": "Cycles Per Instruction",
- "MetricExpr": "tma_info_clks / INST_RETIRED.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": " ",
- "MetricName": "tma_info_cpu_utilization"
- },
- {
- "BriefDescription": "Cycle cost per DRAM hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
- "MetricGroup": " ",
- "MetricName": "tma_info_cycles_per_demand_load_dram_hit"
- },
- {
- "BriefDescription": "Cycle cost per L2 hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
- "MetricGroup": " ",
- "MetricName": "tma_info_cycles_per_demand_load_l2_hit"
+ "MetricExpr": "tma_info_core_clks / INST_RETIRED.ANY",
+ "MetricName": "tma_info_core_cpi"
},
{
- "BriefDescription": "Cycle cost per LLC hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
- "MetricGroup": " ",
- "MetricName": "tma_info_cycles_per_demand_load_l3_hit"
+ "BriefDescription": "Instructions Per Cycle",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricName": "tma_info_core_ipc"
},
{
- "BriefDescription": "Percentage of all uops which are FPDiv uops",
- "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_fpdiv_uop_ratio"
+ "BriefDescription": "",
+ "MetricExpr": "5 * tma_info_core_clks",
+ "MetricName": "tma_info_core_slots"
},
{
- "BriefDescription": "Percentage of all uops which are IDiv uops",
- "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_idiv_uop_ratio"
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+ "MetricName": "tma_info_core_upi"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in DRAM",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / MEM_BOUND_STALLS.IFETCH",
- "MetricGroup": " ",
- "MetricName": "tma_info_inst_miss_cost_dramhit_percent"
+ "MetricName": "tma_info_frontend_inst_miss_cost_dramhit_percent"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in the L2",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH",
- "MetricGroup": " ",
- "MetricName": "tma_info_inst_miss_cost_l2hit_percent"
+ "MetricName": "tma_info_frontend_inst_miss_cost_l2hit_percent"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in the L3",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH",
- "MetricGroup": " ",
- "MetricName": "tma_info_inst_miss_cost_l3hit_percent"
+ "MetricName": "tma_info_frontend_inst_miss_cost_l3hit_percent"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipbranch"
+ "BriefDescription": "Ratio of all branches which mispredict",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricName": "tma_info_inst_mix_branch_mispredict_ratio"
},
{
- "BriefDescription": "Instructions Per Cycle",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipc"
+ "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+ "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are FPDiv uops",
+ "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_fpdiv_uop_ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are IDiv uops",
+ "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_idiv_uop_ratio"
+ },
+ {
+ "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricName": "tma_info_inst_mix_ipbranch"
},
{
"BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipcall"
+ "MetricName": "tma_info_inst_mix_ipcall"
},
{
"BriefDescription": "Instructions per Far Branch",
"MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipfarbranch"
+ "MetricName": "tma_info_inst_mix_ipfarbranch"
},
{
"BriefDescription": "Instructions per Load",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipload"
+ "MetricName": "tma_info_inst_mix_ipload"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
"MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)",
- "MetricName": "tma_info_ipmisp_cond_ntaken"
+ "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricName": "tma_info_ipmisp_cond_taken"
+ "MetricName": "tma_info_inst_mix_ipmisp_cond_taken"
},
{
"BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
- "MetricName": "tma_info_ipmisp_indirect"
+ "MetricName": "tma_info_inst_mix_ipmisp_indirect"
},
{
"BriefDescription": "Instructions per retired return Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
- "MetricName": "tma_info_ipmisp_ret"
+ "MetricName": "tma_info_inst_mix_ipmisp_ret"
},
{
"BriefDescription": "Instructions per retired Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipmispredict"
+ "MetricName": "tma_info_inst_mix_ipmispredict"
},
{
"BriefDescription": "Instructions per Store",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
- "MetricGroup": " ",
- "MetricName": "tma_info_ipstore"
+ "MetricName": "tma_info_inst_mix_ipstore"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
- "MetricGroup": " ",
- "MetricName": "tma_info_kernel_utilization"
+ "BriefDescription": "Percentage of all uops which are ucode ops",
+ "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_microcode_uop_ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are x87 uops",
+ "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
+ "MetricName": "tma_info_inst_mix_x87_uop_ratio"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
+ "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "tma_info_l1_bound_address_alias_blocks"
},
{
"BriefDescription": "Percentage of total non-speculative loads that are splits",
"MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricName": "tma_info_load_splits"
+ "MetricName": "tma_info_l1_bound_load_splits"
},
{
- "BriefDescription": "load ops retired per 1000 instruction",
- "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_memloadpki"
+ "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
+ "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "tma_info_l1_bound_store_fwd_blocks"
},
{
- "BriefDescription": "Percentage of all uops which are ucode ops",
- "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_microcode_uop_ratio"
+ "BriefDescription": "Cycle cost per DRAM hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit"
},
{
- "BriefDescription": "",
- "MetricExpr": "5 * tma_info_clks",
- "MetricGroup": " ",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Cycle cost per L2 hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit"
},
{
- "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
- "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
- "MetricName": "tma_info_store_fwd_blocks"
+ "BriefDescription": "Cycle cost per LLC hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit"
},
{
- "BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
- "MetricGroup": " ",
- "MetricName": "tma_info_turbo_utilization"
+ "BriefDescription": "load ops retired per 1000 instruction",
+ "MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "tma_info_memory_memloadpki"
},
{
- "BriefDescription": "Uops Per Instruction",
- "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
- "MetricGroup": " ",
- "MetricName": "tma_info_upi"
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricName": "tma_info_system_cpu_utilization"
},
{
- "BriefDescription": "Percentage of all uops which are x87 uops",
- "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
- "MetricGroup": " ",
- "MetricName": "tma_info_x87_uop_ratio"
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
+ "MetricGroup": "Summary",
+ "MetricName": "tma_info_system_kernel_utilization"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_core_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization"
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05",
@@ -471,7 +449,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
- "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1",
@@ -480,7 +458,7 @@
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.1",
@@ -488,7 +466,7 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_clks - MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.1",
@@ -504,7 +482,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
@@ -513,7 +491,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_mem_scheduler",
"MetricThreshold": "tma_mem_scheduler > 0.1",
@@ -521,7 +499,7 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
- "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_clks + tma_store_bound)",
+ "MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_core_clks + tma_store_bound)",
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
@@ -538,7 +516,7 @@
},
{
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
@@ -548,7 +526,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_non_mem_scheduler",
"MetricThreshold": "tma_non_mem_scheduler > 0.1",
@@ -556,7 +534,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_nuke",
"MetricThreshold": "tma_nuke > 0.05",
@@ -564,7 +542,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
- "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_other_fb",
"MetricThreshold": "tma_other_fb > 0.05",
@@ -572,7 +550,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
- "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_other_l1",
"MetricThreshold": "tma_other_l1 > 0.05",
@@ -588,7 +566,7 @@
},
{
"BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.",
- "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_slots",
+ "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_other_ret",
"MetricThreshold": "tma_other_ret > 0.3",
@@ -604,7 +582,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
- "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_slots",
+ "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_predecode",
"MetricThreshold": "tma_predecode > 0.05",
@@ -612,7 +590,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_register",
"MetricThreshold": "tma_register > 0.1",
@@ -620,7 +598,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_reorder_buffer",
"MetricThreshold": "tma_reorder_buffer > 0.1",
@@ -638,11 +616,12 @@
},
{
"BriefDescription": "Counts the numer of issue slots that result in retirement slots.",
- "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_slots",
- "MetricGroup": "TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots",
+ "MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"ScaleUnit": "100%"
},
{
@@ -655,7 +634,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_slots",
+ "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_serialization",
"MetricThreshold": "tma_serialization > 0.1",
@@ -679,7 +658,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
- "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_hit",
"MetricThreshold": "tma_stlb_hit > 0.05",
@@ -687,7 +666,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
- "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_miss",
"MetricThreshold": "tma_stlb_miss > 0.05",
@@ -703,7 +682,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
- "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_clks",
+ "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.05",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/metricgroups.json b/tools/perf/pmu-events/arch/x86/alderlaken/metricgroups.json
new file mode 100644
index 000000000000..7b2049cd2694
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/metricgroups.json
@@ -0,0 +1,26 @@
+{
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_backend_bound_aux_group": "Metrics contributing to tma_backend_bound_aux category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_base_group": "Metrics contributing to tma_base category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
+ "tma_mem_scheduler_group": "Metrics contributing to tma_mem_scheduler category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_nuke_group": "Metrics contributing to tma_nuke category",
+ "tma_resource_bound_group": "Metrics contributing to tma_resource_bound category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category"
+}
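
The new metricgroups.json maps each group name used by the metrics above to a human-readable description, so perf can document what a group selection covers. A minimal sketch that lists the groups (the path assumes a kernel-tree checkout and is illustrative only):

    import json

    # Illustrative path into a kernel source tree.
    path = "tools/perf/pmu-events/arch/x86/alderlaken/metricgroups.json"
    with open(path) as f:
        groups = json.load(f)

    for name, description in sorted(groups.items()):
        print(f"{name:28} {description}")

Groups such as TopdownL1 are the ones selectable at runtime, e.g. with perf stat -M TopdownL1.
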
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
index bf5083c1c260..4d28177325a0 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
@@ -169,8 +169,9 @@
},
{
"MetricName": "nps1_die_to_dram",
- "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricGroup": "data_fabric",
"PerPkg": "1",
"ScaleUnit": "6.1e-5MiB"
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
index a71694a043ba..60e19456d4c8 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
@@ -169,8 +169,9 @@
},
{
"MetricName": "nps1_die_to_dram",
- "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricGroup": "data_fabric",
"PerPkg": "1",
"ScaleUnit": "6.1e-5MiB"
diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
index 988cf68ae825..3e9e1781812e 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
@@ -205,10 +205,11 @@
},
{
"MetricName": "nps1_die_to_dram",
- "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
"MetricGroup": "data_fabric",
"PerPkg": "1",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"ScaleUnit": "6.1e-5MiB"
}
]
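
The three amdzen changes are identical in intent: the "(may need --metric-no-group)" caveat moves out of the human-readable description and into the machine-readable "MetricConstraint": "NO_GROUP_EVENTS" field, so perf itself stops forcing the eight DRAM-channel events into a single event group. A minimal sketch of how the metric and its ScaleUnit combine, with placeholder counts for dram_channel_data_controller_0..7:

    # Placeholder counts for the eight DRAM channel events.
    channel_counts = [1_500_000] * 8
    total = sum(channel_counts)

    # ScaleUnit "6.1e-5MiB": each count corresponds to a 64-byte transfer,
    # and 64 / 2**20 ~= 6.1e-5 converts the summed count to MiB.
    print(f"nps1_die_to_dram ~= {total * 6.1e-5:,.1f} MiB")
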
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index f9e2316601e1..55a10b0bf36f 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -71,7 +71,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -81,7 +81,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -89,7 +89,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -109,7 +109,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -125,12 +125,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -159,7 +159,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -180,7 +180,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -189,7 +189,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -199,7 +199,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -208,25 +208,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_LOAD_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_LOAD_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -235,7 +235,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_STORE_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_STORE_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -244,7 +244,7 @@
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "60 * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "60 * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -254,11 +254,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -266,14 +266,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_slots",
+ "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -328,7 +328,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -348,435 +348,435 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
- "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
- },
- {
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
"PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_mispredicts_resteers"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
- },
- {
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
{
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
- },
- {
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
- },
- {
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
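
The fill-bandwidth expression above is plain arithmetic: each L1D.REPLACEMENT is one 64-byte cache line brought into the L1 data cache, so scaling by 64 bytes and the elapsed time yields GB/s. A small sketch with assumed counter readings:

l1d_replacement = 150_000_000  # assumed L1D.REPLACEMENT count over the run
duration_s = 1.0               # assumed duration_time in seconds
fill_bw_gbs = 64 * l1d_replacement / 1e9 / duration_s  # 64 bytes per line
print(f"L1D fill bandwidth: {fill_bw_gbs:.1f} GB/s")   # 9.6 GB/s
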
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
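
The two metrics above divide the same occupancy counter by different denominators: L1D_PEND_MISS.PENDING accumulates, each cycle, the number of L1 miss demand loads in flight. Divided by the cycles in which at least one miss was pending it yields MLP; divided by the number of misses (plus line-fill-buffer hits) it yields the average miss latency. A sketch with assumed counts:

pending        = 4_000_000_000  # assumed L1D_PEND_MISS.PENDING (occupancy)
pending_cycles =   800_000_000  # assumed L1D_PEND_MISS.PENDING_CYCLES
l1_miss        =    50_000_000  # assumed MEM_LOAD_UOPS_RETIRED.L1_MISS
hit_lfb        =    10_000_000  # assumed MEM_LOAD_UOPS_RETIRED.HIT_LFB

mlp = pending / pending_cycles                     # avg misses in flight: 5.0
load_miss_latency = pending / (l1_miss + hit_lfb)  # ~66.7 core cycles
print(mlp, round(load_miss_latency, 1))
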
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average number of parallel requests to external memory",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_parallel_requests",
- "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency"
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * (DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED)) / tma_info_core_clks",
+ "MetricExpr": "(cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * (DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED)) / tma_info_core_core_clks",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
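
The page-walk utilization above adds the cycles the hardware walkers were busy (the cmask'd WALK_DURATION events) to a fixed 7-cycle completion cost per finished walk, then normalizes by core clocks. A sketch with assumed counts:

walk_duration_cycles = 30_000_000    # assumed sum of the three WALK_DURATION events
walks_completed      =  2_000_000    # assumed sum of the three WALK_COMPLETED events
core_clks            = 1_000_000_000 # assumed tma_info_core_core_clks

util = (walk_duration_cycles + 7 * walks_completed) / core_clks
print(f"{util:.3f}")  # 0.044 -- well below the 0.5 threshold
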
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
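
The dram_bw_use expression looks odd at first glance -- dividing by 1e6 and then again by 1e3 -- but that is just 1e9 split across the line: 64 bytes per tracked request, summed over data and coherence requests, scaled to GB/s. A sketch with assumed uncore counts:

trk_requests = 120_000_000  # assumed UNC_ARB_TRK_REQUESTS.ALL
coh_requests =  30_000_000  # assumed UNC_ARB_COH_TRK_REQUESTS.ALL
duration_s   = 1.0          # assumed duration_time

bw = 64 * (trk_requests + coh_requests) / 1e6 / duration_s / 1e3
print(f"DRAM bandwidth use: {bw:.1f} GB/s")  # 9.6 GB/s
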
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
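
The gflops weighting follows directly from vector widths: a scalar op is 1 FLOP per count, a 128-bit packed-double op is 2, 128-bit packed-single and 256-bit packed-double are 4, and 256-bit packed-single is 8. A sketch with assumed event counts showing the same arithmetic:

scalar_single = 1_000_000_000  # assumed FP_ARITH_INST_RETIRED.SCALAR_SINGLE
scalar_double =   500_000_000  # assumed ...SCALAR_DOUBLE
p128_double   =   200_000_000  # assumed ...128B_PACKED_DOUBLE (2 FLOPs each)
p128_single   =   100_000_000  # assumed ...128B_PACKED_SINGLE (4 FLOPs each)
p256_double   =    50_000_000  # assumed ...256B_PACKED_DOUBLE (4 FLOPs each)
p256_single   =    25_000_000  # assumed ...256B_PACKED_SINGLE (8 FLOPs each)
duration_s    = 1.0            # assumed duration_time

flops = (scalar_single + scalar_double + 2 * p128_double
         + 4 * (p128_single + p256_double) + 8 * p256_single)
print(f"{flops / 1e9 / duration_s:.1f} GFLOPS")  # 2.7 GFLOPS
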
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel requests to external memory",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_parallel_requests",
+ "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
+ },
+ {
+ "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_request_latency"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "UNC_CLOCK.SOCKET",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
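
tma_info_thread_slots anchors the whole top-down level-1 breakdown: Broadwell issues up to 4 uops per cycle, so each core clock contributes 4 issue slots, and metrics such as tma_retiring further down express retired slots as a fraction of this total. A sketch with assumed counts:

core_clks    = 1_000_000_000  # assumed tma_info_core_core_clks
retire_slots = 2_400_000_000  # assumed UOPS_RETIRED.RETIRE_SLOTS

slots = 4 * core_clks         # 4-wide pipeline -> 4 slots per clock
tma_retiring = retire_slots / slots
print(f"{tma_retiring:.0%} of issue slots retired useful uops")  # 60%
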
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -785,7 +785,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -794,7 +794,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -804,7 +804,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -814,7 +814,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_clks",
+ "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -823,11 +823,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -843,7 +843,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -853,7 +853,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -873,16 +873,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -892,7 +892,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + RESOURCE_STALLS.SB) / (CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + RESOURCE_STALLS.SB) / (CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -902,7 +902,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -915,21 +915,21 @@
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -938,7 +938,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -947,7 +947,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -956,7 +956,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -965,7 +965,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -983,7 +983,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -992,7 +992,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1001,7 +1001,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -1011,7 +1011,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1020,7 +1020,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1029,7 +1029,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1038,7 +1038,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1047,7 +1047,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1055,7 +1055,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1066,7 +1066,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1075,7 +1075,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1084,16 +1084,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1102,7 +1102,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1112,7 +1112,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1121,7 +1121,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1138,7 +1138,7 @@
},
{
"BriefDescription": "This metric serves as an approximation of legacy x87 usage",
- "MetricExpr": "INST_RETIRED.X87 * tma_info_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
+ "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_x87_use",
"MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
index e4826dc7f797..986869252e71 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
@@ -32,6 +32,14 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
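
Until the symbolic name lands in your perf build, the new event can be requested by the raw encoding given above (EventCode 0xc7, UMask 0x18); a hypothetical usage sketch:

import subprocess

# Count 4-FLOP packed FP instructions for one second (raw encoding taken
# from the JSON above; the symbolic name fp_arith_inst_retired.4_flops
# should also work once perf picks up this event list).
subprocess.run(["perf", "stat", "-e", "cpu/event=0xc7,umask=0x18/",
                "--", "sleep", "1"], check=False)
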
+ {
"BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
@@ -77,6 +85,13 @@
"UMask": "0x2a"
},
{
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xfc"
+ },
+ {
"BriefDescription": "Cycles with any input/output SSE or FP assist",
"CounterMask": "1",
"EventCode": "0xCA",
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/metricgroups.json b/tools/perf/pmu-events/arch/x86/broadwell/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
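
The new metricgroups.json maps each MetricGroup tag used by this architecture to a one-line description, so tooling that lists metrics can explain what a group means; membership itself still lives in each metric's semicolon-separated MetricGroup field (e.g. "TopdownL4;tma_L4_group;tma_l1_bound_group" in the metrics below). A minimal sketch of joining the two files, under assumed file paths and not reflecting perf's actual loader:

# Minimal sketch (hypothetical paths; not perf's actual loader): resolve the
# semicolon-separated MetricGroup tags of each metric against the group
# descriptions added in metricgroups.json.
import json

with open("metricgroups.json") as f:   # the file added above
    group_desc = json.load(f)
with open("metrics.json") as f:        # a per-arch metrics file such as bdwde-metrics.json
    metrics = json.load(f)

for m in metrics:
    for tag in m.get("MetricGroup", "").split(";"):
        if tag in group_desc:
            print(f'{m["MetricName"]}: {tag} - {group_desc[tag]}')
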
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index e9c46d336a8e..8fc62b8f667d 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -65,7 +65,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -75,7 +75,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -83,7 +83,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -103,7 +103,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -119,12 +119,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -153,7 +153,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -174,7 +174,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -183,7 +183,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -193,7 +193,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -202,25 +202,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_LOAD_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_LOAD_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -229,7 +229,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_STORE_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_STORE_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -239,11 +239,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -251,14 +251,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_slots",
+ "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -313,7 +313,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -333,7 +333,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -341,409 +341,409 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
- },
- {
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
"PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_mispredicts_resteers"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
- },
- {
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
- },
- {
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
- },
- {
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * (DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED)) / tma_info_core_clks",
+ "MetricExpr": "(cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * (DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED)) / tma_info_core_core_clks",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -752,7 +752,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -761,7 +761,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -771,7 +771,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -781,7 +781,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_clks",
+ "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -790,11 +790,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -810,7 +810,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -820,7 +820,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -840,16 +840,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -859,7 +859,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + RESOURCE_STALLS.SB) / (CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + RESOURCE_STALLS.SB) / (CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -869,7 +869,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -882,21 +882,21 @@
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -905,7 +905,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -914,7 +914,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -923,7 +923,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -931,7 +931,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -948,7 +948,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -957,7 +957,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -966,7 +966,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -975,7 +975,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -984,7 +984,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -993,7 +993,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1002,7 +1002,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1011,7 +1011,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1020,7 +1020,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1031,7 +1031,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1040,7 +1040,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1049,16 +1049,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1067,7 +1067,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1077,7 +1077,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1086,7 +1086,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1104,7 +1104,7 @@
},
{
"BriefDescription": "This metric serves as an approximation of legacy x87 usage",
- "MetricExpr": "INST_RETIRED.X87 * tma_info_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
+ "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_x87_use",
"MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
index e4826dc7f797..986869252e71 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
@@ -32,6 +32,14 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
@@ -77,6 +85,13 @@
"UMask": "0x2a"
},
{
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xfc"
+ },
+ {
"BriefDescription": "Cycles with any input/output SSE or FP assist",
"CounterMask": "1",
"EventCode": "0xCA",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/metricgroups.json b/tools/perf/pmu-events/arch/x86/broadwellde/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
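The new metricgroups.json is a flat map from each group tag used in a metric's MetricGroup field to a human-readable description. A minimal sketch of consuming it, assuming the file path of a checked-out kernel tree:

```python
# Minimal sketch: resolve the ';'-separated MetricGroup tags of a metric
# against the new broadwellde metricgroups.json. Path assumes a kernel tree.
import json

path = "tools/perf/pmu-events/arch/x86/broadwellde/metricgroups.json"
with open(path) as f:
    groups = json.load(f)  # flat {group_name: description} mapping

metric_group = "TmaL1;TopdownL1;tma_L1_group"  # e.g. from tma_retiring
for tag in metric_group.split(";"):
    print(f"{tag}: {groups.get(tag, '(no description)')}")
```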
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 437b9867acb9..b319e4edc238 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -50,11 +50,207 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
+ "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY",
+ "MetricName": "cpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "CPU operating frequency (in GHz)",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
+ "MetricName": "cpu_operating_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Percentage of time spent in the active CPU power state C0",
+ "MetricExpr": "tma_info_system_cpu_utilization",
+ "MetricName": "cpu_utilization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_store_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x19e@ * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+ "MetricExpr": "(cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x1c8\\,filter_tid\\=0x3e@ + cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x180\\,filter_tid\\=0x3e@) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "itlb_large_page_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "itlb_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY",
+ "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L1_HIT / INST_RETIRED.ANY",
+ "MetricName": "l1d_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L1D.REPLACEMENT / INST_RETIRED.ANY",
+ "MetricName": "l1d_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_code_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_HIT / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed data read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_LINES_IN.ALL / INST_RETIRED.ANY",
+ "MetricName": "l2_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "(cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x181@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x191@) / INST_RETIRED.ANY",
+ "MetricName": "llc_code_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) in nano seconds",
+ "MetricExpr": "1e9 * (cbox@UNC_C_TOR_OCCUPANCY.MISS_OPCODE\\,filter_opc\\=0x182@ / cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@) / (UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to local memory in nano seconds",
+ "MetricExpr": "1e9 * (cbox@UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ / cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@) / (UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_local_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to remote memory in nano seconds",
+ "MetricExpr": "1e9 * (cbox@UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@ / cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@) / (UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_remote_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "(cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x192@) / INST_RETIRED.ANY",
+ "MetricName": "llc_data_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "The ratio of number of completed memory load instructions to the total number completed instructions",
+ "MetricExpr": "MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "loads_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "DDR memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.RD * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.WR * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Memory read that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ / (cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@)",
+ "MetricName": "numa_reads_addressed_to_local_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@ / (cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@)",
+ "MetricName": "numa_reads_addressed_to_remote_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_decoded_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MITE_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_legacy_decode_pipeline",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from loop stream detector(LSD) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_loop_stream_detector",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MS_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_microcode_sequencer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Intel(R) Quick Path Interconnect (QPI) data transmit bandwidth (MB/sec)",
+ "MetricExpr": "UNC_Q_TxL_FLITS_G0.DATA * 8 / 1e6 / duration_time",
+ "MetricName": "qpi_data_transmit_bw",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -70,8 +266,14 @@
"ScaleUnit": "1SMI#"
},
{
+ "BriefDescription": "The ratio of number of completed memory store instructions to the total number completed instructions",
+ "MetricExpr": "MEM_UOPS_RETIRED.ALL_STORES / INST_RETIRED.ANY",
+ "MetricName": "stores_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -81,7 +283,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -89,7 +291,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -109,7 +311,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -125,12 +327,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -159,7 +361,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -180,7 +382,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -189,7 +391,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -199,7 +401,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -208,25 +410,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
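The tma_dsb expression above is a straight ratio over core clocks. A worked
example with hypothetical event counts (values invented for illustration):

    counts = {
        "IDQ.ALL_DSB_CYCLES_ANY_UOPS": 9.0e8,
        "IDQ.ALL_DSB_CYCLES_4_UOPS": 6.0e8,
        "tma_info_core_core_clks": 1.2e9,
    }
    tma_dsb = ((counts["IDQ.ALL_DSB_CYCLES_ANY_UOPS"]
                - counts["IDQ.ALL_DSB_CYCLES_4_UOPS"])
               / counts["tma_info_core_core_clks"] / 2)
    print(f"tma_dsb = {tma_dsb:.1%}")  # ScaleUnit "100%": report as a percentage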
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_LOAD_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_LOAD_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -235,7 +437,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_STORE_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * DTLB_STORE_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -244,7 +446,7 @@
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "(200 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM + 60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE) / tma_info_clks",
+ "MetricExpr": "(200 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM + 60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -254,11 +456,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -266,14 +468,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
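The MetricThreshold above combines three conditions; the thread IPC is divided
by 4 because this core issues at most 4 uops per cycle (tma_info_thread_slots
below is 4 * core clocks). A sketch of the check, with hypothetical inputs:

    def fetch_bandwidth_flagged(fetch_bw: float, frontend_bound: float,
                                thread_ipc: float) -> bool:
        # Mirrors: tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15
        #          & tma_info_thread_ipc / 4 > 0.35
        return fetch_bw > 0.1 and frontend_bound > 0.15 and thread_ipc / 4 > 0.35

    print(fetch_bandwidth_flagged(0.12, 0.18, 1.6))  # True: 1.6 / 4 = 0.40 > 0.35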
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_slots",
+ "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -328,7 +530,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -348,436 +550,436 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
- "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
- },
- {
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
"PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_mispredicts_resteers"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
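The two bad-speculation rates above are plain instructions-per-event ratios;
lower values mean more frequent mispredicts. With hypothetical counts:

    inst_retired = 1.0e9      # INST_RETIRED.ANY
    br_misp_retired = 8.0e6   # BR_MISP_RETIRED.ALL_BRANCHES
    ipmispredict = inst_retired / br_misp_retired  # 125 instructions per JEClear
    flagged = ipmispredict < 200                   # True: below the threshold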
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
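tma_info_frontend_dsb_coverage above is the DSB share of all delivered uops. A
worked example with invented totals that land exactly on the 0.7 threshold:

    uops = {"DSB": 7.0e9, "LSD": 0.5e9, "MITE": 2.0e9, "MS": 0.5e9}
    dsb_coverage = uops["DSB"] / sum(uops.values())  # 7 / 10 = 0.70
    low_coverage = dsb_coverage < 0.7                # False, just at the limit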
{
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
- },
- {
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
- },
- {
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
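The two entries above share L1D_PEND_MISS.PENDING: dividing by occupied cycles
gives parallelism, while dividing by retired L1-miss loads (plus LFB hits)
gives latency. With hypothetical counts:

    pending = 6.0e8           # L1D_PEND_MISS.PENDING (cycle-accumulated occupancy)
    pending_cycles = 2.0e8    # L1D_PEND_MISS.PENDING_CYCLES
    l1_miss, hit_lfb = 2.5e7, 5.0e6
    mlp = pending / pending_cycles                     # 3.0 misses in flight on average
    load_miss_latency = pending / (l1_miss + hit_lfb)  # 20 core cycles per miss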
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
- "MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
- "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_socket_clks / duration_time)",
- "MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
- "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7 * (DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED)) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7 * (DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED)) / (2 * tma_info_core_core_clks)",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_system_socket_clks / duration_time)",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+ "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
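tma_info_system_mem_read_latency above converts TOR occupancy per request
(uncore cycles) into nanoseconds using the socket clock rate. With hypothetical
uncore counts:

    occupancy = 4.0e9     # UNC_C_TOR_OCCUPANCY.MISS_OPCODE, filter_opc=0x182
    inserts = 5.0e7       # UNC_C_TOR_INSERTS.MISS_OPCODE, filter_opc=0x182
    socket_clks = 2.0e9
    duration_time = 1.0   # seconds
    cycles_per_read = occupancy / inserts           # 80 uncore cycles
    socket_hz = socket_clks / duration_time         # 2 GHz
    latency_ns = 1e9 * cycles_per_read / socket_hz  # 40 ns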
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cbox_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
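The thread-level helpers above compose directly: IPC and CPI are reciprocals
over unhalted thread clocks, and slots is a fixed 4-wide multiple of core
clocks. A sketch with hypothetical counts (single thread per core, so core
clocks equal thread clocks per the tma_info_core_core_clks definition):

    inst_retired_any = 2.4e9
    cpu_clk_unhalted_thread = 1.5e9

    tma_info_thread_clks = cpu_clk_unhalted_thread
    tma_info_thread_ipc = inst_retired_any / tma_info_thread_clks  # 1.6
    tma_info_thread_cpi = 1 / tma_info_thread_ipc                  # 0.625
    tma_info_thread_slots = 4 * tma_info_thread_clks               # 6.0e9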
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_clks",
+ "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -786,7 +988,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -795,7 +997,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -805,7 +1007,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -815,7 +1017,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -824,11 +1026,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -844,7 +1046,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -854,7 +1056,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -864,7 +1066,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -884,16 +1086,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -903,7 +1105,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + RESOURCE_STALLS.SB) / (CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + RESOURCE_STALLS.SB) / (CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -913,7 +1115,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -926,21 +1128,21 @@
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -949,7 +1151,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -958,7 +1160,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -967,7 +1169,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -976,7 +1178,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -994,7 +1196,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1003,7 +1205,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1012,7 +1214,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -1022,7 +1224,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_TOTAL + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1031,7 +1233,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1040,7 +1242,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1049,7 +1251,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1058,7 +1260,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1067,7 +1269,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 180 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_clks",
+ "MetricExpr": "(200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 180 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1077,7 +1279,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1086,7 +1288,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1097,7 +1299,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1106,7 +1308,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1115,16 +1317,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1133,7 +1335,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1143,7 +1345,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1152,7 +1354,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1169,11 +1371,17 @@
},
{
"BriefDescription": "This metric serves as an approximation of legacy x87 usage",
- "MetricExpr": "INST_RETIRED.X87 * tma_info_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
+ "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_x87_use",
"MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
"PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid X87 high usage and preferably upgrade to modern ISA. See Tip under Tuning Hint.",
"ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore operating frequency in GHz",
+ "MetricExpr": "UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages) / 1e9 / duration_time",
+ "MetricName": "uncore_frequency",
+ "ScaleUnit": "1GHz"
}
]
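
A side note on the new uncore_frequency metric just above: its divisor (#num_cores / #num_packages * #num_packages) reduces algebraically to #num_cores, so the expression averages UNC_C_CLOCKTICKS per core and scales the result to GHz. A minimal Python sketch of that arithmetic, with every input value assumed rather than taken from any real run:

# Sketch of the uncore_frequency expression; every value below is assumed.
unc_c_clockticks = 2.4e11   # UNC_C_CLOCKTICKS summed over all CBo boxes (assumed)
num_cores = 24              # stands in for #num_cores (assumed)
num_packages = 2            # stands in for #num_packages (assumed)
duration_time = 5.0         # elapsed wall-clock seconds (assumed)

# (num_cores / num_packages * num_packages) is just num_cores; it is kept in
# this form only to mirror the MetricExpr literally.
ghz = unc_c_clockticks / (num_cores / num_packages * num_packages) / 1e9 / duration_time
print(f"uncore_frequency ~= {ghz:.2f} GHz")   # ~2.00 GHz for these inputs
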
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json
index e4826dc7f797..986869252e71 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json
@@ -32,6 +32,14 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
@@ -77,6 +85,13 @@
"UMask": "0x2a"
},
{
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xfc"
+ },
+ {
"BriefDescription": "Cycles with any input/output SSE or FP assist",
"CounterMask": "1",
"EventCode": "0xCA",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/metricgroups.json b/tools/perf/pmu-events/arch/x86/broadwellx/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
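
The new metricgroups.json is a flat JSON object mapping each group name to a one-line description, which the pmu-events build uses to document metric groups; it can also be inspected directly. A small sketch (the path assumes the current directory is a kernel source checkout):

import json

# Path of the file added above, relative to a kernel tree (assumed cwd).
path = "tools/perf/pmu-events/arch/x86/broadwellx/metricgroups.json"
with open(path) as f:
    groups = json.load(f)

# Print just the per-level top-down groups defined in the file.
for name, desc in sorted(groups.items()):
    if name.startswith(("TopdownL", "tma_L")):
        print(f"{name}: {desc}")
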
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 875c766222e3..fbb111e40829 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -50,11 +50,238 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
+ "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY",
+ "MetricName": "cpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "CPU operating frequency (in GHz)",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
+ "MetricName": "cpu_operating_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Percentage of time spent in the active CPU power state C0",
+ "MetricExpr": "tma_info_system_cpu_utilization",
+ "MetricName": "cpu_utilization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2mb_large_page_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the Data Translation Lookaside Buffer (DTLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_store_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
+ "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+ "MetricExpr": "(UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART0 + UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART1 + UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART2 + UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART3) * 4 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "itlb_large_page_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "itlb_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY",
+ "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L1_HIT / INST_RETIRED.ANY",
+ "MetricName": "l1d_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L1D.REPLACEMENT / INST_RETIRED.ANY",
+ "MetricName": "l1d_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_code_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed data read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_LINES_IN.ALL / INST_RETIRED.ANY",
+ "MetricName": "l2_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x12CC0233@ / INST_RETIRED.ANY",
+ "MetricName": "llc_code_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) in nano seconds",
+ "MetricExpr": "1e9 * (cha@UNC_CHA_TOR_OCCUPANCY.IA_MISS\\,config1\\=0x40433@ / cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40433@) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to local memory in nano seconds",
+ "MetricExpr": "1e9 * (cha@UNC_CHA_TOR_OCCUPANCY.IA_MISS\\,config1\\=0x40432@ / cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_local_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to remote memory in nano seconds",
+ "MetricExpr": "1e9 * (cha@UNC_CHA_TOR_OCCUPANCY.IA_MISS\\,config1\\=0x40431@ / cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_remote_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x12D40433@ / INST_RETIRED.ANY",
+ "MetricName": "llc_data_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.WRITES_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to remote memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_REMOTE * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_remote_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "The ratio of number of completed memory load instructions to the total number completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "loads_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "DDR memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.RD * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.WR * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Memory read that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@ / (cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@ + cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@)",
+ "MetricName": "numa_reads_addressed_to_local_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@ / (cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@ + cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@)",
+ "MetricName": "numa_reads_addressed_to_remote_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_decoded_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MITE_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_legacy_decode_pipeline",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MS_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_microcode_sequencer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_PMM_RPQ_INSERTS * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS) * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_PMM_WPQ_INSERTS * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -70,8 +297,14 @@
"ScaleUnit": "1SMI#"
},
{
+ "BriefDescription": "The ratio of number of completed memory store instructions to the total number completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_STORES / INST_RETIRED.ANY",
+ "MetricName": "stores_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -80,7 +313,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -88,7 +321,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_slots",
+ "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -97,7 +330,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "1 - tma_frontend_bound - (UOPS_ISSUED.ANY + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "1 - tma_frontend_bound - (UOPS_ISSUED.ANY + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -107,7 +340,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -123,12 +356,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -146,7 +379,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -156,7 +389,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(44 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) + 44 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) + 44 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -177,7 +410,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "44 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -186,16 +419,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -205,7 +438,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound)",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -214,45 +447,45 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "min(9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(110 * tma_info_average_frequency * (OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM + OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_average_frequency * (OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_clks",
+ "MetricExpr": "(110 * tma_info_system_average_frequency * (OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM + OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_system_average_frequency * (OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -262,11 +495,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -274,14 +507,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_slots",
+ "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -356,7 +589,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -375,7 +608,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS + UOPS_RETIRED.MACRO_FUSED - INST_RETIRED.ANY) / tma_info_slots",
+ "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS + UOPS_RETIRED.MACRO_FUSED - INST_RETIRED.ANY) / tma_info_thread_slots",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -385,7 +618,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_clks",
+ "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -393,705 +626,711 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_core_ipmispredict",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "(BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;TopdownL1;tma_L1_group",
+ "MetricName": "tma_info_core_ipmispredict",
+ "MetricgroupNoGroup": "TopdownL1"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / DSB2MITE_SWITCHES.COUNT",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
- },
- {
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ + 2",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ + 2",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
- "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3) * 4 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_read_bw"
- },
- {
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
- "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_write_bw"
- },
- {
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx512",
- "MetricThreshold": "tma_info_iparith_avx512 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+ "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_nonsilent_pki"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+ "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_silent_pki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
- "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_nonsilent_pki"
- },
- {
- "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
- "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_silent_pki"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
- },
- {
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
+ },
+ {
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
+ },
+ {
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3) * 4 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_read_bw"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_write_bw"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
"MetricExpr": "1e9 * (UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS) / imc_0@event\\=0x0@",
"MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_dram_read_latency",
+ "MetricName": "tma_info_system_mem_dram_read_latency",
"PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
"MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
+ "MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
"MetricExpr": "(1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@ if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_pmm_read_latency",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_socket_clks / duration_time)",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
+ "MetricName": "tma_info_system_mem_read_latency",
"PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
- },
- {
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
- },
- {
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
- },
- {
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
- },
- {
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
- },
- {
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING) / (2 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
- },
- {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC",
- "MetricName": "tma_info_pmm_read_bw"
+ "MetricName": "tma_info_system_pmm_read_bw"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC",
- "MetricName": "tma_info_pmm_write_bw"
+ "MetricName": "tma_info_system_pmm_write_bw"
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
- "MetricExpr": "(CORE_POWER.LVL0_TURBO_LICENSE / 2 / tma_info_core_clks if #SMT_on else CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_clks)",
+ "MetricExpr": "(CORE_POWER.LVL0_TURBO_LICENSE / 2 / tma_info_core_core_clks if #SMT_on else CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks)",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license0_utilization",
+ "MetricName": "tma_info_system_power_license0_utilization",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
- "MetricExpr": "(CORE_POWER.LVL1_TURBO_LICENSE / 2 / tma_info_core_clks if #SMT_on else CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_clks)",
+ "MetricExpr": "(CORE_POWER.LVL1_TURBO_LICENSE / 2 / tma_info_core_core_clks if #SMT_on else CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks)",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license1_utilization",
- "MetricThreshold": "tma_info_power_license1_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license1_utilization",
+ "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
- "MetricExpr": "(CORE_POWER.LVL2_TURBO_LICENSE / 2 / tma_info_core_clks if #SMT_on else CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_clks)",
+ "MetricExpr": "(CORE_POWER.LVL2_TURBO_LICENSE / 2 / tma_info_core_core_clks if #SMT_on else CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks)",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license2_utilization",
- "MetricThreshold": "tma_info_power_license2_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license2_utilization",
+ "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
- },
- {
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1100,7 +1339,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1110,7 +1349,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks)",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1119,7 +1358,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1128,20 +1367,20 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "17 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "17 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -1156,7 +1395,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1174,7 +1413,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1182,7 +1421,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
- "MetricExpr": "59.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "59.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1191,7 +1430,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1211,20 +1450,20 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1248,7 +1487,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1257,19 +1496,19 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
@@ -1284,7 +1523,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1321,7 +1560,7 @@
{
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_pmm_bound",
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1330,7 +1569,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1339,7 +1578,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1348,7 +1587,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -1357,7 +1596,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -1375,7 +1614,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1384,7 +1623,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1393,7 +1632,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -1402,7 +1641,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_clks)",
+ "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1411,7 +1650,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_clks",
+ "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1420,7 +1659,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_1 - UOPS_EXECUTED.CORE_CYCLES_GE_2) / 2 if #SMT_on else EXE_ACTIVITY.1_PORTS_UTIL) / tma_info_core_clks",
+ "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_1 - UOPS_EXECUTED.CORE_CYCLES_GE_2) / 2 if #SMT_on else EXE_ACTIVITY.1_PORTS_UTIL) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1429,7 +1668,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_2 - UOPS_EXECUTED.CORE_CYCLES_GE_3) / 2 if #SMT_on else EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_core_clks",
+ "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_2 - UOPS_EXECUTED.CORE_CYCLES_GE_3) / 2 if #SMT_on else EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1438,7 +1677,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_clks",
+ "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1447,7 +1686,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(89.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1456,7 +1695,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
- "MetricExpr": "127 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "127 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1465,7 +1704,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1475,7 +1714,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1484,7 +1723,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "40 * ROB_MISC_EVENTS.PAUSE_INST / tma_info_clks",
+ "MetricExpr": "40 * ROB_MISC_EVENTS.PAUSE_INST / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -1494,7 +1733,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1503,7 +1742,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1512,16 +1751,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1530,7 +1769,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1540,7 +1779,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 11 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 11 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1549,7 +1788,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1565,7 +1804,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1573,7 +1812,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "9 * BACLEARS.ANY / tma_info_clks",
+ "MetricExpr": "9 * BACLEARS.ANY / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1591,30 +1830,42 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore operating frequency in GHz",
+ "MetricExpr": "UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages) / 1e9 / duration_time",
+ "MetricName": "uncore_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_transmit_bw",
+ "ScaleUnit": "1MB/s"
}
]
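
The four tsx_* metrics above now wrap their expressions in has_event() guards so they evaluate to 0 when the transactional events are not exposed, and the two new uncore metrics derive a frequency and a bandwidth from raw counts. Below is a minimal Python sketch of how these expressions evaluate, with hypothetical counter values; the 7.111111111111111 bytes-per-flit constant is taken from the metric itself and is assumed to encode 64 bytes carried per 9 data flits.

def tsx_aborted_cycles(cycles_t, cycles_ct, cycles, has_event=True):
    # "(max(cycles\-t - cycles\-ct, 0) / cycles if has_event(cycles\-t) else 0)"
    return max(cycles_t - cycles_ct, 0) / cycles if has_event else 0.0

def upi_data_transmit_bw(all_data_flits, duration_s):
    # "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time"
    return all_data_flits * 7.111111111111111 / 1e6 / duration_s  # MB/s

def uncore_frequency(cha_clockticks, n_cha, n_packages, duration_s):
    # "UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages) / 1e9 / duration_time"
    return cha_clockticks / (n_cha * n_packages) / 1e9 / duration_s  # GHz

print(tsx_aborted_cycles(5_000_000, 4_200_000, 100_000_000))  # 0.008 -> 0.8% with ScaleUnit 100%
print(upi_data_transmit_bw(9_000_000_000, 1.0))               # ~64000 MB/s
print(uncore_frequency(28 * 2.4e9, 28, 1, 1.0))               # 2.4 GHz
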
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json
index 1f46e6b33856..bb4d5101f962 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json
@@ -32,6 +32,14 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
@@ -48,6 +56,22 @@
"UMask": "0x80"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Counts once for most SIMD scalar computational floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "PublicDescription": "Counts once for most SIMD scalar computational single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3"
+ },
+ {
"BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
@@ -64,6 +88,13 @@
"UMask": "0x2"
},
{
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xfc"
+ },
+ {
"BriefDescription": "Intel AVX-512 computational 512-bit packed BFloat16 instructions retired.",
"EventCode": "0xCF",
"EventName": "FP_ARITH_INST_RETIRED2.128BIT_PACKED_BF16",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
index 04f08e4d2402..095904c77001 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
@@ -8,6 +8,14 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"EventCode": "0xAB",
"EventName": "DSB2MITE_SWITCHES.COUNT",
@@ -245,27 +253,34 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"EventCode": "0x83",
"EventName": "ICACHE_64B.IFTAG_STALL",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]",
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
@@ -297,6 +312,24 @@
"UMask": "0x8"
},
{
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/metricgroups.json b/tools/perf/pmu-events/arch/x86/cascadelakex/metricgroups.json
new file mode 100644
index 000000000000..bc6a9a4d27a9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/metricgroups.json
@@ -0,0 +1,114 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IoBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
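
The new metricgroups.json maps the MetricGroup tokens used by the clx-metrics.json entries above to human-readable descriptions, so tools can explain a group rather than print a bare tag. A sketch of consuming it, assuming the file path added by this diff:

import json

path = "tools/perf/pmu-events/arch/x86/cascadelakex/metricgroups.json"
with open(path) as f:
    groups = json.load(f)
# Print the level-N top-down groups with their descriptions.
for name in sorted(groups):
    if name.startswith("TopdownL"):
        print(f"{name}: {groups[name]}")
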
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
index 0f06e314fe36..66d686cc933e 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
@@ -26,12 +26,21 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Conditional branch instructions retired.",
+ "BriefDescription": "Conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.CONDITIONAL]",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PublicDescription": "This event counts conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.CONDITIONAL]",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.COND]",
"Errata": "SKL091",
"EventCode": "0xC4",
"EventName": "BR_INST_RETIRED.CONDITIONAL",
"PEBS": "1",
- "PublicDescription": "This event counts conditional branch instructions retired.",
+ "PublicDescription": "This event counts conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.COND]",
"SampleAfterValue": "400009",
"UMask": "0x1"
},
@@ -352,10 +361,10 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"EventCode": "0x87",
"EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -414,6 +423,16 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Clears speculative count",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.CLEARS_COUNT",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
"EventCode": "0x0D",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
@@ -469,11 +488,11 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0xA8",
"EventName": "LSD.CYCLES_4_UOPS",
- "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -487,6 +506,15 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]",
+ "CounterMask": "4",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_OK",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_4_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Number of Uops delivered by the LSD.",
"EventCode": "0xA8",
"EventName": "LSD.UOPS",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json
index 725780fb3990..1a342dff1503 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json
@@ -6606,7 +6606,7 @@
"EventCode": "0x52",
"EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
"PerPkg": "1",
- "PublicDescription": "ad and bl messages were actually slotted into the same flit in paralle",
+ "PublicDescription": "ad and bl messages were actually slotted into the same flit in parallel",
"UMask": "0x8",
"Unit": "M3UPI"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
index f761856d738e..d82d2cca6f0a 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
@@ -2735,7 +2735,7 @@
"EventCode": "0x81",
"EventName": "UNC_M_WPQ_OCCUPANCY",
"PerPkg": "1",
- "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts?",
+ "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
"Unit": "iMC"
},
{
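
The cleaned-up UNC_M_WPQ_OCCUPANCY description spells out the two derived values: average queue occupancy (divide by non-empty cycles) and average latency (divide by allocations). A sketch of both, assuming UNC_M_WPQ_INSERTS as the allocation count:

def wpq_stats(occupancy_sum, cycles_not_empty, inserts):
    # UNC_M_WPQ_OCCUPANCY accumulates the number of WPQ entries each cycle,
    # so by Little's law occupancy_sum / inserts is the mean residency time.
    avg_occupancy = occupancy_sum / cycles_not_empty
    avg_latency_cycles = occupancy_sum / inserts
    return avg_occupancy, avg_latency_cycles
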
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json b/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json
index 0ab90e3bf76b..c6be60584522 100644
--- a/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/cache.json
@@ -72,6 +72,7 @@
"BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
"EventCode": "0x34",
"EventName": "MEM_BOUND_STALLS.IFETCH",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
"SampleAfterValue": "200003",
"UMask": "0x38"
},
@@ -437,6 +438,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
"MSRIndex": "0x1a6,0x1a7",
@@ -446,6 +448,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_HITM",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -455,6 +458,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -464,6 +468,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -473,6 +478,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_MISS",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -482,6 +488,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json b/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json
index 18621909d1a9..c02eb0e836ad 100644
--- a/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/memory.json
@@ -96,6 +96,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -105,6 +106,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS_LOCAL",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/other.json b/tools/perf/pmu-events/arch/x86/elkhartlake/other.json
index 00ae180ded25..fefbc383b840 100644
--- a/tools/perf/pmu-events/arch/x86/elkhartlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/other.json
@@ -1,6 +1,7 @@
[
{
"BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.SELF_LOCKS",
+ "Deprecated": "1",
"EdgeDetect": "1",
"EventCode": "0x63",
"EventName": "BUS_LOCK.ALL",
@@ -16,6 +17,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.BLOCK_CYCLES",
+ "Deprecated": "1",
"EventCode": "0x63",
"EventName": "BUS_LOCK.CYCLES_OTHER_BLOCK",
"SampleAfterValue": "200003",
@@ -23,6 +25,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.LOCK_CYCLES",
+ "Deprecated": "1",
"EventCode": "0x63",
"EventName": "BUS_LOCK.CYCLES_SELF_BLOCK",
"SampleAfterValue": "200003",
@@ -46,6 +49,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+ "Deprecated": "1",
"EventCode": "0x34",
"EventName": "C0_STALLS.LOAD_DRAM_HIT",
"SampleAfterValue": "200003",
@@ -53,6 +57,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_L2_HIT",
+ "Deprecated": "1",
"EventCode": "0x34",
"EventName": "C0_STALLS.LOAD_L2_HIT",
"SampleAfterValue": "200003",
@@ -60,6 +65,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_LLC_HIT",
+ "Deprecated": "1",
"EventCode": "0x34",
"EventName": "C0_STALLS.LOAD_LLC_HIT",
"SampleAfterValue": "200003",
@@ -207,6 +213,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.ANY_RESPONSE",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -216,6 +223,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.DRAM",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -225,6 +233,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.LOCAL_DRAM",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.LOCAL_DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -234,6 +243,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.OUTSTANDING",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
diff --git a/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json b/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json
index 9dd8c909facc..c483c0838e08 100644
--- a/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json
@@ -165,6 +165,7 @@
},
{
"BriefDescription": "This event is deprecated.",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "CYCLES_DIV_BUSY.ANY",
"SampleAfterValue": "2000003"
@@ -283,6 +284,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event TOPDOWN_BAD_SPECULATION.FASTNUKE",
+ "Deprecated": "1",
"EventCode": "0x73",
"EventName": "TOPDOWN_BAD_SPECULATION.MONUKE",
"SampleAfterValue": "1000003",
@@ -338,6 +340,7 @@
},
{
"BriefDescription": "This event is deprecated.",
+ "Deprecated": "1",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.STORE_BUFFER",
"SampleAfterValue": "1000003",
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index 9570a88d6d1c..79d89c263677 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -71,7 +71,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -81,7 +81,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -89,7 +89,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -109,7 +109,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -125,12 +125,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -150,7 +150,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -171,7 +171,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -180,7 +180,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "10 * ARITH.DIVIDER_UOPS / tma_info_core_clks",
+ "MetricExpr": "10 * ARITH.DIVIDER_UOPS / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -190,7 +190,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -199,25 +199,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -226,7 +226,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -235,7 +235,7 @@
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "60 * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE / tma_info_clks",
+ "MetricExpr": "60 * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -245,11 +245,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.REQUEST_FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.REQUEST_FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -257,14 +257,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_slots",
+ "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -274,7 +274,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -294,324 +294,324 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
- "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
- },
- {
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
- },
- {
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "(UOPS_EXECUTED.CORE / 2 / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) if #SMT_on else UOPS_EXECUTED.CORE / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@))",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "(UOPS_EXECUTED.CORE / 2 / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) if #SMT_on else UOPS_EXECUTED.CORE / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@))",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
+ },
+ {
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average number of parallel requests to external memory",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_parallel_requests",
- "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency"
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_clks",
+ "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_core_clks",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel requests to external memory",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_parallel_requests",
+ "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
+ },
+ {
+ "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_request_latency"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "UNC_CLOCK.SOCKET",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -620,7 +620,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_clks, 0)",
+ "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -629,7 +629,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -639,7 +639,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -649,7 +649,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_clks",
+ "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -658,11 +658,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -678,7 +678,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -688,7 +688,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -708,16 +708,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -727,7 +727,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -737,7 +737,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -746,16 +746,16 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -764,7 +764,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -773,7 +773,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -782,7 +782,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -791,7 +791,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -809,7 +809,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -818,7 +818,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -827,7 +827,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -837,7 +837,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_clks",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -846,7 +846,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -855,7 +855,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -864,7 +864,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -873,7 +873,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -881,7 +881,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -892,7 +892,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -901,7 +901,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -910,16 +910,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -928,7 +928,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -938,7 +938,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -947,7 +947,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -955,7 +955,7 @@
},
{
"BriefDescription": "This metric serves as an approximation of legacy x87 usage",
- "MetricExpr": "INST_RETIRED.X87 * tma_info_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
+ "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_x87_use",
"MetricThreshold": "tma_x87_use > 0.1",
diff --git a/tools/perf/pmu-events/arch/x86/haswell/metricgroups.json b/tools/perf/pmu-events/arch/x86/haswell/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
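The file added above (presumably haswellx's metricgroups.json, given the surrounding hunks) is a flat JSON map from MetricGroup tag to a human-readable description. A minimal sketch, assuming a kernel-tree layout and the JSON shapes shown in this patch, of how such a map could be cross-checked against the metrics file so no metric references an undocumented group (paths and the check itself are illustrative, not part of this patch):

```python
#!/usr/bin/env python3
# Hypothetical sanity check: verify that every MetricGroup tag used by
# hsx-metrics.json has a description in the metricgroups.json mapping.
# Assumes the script runs from the root of a kernel source tree.
import json

ARCH = "tools/perf/pmu-events/arch/x86/haswellx/"

with open(ARCH + "metricgroups.json") as f:
    groups = json.load(f)      # {"GroupName": "description", ...}
with open(ARCH + "hsx-metrics.json") as f:
    metrics = json.load(f)     # list of metric objects

missing = set()
for m in metrics:
    # MetricGroup is a ';'-separated tag list, e.g. "TopdownL4;tma_L4_group"
    for tag in m.get("MetricGroup", "").split(";"):
        if tag and tag not in groups:
            missing.add(tag)

print("undocumented groups:", sorted(missing) or "none")
```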
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index a522202cf684..5f451948c893 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -50,11 +50,207 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
+ "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY",
+ "MetricName": "cpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "CPU operating frequency (in GHz)",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
+ "MetricName": "cpu_operating_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Percentage of time spent in the active CPU power state C0",
+ "MetricExpr": "tma_info_system_cpu_utilization",
+ "MetricName": "cpu_utilization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_store_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x19e@ * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x1c8\\,filter_tid\\=0x3e@ * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "itlb_large_page_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "itlb_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY",
+ "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L1_HIT / INST_RETIRED.ANY",
+ "MetricName": "l1d_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L1D.REPLACEMENT / INST_RETIRED.ANY",
+ "MetricName": "l1d_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_code_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_HIT / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed data read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_LINES_IN.ALL / INST_RETIRED.ANY",
+ "MetricName": "l2_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "(cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x181@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x191@) / INST_RETIRED.ANY",
+ "MetricName": "llc_code_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) in nano seconds",
+ "MetricExpr": "1e9 * (cbox@UNC_C_TOR_OCCUPANCY.MISS_OPCODE\\,filter_opc\\=0x182@ / cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@) / (UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to local memory in nano seconds",
+ "MetricExpr": "1e9 * (cbox@UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ / cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@) / (UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_local_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to remote memory in nano seconds",
+ "MetricExpr": "1e9 * (cbox@UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@ / cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@) / (UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_remote_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "(cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x192@) / INST_RETIRED.ANY",
+ "MetricName": "llc_data_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "The ratio of number of completed memory load instructions to the total number completed instructions",
+ "MetricExpr": "MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "loads_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "DDR memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.RD * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.WR * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Memory read that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ / (cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@)",
+ "MetricName": "numa_reads_addressed_to_local_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@ / (cbox@UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE\\,filter_opc\\=0x182@)",
+ "MetricName": "numa_reads_addressed_to_remote_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_decoded_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MITE_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_legacy_decode_pipeline",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from loop stream detector(LSD) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "(UOPS_ISSUED.ANY - IDQ.MITE_UOPS - IDQ.MS_UOPS - IDQ.DSB_UOPS) / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_loop_stream_detector",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MS_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_microcode_sequencer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Intel(R) Quick Path Interconnect (QPI) data transmit bandwidth (MB/sec)",
+ "MetricExpr": "UNC_Q_TxL_FLITS_G0.DATA * 8 / 1e6 / duration_time",
+ "MetricName": "qpi_data_transmit_bw",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -70,8 +266,14 @@
"ScaleUnit": "1SMI#"
},
{
+ "BriefDescription": "The ratio of number of completed memory store instructions to the total number completed instructions",
+ "MetricExpr": "MEM_UOPS_RETIRED.ALL_STORES / INST_RETIRED.ANY",
+ "MetricName": "stores_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -81,7 +283,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -89,7 +291,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -109,7 +311,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -125,12 +327,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -150,7 +352,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -171,7 +373,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -180,7 +382,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "10 * ARITH.DIVIDER_UOPS / tma_info_core_clks",
+ "MetricExpr": "10 * ARITH.DIVIDER_UOPS / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -190,7 +392,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -199,25 +401,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -226,7 +428,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(8 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -235,7 +437,7 @@
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "(200 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM + 60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE) / tma_info_clks",
+ "MetricExpr": "(200 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM + 60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -245,11 +447,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.REQUEST_FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.REQUEST_FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -257,14 +459,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_slots",
+ "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -274,7 +476,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -294,325 +496,325 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
- "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
- },
- {
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
- },
- {
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "(UOPS_EXECUTED.CORE / 2 / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) if #SMT_on else UOPS_EXECUTED.CORE / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@))",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "(UOPS_EXECUTED.CORE / 2 / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) if #SMT_on else UOPS_EXECUTED.CORE / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@))",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
+ },
+ {
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
- "MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
- "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_socket_clks / duration_time)",
- "MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
- "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_clks",
+ "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_core_clks",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_system_socket_clks / duration_time)",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+ "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cbox_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -621,7 +823,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_clks, 0)",
+ "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -630,7 +832,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -640,7 +842,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -650,7 +852,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -659,11 +861,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -679,7 +881,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -689,7 +891,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -699,7 +901,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -719,16 +921,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -738,7 +940,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -748,7 +950,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -757,16 +959,16 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -775,7 +977,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -784,7 +986,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -793,7 +995,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -802,7 +1004,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -820,7 +1022,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -829,7 +1031,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -838,7 +1040,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -848,7 +1050,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_clks",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -857,7 +1059,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -866,7 +1068,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -875,7 +1077,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -884,7 +1086,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -893,7 +1095,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 180 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_clks",
+ "MetricExpr": "(200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) + 180 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD)))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -903,7 +1105,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -912,7 +1114,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -923,7 +1125,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -932,7 +1134,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -941,16 +1143,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -959,7 +1161,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -969,7 +1171,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -978,7 +1180,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -986,11 +1188,17 @@
},
{
"BriefDescription": "This metric serves as an approximation of legacy x87 usage",
- "MetricExpr": "INST_RETIRED.X87 * tma_info_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
+ "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_x87_use",
"MetricThreshold": "tma_x87_use > 0.1",
"PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid X87 high usage and preferably upgrade to modern ISA. See Tip under Tuning Hint.",
"ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore operating frequency in GHz",
+ "MetricExpr": "UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages) / 1e9 / duration_time",
+ "MetricName": "uncore_frequency",
+ "ScaleUnit": "1GHz"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/metricgroups.json b/tools/perf/pmu-events/arch/x86/haswellx/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
diff --git a/tools/perf/pmu-events/arch/x86/icelake/cache.json b/tools/perf/pmu-events/arch/x86/icelake/cache.json
index a9174a0837f0..d26c4efe35f0 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/cache.json
@@ -155,18 +155,18 @@
"UMask": "0x21"
},
{
- "BriefDescription": "All requests that miss L2 cache. This event is not supported on ICL and ICX products, only supported on RKL products.",
+ "BriefDescription": "This event is deprecated.",
+ "Deprecated": "1",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts all requests that miss L2 cache. This event is not supported on ICL and ICX products, only supported on RKL products.",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
{
- "BriefDescription": "All L2 requests. This event is not supported on ICL and ICX products, only supported on RKL products.",
+ "BriefDescription": "This event is deprecated.",
+ "Deprecated": "1",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all L2 requests. This event is not supported on ICL and ICX products, only supported on RKL products.",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -339,6 +339,16 @@
"UMask": "0x8"
},
{
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or Bus Lock.",
+ "Data_LA": "1",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
"Data_LA": "1",
"EventCode": "0xd1",
@@ -834,6 +844,14 @@
"UMask": "0x4"
},
{
+ "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
+ "EventCode": "0xF4",
+ "EventName": "SQ_MISC.BUS_LOCK",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
"BriefDescription": "Cycles the queue waiting for offcore responses is full.",
"EventCode": "0xf4",
"EventName": "SQ_MISC.SQ_FULL",
diff --git a/tools/perf/pmu-events/arch/x86/icelake/frontend.json b/tools/perf/pmu-events/arch/x86/icelake/frontend.json
index 3e3d2b002170..2b539a08d2bf 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/frontend.json
@@ -8,6 +8,14 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
"CounterMask": "1",
"EdgeDetect": "1",
@@ -213,10 +221,10 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]",
"EventCode": "0x80",
"EventName": "ICACHE_16B.IFDATA_STALL",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -237,10 +245,26 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"EventCode": "0x83",
"EventName": "ICACHE_64B.IFTAG_STALL",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALLS",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
index 1a2154f28b7b..8fcc05c4e0a1 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -64,7 +64,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -85,7 +85,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -94,7 +94,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -102,7 +102,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * ASSISTS.ANY / tma_info_slots",
+ "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -111,27 +111,29 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
- "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_branch_instructions",
"MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
@@ -144,12 +146,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -167,7 +169,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -177,7 +179,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(29 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(29 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -197,7 +199,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "23.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -206,16 +208,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -225,7 +227,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound",
+ "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -234,43 +236,43 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "32.5 * tma_info_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "32.5 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -279,11 +281,11 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -291,14 +293,14 @@
"MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_slots",
+ "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -317,6 +319,7 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -326,7 +329,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -335,7 +338,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -344,7 +347,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -353,7 +356,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -362,7 +365,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_512b",
"MetricThreshold": "tma_fp_vector_512b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -371,11 +374,12 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
- "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots",
+ "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
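
For readers following the topdown counter plumbing: the tma_frontend_bound expression above normalizes the raw topdown-fe-bound slot count by the sum of all four topdown counters, then subtracts the dropped-uop share. A rough sketch with invented counter readings, treating tma_info_thread_slots as that same sum (an approximation for illustration only):

    # Sketch only: hypothetical slots-domain counter values.
    topdown_fe_bound = 2.0e9   # hypothetical
    topdown_bad_spec = 0.5e9   # hypothetical
    topdown_retiring = 3.0e9   # hypothetical
    topdown_be_bound = 4.5e9   # hypothetical
    uop_dropping = 0.1e9       # hypothetical INT_MISC.UOP_DROPPING count

    slots = topdown_fe_bound + topdown_bad_spec + topdown_retiring + topdown_be_bound
    tma_frontend_bound = topdown_fe_bound / slots - uop_dropping / slots
    print(f"tma_frontend_bound = {tma_frontend_bound:.1%}")  # -> 19.0% with these inputs
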
@@ -391,7 +395,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -399,668 +403,676 @@
"ScaleUnit": "100%"
},
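
The long run of -/+ pairs that follows is a mechanical rename of the tma_info_* helper metrics into categorized names (tma_info_thread_*, tma_info_core_*, tma_info_system_*, tma_info_bottleneck_*, and so on), applied consistently to the MetricName, MetricExpr, MetricThreshold and PublicDescription fields. A toy script along these lines, with a hypothetical subset of the mappings visible below (not the tool actually used to generate this patch), would perform such a rename:

    import json
    import re

    # A few of the mappings visible in the hunks below; hypothetical subset.
    RENAMES = {
        "tma_info_clks": "tma_info_thread_clks",
        "tma_info_slots": "tma_info_thread_slots",
        "tma_info_ipc": "tma_info_thread_ipc",
        "tma_info_core_clks": "tma_info_core_core_clks",
        "tma_info_average_frequency": "tma_info_system_average_frequency",
    }
    PATTERN = re.compile(r"\b(?:" + "|".join(map(re.escape, RENAMES)) + r")\b")

    def rename_metrics(path):
        """Rewrite old tma_info_* names in every field of a metrics JSON file."""
        with open(path) as f:
            metrics = json.load(f)
        for metric in metrics:
            for key in ("MetricName", "MetricExpr", "MetricThreshold", "PublicDescription"):
                if key in metric:
                    metric[key] = PATTERN.sub(lambda m: RENAMES[m.group(0)], metric[key])
        with open(path, "w") as f:
            json.dump(metrics, f, indent=4)
            f.write("\n")
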
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
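
The tma_info_bottleneck_memory_bandwidth expression above is a weighted sum: each memory level's share of tma_memory_bound is multiplied by the bandwidth-limited share within that level. Schematically, with hypothetical sub-metric values (a sketch of the weighting pattern only, omitting the trailing fill-buffer term of the full expression):

    # Sketch only: the weighting pattern behind the bottleneck metric.
    tma_memory_bound = 0.40      # hypothetical
    dram_share = 0.50            # tma_dram_bound over the five *_bound siblings summed
    bw_share_in_dram = 0.60      # tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)
    l3_share = 0.20              # tma_l3_bound over the same sibling sum
    sq_full_share_in_l3 = 0.25   # tma_sq_full over its four siblings summed

    cost = 100 * tma_memory_bound * (dram_share * bw_share_in_dram
                                     + l3_share * sq_full_share_in_l3)
    print(f"memory-bandwidth bottleneck cost = {cost:.1f}")  # -> 14.0 with these inputs
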
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
+ },
+ {
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
- },
- {
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 5 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 5 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
+ "MetricGroup": "Fed;LSD",
+ "MetricName": "tma_info_frontend_lsd_coverage"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx512",
- "MetricThreshold": "tma_info_iparith_avx512 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_ntaken",
- "MetricThreshold": "tma_info_ipmisp_cond_ntaken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_taken",
- "MetricThreshold": "tma_info_ipmisp_cond_taken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_ret",
- "MetricThreshold": "tma_info_ipmisp_ret < 500"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 11",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 11",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
},
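
Aside on the bandwidth formulas consolidated above: each is cache lines moved times 64 bytes, scaled to GB/s over the wall-clock duration_time. A minimal sketch of that arithmetic in Python, using hypothetical counter values (nothing below comes from real hardware):

    # Fill-bandwidth shape shared by the tma_info_memory_core_*_cache_fill_bw
    # metrics above: 64 * <lines-in event> / 1e9 / duration_time
    CACHE_LINE_BYTES = 64

    def fill_bw_gbps(lines_filled, duration_time_s):
        return CACHE_LINE_BYTES * lines_filled / 1e9 / duration_time_s

    l1d_replacement = 180_000_000  # hypothetical L1D.REPLACEMENT count
    l2_lines_in = 35_000_000       # hypothetical L2_LINES_IN.ALL count
    print(fill_bw_gbps(l1d_replacement, 1.0))  # 11.52 GB/s of L1D fill
    print(fill_bw_gbps(l2_lines_in, 1.0))      # 2.24 GB/s of L2 fill
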
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
- "MetricExpr": "1e3 * (OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS) / tma_info_instructions",
+ "MetricExpr": "1e3 * (OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS) / tma_info_inst_mix_instructions",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
- },
- {
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
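
The two metrics above divide the same occupancy counter two ways: by completed misses (plus fill-buffer merges) for the average miss latency, and by miss-active cycles for the parallelism. A worked sketch, all counts hypothetical:

    # tma_info_memory_load_miss_real_latency and tma_info_memory_mlp, per the
    # expressions above; counter values are made up for illustration.
    pending = 400_000_000        # L1D_PEND_MISS.PENDING (occupancy, summed per cycle)
    pending_cycles = 50_000_000  # L1D_PEND_MISS.PENDING_CYCLES (cycles with >= 1 miss)
    l1_miss = 8_000_000          # MEM_LOAD_RETIRED.L1_MISS
    fb_hit = 2_000_000           # MEM_LOAD_RETIRED.FB_HIT

    load_miss_real_latency = pending / (l1_miss + fb_hit)  # 40.0 core cycles per miss
    mlp = pending / pending_cycles                         # 8.0 misses in flight on average
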
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l3_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
},
{
- "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
- "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
- "MetricGroup": "Fed;LSD",
- "MetricName": "tma_info_lsd_coverage"
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
},
{
- "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
- "MetricExpr": "1 - (tma_info_cond_nt + tma_info_cond_tk + tma_info_callret + tma_info_jump)",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_other_branches"
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
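
The :k suffix in the two kernel-mode expressions is the standard perf event modifier restricting counts to ring 0. A hedged sketch of the resulting ratios, with hypothetical counts:

    # tma_info_system_kernel_cpi and tma_info_system_kernel_utilization,
    # per the expressions above; counter values are hypothetical.
    cycles_k = 3_000_000_000       # CPU_CLK_UNHALTED.THREAD_P:k
    inst_k = 1_500_000_000         # INST_RETIRED.ANY_P:k
    cycles_total = 20_000_000_000  # CPU_CLK_UNHALTED.THREAD

    kernel_cpi = cycles_k / inst_k                # 2.0 cycles per kernel instruction
    kernel_utilization = cycles_k / cycles_total  # 0.15, above the 0.05 threshold
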
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
- "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license0_utilization",
+ "MetricName": "tma_info_system_power_license0_utilization",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
- "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license1_utilization",
- "MetricThreshold": "tma_info_power_license1_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license1_utilization",
+ "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
- "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license2_utilization",
- "MetricThreshold": "tma_info_power_license2_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license2_utilization",
+ "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "TOPDOWN.SLOTS",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
- "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
- "MetricExpr": "(tma_info_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
- "MetricGroup": "SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots_utilization"
- },
- {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "UNC_CLOCK.SOCKET",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
- },
- {
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "TOPDOWN.SLOTS",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization"
},
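
The tma_info_thread_* block above is self-referential by design: CPI is defined as the reciprocal of IPC, and both reuse tma_info_thread_clks. A small sketch with hypothetical counts:

    # tma_info_thread_ipc / tma_info_thread_cpi, per the expressions above.
    inst_retired = 12_000_000_000  # INST_RETIRED.ANY (hypothetical)
    thread_clks = 10_000_000_000   # CPU_CLK_UNHALTED.THREAD (hypothetical)

    ipc = inst_retired / thread_clks  # 1.2
    cpi = 1 / ipc                     # ~0.833
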
{
"BriefDescription": "Uops Per Instruction",
- "MetricExpr": "tma_retiring * tma_info_slots / INST_RETIRED.ANY",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "tma_retiring * tma_info_slots / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 7.5"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 7.5"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1069,7 +1081,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1079,7 +1091,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks)",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1088,7 +1100,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1097,20 +1109,20 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "9 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "9 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -1125,7 +1137,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_clks)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1142,7 +1154,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1151,7 +1163,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1160,10 +1172,10 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
- "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit. LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
"ScaleUnit": "100%"
},
@@ -1179,20 +1191,20 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
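
tma_mem_bandwidth and tma_mem_latency above split the DRAM-bound stall cycles: cycles with four or more outstanding data reads (the cmask=4 occupancy) count as bandwidth-limited, and the remaining cycles with any outstanding read count as latency-limited. A sketch of the split, with hypothetical counts:

    # DRAM bandwidth/latency split per the two expressions above.
    clks = 10_000_000_000                # tma_info_thread_clks (hypothetical)
    cycles_ge4_data_rd = 1_200_000_000   # OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD, cmask=4
    cycles_with_data_rd = 3_000_000_000  # OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD

    tma_mem_bandwidth = min(clks, cycles_ge4_data_rd) / clks                       # 0.12
    tma_mem_latency = min(clks, cycles_with_data_rd) / clks - tma_mem_bandwidth    # 0.18
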
{
@@ -1207,6 +1219,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -1215,7 +1228,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "tma_retiring * tma_info_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1224,28 +1237,28 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where (only) 4 uops were delivered by the MITE pipeline",
- "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_clks",
+ "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
"MetricName": "tma_mite_4wide",
- "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35))",
+ "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
"ScaleUnit": "100%"
},
{
@@ -1259,7 +1272,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1268,7 +1281,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_nop_instructions",
"MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
@@ -1277,6 +1290,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1286,7 +1300,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1295,7 +1309,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1304,7 +1318,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1313,7 +1327,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1322,7 +1336,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_clks)",
+ "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1331,7 +1345,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_clks",
+ "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1340,7 +1354,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1349,7 +1363,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1358,7 +1372,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_clks",
+ "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1367,17 +1381,18 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
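
The tma_retiring expression normalizes the raw topdown-retiring slot count against all four level-1 categories; the trailing "+ 0 * tma_info_thread_slots" term contributes nothing numerically and appears to exist only to pull the slots event into the same event group. A sketch of the normalization, with hypothetical slot counts:

    # Level-1 Topdown normalization used by tma_retiring above.
    fe_bound, bad_spec, retiring, be_bound = 9e9, 3e9, 24e9, 12e9  # topdown-* slots

    tma_retiring = retiring / (fe_bound + bad_spec + retiring + be_bound)  # 0.5
    # The MetricThreshold above flags tma_retiring > 0.7 (or heavy operations > 0.1).
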
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1386,7 +1401,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_clks",
+ "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -1395,7 +1410,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1404,7 +1419,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1413,16 +1428,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1431,7 +1446,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1440,7 +1455,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1449,7 +1464,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1466,7 +1481,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1474,7 +1489,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
- "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_clks",
+ "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
"MetricName": "tma_streaming_stores",
"MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1483,7 +1498,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "10 * BACLEARS.ANY / tma_info_clks",
+ "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1501,28 +1516,28 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
diff --git a/tools/perf/pmu-events/arch/x86/icelake/metricgroups.json b/tools/perf/pmu-events/arch/x86/icelake/metricgroups.json
new file mode 100644
index 000000000000..a151ba9cccb0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelake/metricgroups.json
@@ -0,0 +1,113 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
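The new metricgroups.json is a flat name-to-description table: every tag that appears in a metric's semicolon-separated MetricGroup field resolves to one of the strings above. A small sketch of that lookup (illustrative only; the in-tree jevents processing is more involved), assuming the file is in the working directory:

  # Resolve a metric's MetricGroup tags against metricgroups.json.
  import json

  with open("metricgroups.json") as f:
      groups = json.load(f)

  # MetricGroup field taken from tma_sq_full in the metrics diff above.
  field = "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group"
  for tag in field.split(";"):
      print(tag, "->", groups.get(tag, "(no description)"))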
diff --git a/tools/perf/pmu-events/arch/x86/icelake/pipeline.json b/tools/perf/pmu-events/arch/x86/icelake/pipeline.json
index 154fee4b60fb..375b78044f14 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/pipeline.json
@@ -318,10 +318,10 @@
"UMask": "0x40"
},
{
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"EventCode": "0x87",
"EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -556,7 +556,7 @@
"BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the specualtive path as well as the out-of-order engine recovery past a branch misprediction.",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
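The ILD_STALL.LCP description above states a cost model of roughly three stall cycles per length-changing prefix within a 16-byte fetch chunk; a back-of-envelope estimate under that stated assumption:

  # Rough LCP stall estimate per the event description (assumed model:
  # ~3 stall cycles per length-changing prefix in one 16-byte chunk).
  lcp_count = 2            # LCPs observed in a single 16B fetch line
  penalty_per_lcp = 3      # cycles, per the PublicDescription text
  print(lcp_count * penalty_per_lcp, "estimated stall cycles")  # 6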
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
index 71498044f1cb..f6edc4222f42 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
@@ -8,6 +8,14 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
"CounterMask": "1",
"EdgeDetect": "1",
@@ -213,10 +221,10 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]",
"EventCode": "0x80",
"EventName": "ICACHE_16B.IFDATA_STALL",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -237,10 +245,26 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"EventCode": "0x83",
"EventName": "ICACHE_64B.IFTAG_STALL",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALLS",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
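The ICACHE_DATA.STALLS and ICACHE_TAG.STALLS entries added above are pure aliases: each reuses the EventCode/UMask of the legacy name it shadows. A quick consistency check one could run against the file (path and array layout assumed from this diff):

  # Sanity-check that aliased events share the same hardware encoding.
  import json

  with open("frontend.json") as f:          # icelakex frontend events, assumed local
      events = json.load(f)
  by_name = {e["EventName"]: e for e in events}
  pairs = [("ICACHE_16B.IFDATA_STALL", "ICACHE_DATA.STALLS"),
           ("ICACHE_64B.IFTAG_STALL", "ICACHE_TAG.STALLS")]
  for a, b in pairs:
      same = (by_name[a]["EventCode"], by_name[a]["UMask"]) == \
             (by_name[b]["EventCode"], by_name[b]["UMask"])
      print(a, "==", b, ":", same)          # expect True for both pairs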
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index 1ef772b40e04..9bb7e3f20f7f 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -29,11 +29,244 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
+ "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY",
+ "MetricName": "cpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "CPU operating frequency (in GHz)",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
+ "MetricName": "cpu_operating_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Percentage of time spent in the active CPU power state C0",
+ "MetricExpr": "tma_info_system_cpu_utilization",
+ "MetricName": "cpu_utilization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2nd_level_2mb_large_page_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the Data Translation Lookaside Buffer (DTLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2nd_level_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2nd_level_store_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_HIT_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "itlb_2nd_level_large_page_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "itlb_2nd_level_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY",
+ "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L1_HIT / INST_RETIRED.ANY",
+ "MetricName": "l1d_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L1D.REPLACEMENT / INST_RETIRED.ANY",
+ "MetricName": "l1d_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+    "BriefDescription": "Ratio of number of code read requests missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_code_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+    "BriefDescription": "Ratio of number of completed data read requests missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_LINES_IN.ALL / INST_RETIRED.ANY",
+ "MetricName": "l2_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_CRD + UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF) / INST_RETIRED.ANY",
+ "MetricName": "llc_code_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+    "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) in nanoseconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+    "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to local memory in nanoseconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_latency_for_local_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+    "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to remote memory in nanoseconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_latency_for_remote_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+    "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to Intel(R) Optane(TM) Persistent Memory (PMEM) in nanoseconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_to_pmem_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.WRITES_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to remote memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_REMOTE * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_remote_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_remote_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+    "BriefDescription": "The ratio of number of completed memory load instructions to the total number of completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "loads_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "DDR memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.RD * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.WR * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+    "BriefDescription": "Memory write bandwidth (MB/sec) caused by directory updates; includes DDR and Intel(R) Optane(TM) Persistent Memory (PMEM).",
+ "MetricExpr": "(UNC_CHA_DIR_UPDATE.HA + UNC_CHA_DIR_UPDATE.TOR + UNC_M2M_DIRECTORY_UPDATE.ANY) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_extra_write_bw_due_to_directory_updates",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+    "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses; does not include LLC prefetches.",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL) / (UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE)",
+ "MetricName": "numa_reads_addressed_to_local_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+    "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses; does not include LLC prefetches.",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE) / (UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE)",
+ "MetricName": "numa_reads_addressed_to_remote_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_decoded_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MITE_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_legacy_decode_pipeline",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MS_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_microcode_sequencer",
+ "ScaleUnit": "100%"
+ },
+ {
+    "BriefDescription": "Intel(R) Optane(TM) Persistent Memory (PMEM) memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_PMM_RPQ_INSERTS * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+    "BriefDescription": "Intel(R) Optane(TM) Persistent Memory (PMEM) memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS) * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+    "BriefDescription": "Intel(R) Optane(TM) Persistent Memory (PMEM) memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_PMM_WPQ_INSERTS * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -49,8 +282,14 @@
"ScaleUnit": "1SMI#"
},
{
+    "BriefDescription": "The ratio of number of completed memory store instructions to the total number of completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_STORES / INST_RETIRED.ANY",
+ "MetricName": "stores_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -59,7 +298,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -67,7 +306,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * ASSISTS.ANY / tma_info_slots",
+ "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -76,27 +315,29 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
- "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_branch_instructions",
"MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
@@ -109,12 +350,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -132,7 +373,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -142,7 +383,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(44 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 43.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 43.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -162,7 +403,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43.5 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "43.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -171,16 +412,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -190,7 +431,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound)",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -199,43 +440,43 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "48 * tma_info_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "48 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -244,11 +485,11 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -256,14 +497,14 @@
"MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
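
Aside: the MetricThreshold strings touched in this hunk, such as "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35" above, are boolean expressions over other metrics' computed values: arithmetic, comparisons, and '&' for conjunction. Below is a minimal evaluator sketch for that simple grammar, assuming the metric values were already computed elsewhere; the function and sample values are illustrative, not part of perf, and it does not handle the max()/min() calls that some thresholds in this file use.

import re

def threshold_fires(expr: str, values: dict[str, float]) -> bool:
    # Substitute metric names first (numeric literals never match the
    # identifier pattern), then map '&'/'|' onto Python boolean operators.
    py = re.sub(r"[A-Za-z_][A-Za-z0-9_.]*",
                lambda m: repr(values[m.group(0)]), expr)
    py = py.replace("&", " and ").replace("|", " or ")
    return bool(eval(py))  # acceptable for a trusted, local JSON file

print(threshold_fires(
    "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
    {"tma_fetch_bandwidth": 0.2, "tma_frontend_bound": 0.3,
     "tma_info_thread_ipc": 2.0}))  # -> True
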
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_slots",
+ "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -282,6 +523,7 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -291,7 +533,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -300,7 +542,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -309,7 +551,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -318,7 +560,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -327,7 +569,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_512b",
"MetricThreshold": "tma_fp_vector_512b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -336,11 +578,12 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
- "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots",
+ "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -356,7 +599,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -364,726 +607,741 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_core_ipmispredict",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
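
Most of the churn from here on is a mechanical rename of the flat tma_info_* metric names into a hierarchy (tma_info_bad_spec_*, tma_info_botlnk_*, tma_info_bottleneck_*, tma_info_branches_*, tma_info_core_*, tma_info_frontend_*, tma_info_inst_mix_*, tma_info_memory_*, tma_info_system_*, tma_info_thread_*), applied in lockstep to each MetricName, to every MetricExpr and MetricThreshold that references a renamed metric, and to the "Related metrics:" lists inside PublicDescription. The sketch below shows how such a rename can be applied mechanically; the file path is hypothetical and the map lists only a few of the pairs visible in this diff.

import json
import re

RENAMES = {  # old -> new, excerpted from this hunk
    "tma_info_big_code": "tma_info_bottleneck_big_code",
    "tma_info_branching_overhead": "tma_info_bottleneck_branching_overhead",
    "tma_info_clks": "tma_info_thread_clks",
    "tma_info_slots": "tma_info_thread_slots",
    "tma_info_ipc": "tma_info_thread_ipc",
    "tma_info_dsb_coverage": "tma_info_frontend_dsb_coverage",
    "tma_info_dram_bw_use": "tma_info_system_dram_bw_use",
}

# Longest-first alternation plus word boundaries, so a short name can
# never clobber a longer name that contains it.
pattern = re.compile(
    r"\b(" + "|".join(sorted(RENAMES, key=len, reverse=True)) + r")\b")

def rename_refs(text: str) -> str:
    return pattern.sub(lambda m: RENAMES[m.group(1)], text)

with open("icelakex-metrics.json") as f:  # hypothetical path
    metrics = json.load(f)

for metric in metrics:
    for key in ("MetricName", "MetricExpr", "MetricThreshold",
                "PublicDescription"):
        if key in metric:
            metric[key] = rename_refs(metric[key])

print(json.dumps(metrics, indent=2))
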
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
+ },
+ {
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
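
The tma_info_core_flopc expression above weights each FP_ARITH_INST_RETIRED event by the number of FP operations a single instruction performs (vector width divided by element width), and its unusual umasks merge events that share a weight: 0x03 is scalar single+double (weight 1), 0x18 is 128-bit packed single plus 256-bit packed double (weight 4), 0x60 is 256-bit packed single plus 512-bit packed double (weight 8). A sketch of the same arithmetic, with invented counter values:

def flopc(scalar, pd128, ps128_pd256, ps256_pd512, ps512, core_clks):
    """Floating Point Operations Per Cycle, mirroring the MetricExpr above."""
    flops = (scalar             # umask 0x03: 1 FLOP per scalar instruction
             + 2 * pd128        # 2 doubles per 128-bit packed-double op
             + 4 * ps128_pd256  # umask 0x18 group: 4 FLOPs each
             + 8 * ps256_pd512  # umask 0x60 group: 8 FLOPs each
             + 16 * ps512)      # 16 singles per 512-bit packed-single op
    return flops / core_clks

print(flopc(scalar=1e6, pd128=0, ps128_pd256=2e6, ps256_pd512=0,
            ps512=5e5, core_clks=4e6))  # (1e6 + 8e6 + 8e6) / 4e6 = 4.25
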
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;TopdownL1;tma_L1_group",
+ "MetricName": "tma_info_core_ipmispredict",
+ "MetricgroupNoGroup": "TopdownL1"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 5 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 5 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
- "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_HIT_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_read_bw"
- },
- {
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
- "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_write_bw"
- },
- {
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx512",
- "MetricThreshold": "tma_info_iparith_avx512 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_ntaken",
- "MetricThreshold": "tma_info_ipmisp_cond_ntaken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_taken",
- "MetricThreshold": "tma_info_ipmisp_cond_taken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_ret",
- "MetricThreshold": "tma_info_ipmisp_ret < 500"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 11",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 11",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+ "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_nonsilent_pki"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+ "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_silent_pki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
- "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_nonsilent_pki"
- },
- {
- "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
- "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_silent_pki"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
- "MetricExpr": "1e3 * (OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS) / tma_info_instructions",
+ "MetricExpr": "1e3 * (OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS) / tma_info_inst_mix_instructions",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
- },
- {
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l3_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
+ },
+ {
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
+ },
+ {
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_HIT_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_read_bw"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_write_bw"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR) / cha_0@event\\=0x0@",
"MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_dram_read_latency",
+ "MetricName": "tma_info_system_mem_dram_read_latency",
"PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
"MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
+ "MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
"MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@ if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_pmm_read_latency",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_socket_clks / duration_time)",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
+ "MetricName": "tma_info_system_mem_read_latency",
"PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
- },
- {
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
- },
- {
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
- },
- {
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
- },
- {
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
- },
- {
- "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
- "MetricExpr": "1 - (tma_info_cond_nt + tma_info_cond_tk + tma_info_callret + tma_info_jump)",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_other_branches"
- },
- {
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
- },
- {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC",
- "MetricName": "tma_info_pmm_read_bw"
+ "MetricName": "tma_info_system_pmm_read_bw"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC",
- "MetricName": "tma_info_pmm_write_bw"
+ "MetricName": "tma_info_system_pmm_write_bw"
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
- "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license0_utilization",
+ "MetricName": "tma_info_system_power_license0_utilization",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
- "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license1_utilization",
- "MetricThreshold": "tma_info_power_license1_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license1_utilization",
+ "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
- "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license2_utilization",
- "MetricThreshold": "tma_info_power_license2_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license2_utilization",
+ "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "TOPDOWN.SLOTS",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
- "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
- "MetricExpr": "(tma_info_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
- "MetricGroup": "SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots_utilization"
- },
- {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
- },
- {
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "TOPDOWN.SLOTS",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization"
},
{
"BriefDescription": "Uops Per Instruction",
- "MetricExpr": "tma_retiring * tma_info_slots / INST_RETIRED.ANY",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "tma_retiring * tma_info_slots / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 7.5"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 7.5"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1092,7 +1350,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1102,7 +1360,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks)",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1111,7 +1369,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1120,20 +1378,20 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "19 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "19 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -1148,7 +1406,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_clks)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1165,7 +1423,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1173,7 +1431,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
- "MetricExpr": "43.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "43.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1183,7 +1441,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1202,20 +1460,20 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1230,6 +1488,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -1238,7 +1497,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "tma_retiring * tma_info_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1247,28 +1506,28 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where (only) 4 uops were delivered by the MITE pipeline",
- "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_clks",
+ "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
"MetricName": "tma_mite_4wide",
- "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35))",
+ "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
"ScaleUnit": "100%"
},
{
@@ -1282,7 +1541,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1291,7 +1550,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_nop_instructions",
"MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
@@ -1300,6 +1559,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1309,7 +1569,7 @@
},
{
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
- "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_pmm_bound",
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1318,7 +1578,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1327,7 +1587,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1336,7 +1596,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1345,7 +1605,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1354,7 +1614,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_clks)",
+ "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1363,7 +1623,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_clks",
+ "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1372,7 +1632,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1381,7 +1641,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1390,7 +1650,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_clks",
+ "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1399,7 +1659,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
- "MetricExpr": "(97 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 97 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(97 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 97 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1408,7 +1668,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
- "MetricExpr": "108 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "108 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1417,17 +1677,18 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1436,7 +1697,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "37 * MISC_RETIRED.PAUSE_INST / tma_info_clks",
+ "MetricExpr": "37 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -1445,7 +1706,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1454,7 +1715,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1463,16 +1724,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1481,7 +1742,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1490,7 +1751,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1499,7 +1760,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1516,7 +1777,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1524,7 +1785,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
- "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_clks",
+ "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
"MetricName": "tma_streaming_stores",
"MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1533,7 +1794,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "10 * BACLEARS.ANY / tma_info_clks",
+ "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1551,30 +1812,42 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore operating frequency in GHz",
+ "MetricExpr": "UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages) / 1e9 / duration_time",
+ "MetricName": "uncore_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_transmit_bw",
+ "ScaleUnit": "1MB/s"
}
]
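
The TSX metric hunks above drop the cpu@...@ event syntax and wrap each expression in a has_event() guard, so on hardware where the transactional events (cycles-t, cycles-ct, el-start, tx-start) are not exposed the metric evaluates to 0 instead of failing. A minimal Python sketch of that guard semantics follows — illustrative only, not perf's actual expression evaluator (which lives under tools/perf/util/); the counts dict and function name are assumptions mirroring the tsx_aborted_cycles metric above:

    # Illustrative sketch of the "expr if has_event(e) else 0" guard; the
    # counts mapping stands in for whatever counter values perf collected.
    def tsx_aborted_cycles(counts: dict) -> float:
        def has_event(name: str) -> bool:
            # mirrors the has_event() builtin used in the MetricExpr strings
            return name in counts
        if not has_event("cycles-t"):
            return 0.0  # "... if has_event(cycles\-t) else 0"
        # "max(cycles\-t - cycles\-ct, 0) / cycles"
        return max(counts["cycles-t"] - counts["cycles-ct"], 0) / counts["cycles"]

    # A machine without TSX simply reports 0 rather than a parse error.
    print(tsx_aborted_cycles({"cycles": 1e9}))                  # 0.0
    print(tsx_aborted_cycles({"cycles": 1e9, "cycles-t": 2e8,
                              "cycles-ct": 1.5e8}))             # 0.05
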
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/metricgroups.json b/tools/perf/pmu-events/arch/x86/icelakex/metricgroups.json
new file mode 100644
index 000000000000..bc6a9a4d27a9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelakex/metricgroups.json
@@ -0,0 +1,114 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IoBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
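
Every token in a metric's semicolon-separated "MetricGroup" string (e.g. "MemoryLat;Offcore;TopdownL4;...") is expected to have a one-line description in this new metricgroups.json. A quick consistency check could look like the sketch below; the file names are assumptions (the ivybridge counterpart patched later is ivb-metrics.json, so icx-metrics.json is assumed for icelakex), and it would be run from the event directory:

    #!/usr/bin/env python3
    # Sketch of a consistency check: every group named in a metric's
    # "MetricGroup" string should be described in metricgroups.json.
    import json

    with open("metricgroups.json") as f:
        described = set(json.load(f))        # keys are the group names

    with open("icx-metrics.json") as f:      # assumed metrics file name
        metrics = json.load(f)

    missing = {tok
               for m in metrics
               for tok in m.get("MetricGroup", "").split(";")
               if tok and tok not in described}

    print("groups lacking a description:", sorted(missing) or "none")
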
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
index 442a4c7539dd..176e5ef2a24a 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
@@ -318,10 +318,10 @@
"UMask": "0x40"
},
{
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"EventCode": "0x87",
"EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
index 8ac5907762e1..f87ea3f66d1b 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
@@ -9311,7 +9311,7 @@
"EventCode": "0x50",
"EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
"PerPkg": "1",
- "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in paralle",
+ "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in parallel",
"UMask": "0x8",
"Unit": "M3UPI"
},
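
The ivb-metrics.json hunks that follow are dominated by one mechanical change: flat tma_info_* metric names gain a category component (thread, core, system, memory, frontend, inst_mix, ...), and every MetricExpr, MetricThreshold, and description referencing them is updated to match. The substitution, as far as these hunks show it, can be sketched as whole-token replacement — a hypothetical helper, not the script that generated the patch, and the table lists only renames visible below, not the complete upstream mapping:

    # Hypothetical rename helper for the tma_info_* namespace change.
    import re

    RENAMES = {
        "tma_info_clks": "tma_info_thread_clks",
        "tma_info_slots": "tma_info_thread_slots",
        "tma_info_ipc": "tma_info_thread_ipc",
        "tma_info_core_clks": "tma_info_core_core_clks",
        "tma_info_socket_clks": "tma_info_system_socket_clks",
        "tma_info_dsb_coverage": "tma_info_frontend_dsb_coverage",
        "tma_info_iptb": "tma_info_inst_mix_iptb",
    }

    def rename_tokens(expr: str) -> str:
        # \b treats '_' as a word character, so only whole identifiers are
        # replaced; tma_info_clks never matches inside a longer name.
        for old, new in RENAMES.items():
            expr = re.sub(rf"\b{old}\b", new, expr)
        return expr

    print(rename_tokens("LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks"))
    # -> LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks
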
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index 11080ccffd51..33fe555252b2 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -71,7 +71,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -81,7 +81,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -89,7 +89,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -109,7 +109,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -125,12 +125,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -150,7 +150,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) + 43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) + 43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -171,7 +171,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -180,7 +180,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -190,7 +190,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -199,25 +199,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -226,7 +226,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(7 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -235,7 +235,7 @@
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE / tma_info_clks",
+ "MetricExpr": "60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -245,11 +245,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -257,14 +257,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_slots",
+ "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -301,7 +301,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -321,358 +321,358 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
- "MetricExpr": "ICACHE.IFETCH_STALL / tma_info_clks - tma_itlb_misses",
+ "MetricExpr": "ICACHE.IFETCH_STALL / tma_info_thread_clks - tma_itlb_misses",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
- },
- {
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "1 / (tma_fp_scalar + tma_fp_vector)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average number of parallel requests to external memory",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_parallel_requests",
- "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency"
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_clks",
+ "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_core_clks",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel requests to external memory",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_parallel_requests",
+ "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
+ },
+ {
+ "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_request_latency"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "UNC_CLOCK.SOCKET",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -681,7 +681,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_clks, 0)",
+ "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -690,7 +690,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -700,7 +700,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -710,7 +710,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.LLC_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) / tma_info_clks",
+ "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.LLC_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -719,11 +719,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -739,7 +739,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -749,7 +749,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -769,16 +769,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -788,7 +788,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -798,7 +798,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -807,16 +807,16 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -825,7 +825,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -834,7 +834,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -843,7 +843,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -852,7 +852,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -870,7 +870,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -880,7 +880,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_clks",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -889,7 +889,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -898,7 +898,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -907,7 +907,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -916,7 +916,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -924,7 +924,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -935,7 +935,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "13 * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -944,7 +944,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -953,16 +953,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -971,7 +971,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -981,7 +981,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -990,7 +990,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/metricgroups.json b/tools/perf/pmu-events/arch/x86/ivybridge/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 65a46d659c0a..f5e46a768fdd 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -71,7 +71,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -81,7 +81,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -89,7 +89,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_slots",
+ "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -109,7 +109,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -125,12 +125,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -150,7 +150,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(60 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) + 43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD)))) / tma_info_clks",
+ "MetricExpr": "(60 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) + 43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD)))) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -171,7 +171,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "43 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -180,7 +180,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -190,7 +190,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -199,25 +199,25 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -226,7 +226,7 @@
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(7 * DTLB_STORE_MISSES.STLB_HIT + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -235,7 +235,7 @@
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "(200 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM + 60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE) / tma_info_clks",
+ "MetricExpr": "(200 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM + 60 * OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -245,11 +245,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -257,14 +257,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_slots",
+ "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -301,7 +301,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -321,359 +321,359 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
- "MetricExpr": "ICACHE.IFETCH_STALL / tma_info_clks - tma_itlb_misses",
+ "MetricExpr": "ICACHE.IFETCH_STALL / tma_info_thread_clks - tma_itlb_misses",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
- },
- {
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_inst_mix_iptb, tma_lcp"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "1 / (tma_fp_scalar + tma_fp_vector)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
- },
- {
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "0",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
- "MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
- "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
},
{
- "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_socket_clks / duration_time)",
- "MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
- "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_clks",
+ "MetricExpr": "(ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION) / tma_info_core_core_clks",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_system_socket_clks / duration_time)",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+ "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cbox_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -682,7 +682,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_clks, 0)",
+ "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -691,7 +691,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -701,7 +701,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -711,7 +711,7 @@
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.LLC_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.LLC_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -720,11 +720,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -740,7 +740,7 @@
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -750,7 +750,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -760,7 +760,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_clks",
+ "MetricExpr": "MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -780,16 +780,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -799,7 +799,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -809,7 +809,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -818,16 +818,16 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -836,7 +836,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -845,7 +845,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -854,7 +854,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -863,7 +863,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -881,7 +881,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -891,7 +891,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_clks",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - (UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC if tma_info_thread_ipc > 1.8 else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -900,7 +900,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -909,7 +909,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -918,7 +918,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -927,7 +927,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_clks",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -936,7 +936,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) + 180 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD)))) / tma_info_clks",
+ "MetricExpr": "(200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) + 180 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD)))) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -946,7 +946,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "310 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_clks",
+ "MetricExpr": "310 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -955,7 +955,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -966,7 +966,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "13 * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -975,7 +975,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "2 * MEM_UOPS_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -984,16 +984,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1002,7 +1002,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1012,7 +1012,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) + (1 - MEM_UOPS_RETIRED.LOCK_LOADS / MEM_UOPS_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1021,7 +1021,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/metricgroups.json b/tools/perf/pmu-events/arch/x86/ivytown/metricgroups.json
new file mode 100644
index 000000000000..f6a0258e3241
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/metricgroups.json
@@ -0,0 +1,107 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
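
The metricgroups.json file added above is a flat map from every MetricGroup tag used in the ivytown metric JSONs to a one-line description. The key families are mechanical: a tma_<name>_group key describes the metrics contributing to metric <name>, and a tma_issue<X> key ties together metrics sharing the issue tag $issue<X>. Below is a minimal validation sketch for files of this shape; the file layout and the two fixed phrasings are taken from the diff above, while the script itself (names, path handling) is illustrative and not part of this patch or of perf's jevents tooling.

    import json
    import re
    import sys

    def check_metricgroups(path):
        # Load a metricgroups.json of the shape added in this patch:
        # { "<group name>": "<one-line description>", ... }
        with open(path) as f:
            groups = json.load(f)
        ok = True
        for name, desc in groups.items():
            if not isinstance(desc, str) or not desc:
                print(f"{name}: empty or non-string description")
                ok = False
                continue
            # tma_<metric>_group entries use one fixed phrasing
            # (tma_L<n>_group level groups are excluded; they use the
            # "Metrics for top-down breakdown at level <n>" wording).
            if name.startswith("tma_") and name.endswith("_group") \
                    and not name.startswith("tma_L"):
                want = f"Metrics contributing to {name[:-len('_group')]} category"
                if desc != want:
                    print(f"{name}: unexpected description {desc!r}")
                    ok = False
            # tma_issue<X> entries use the other fixed phrasing.
            elif re.fullmatch(r"tma_issue\w+", name):
                want = f"Metrics related by the issue ${name[len('tma_'):]}"
                if desc != want:
                    print(f"{name}: unexpected description {desc!r}")
                    ok = False
        return ok

    if __name__ == "__main__":
        sys.exit(0 if check_metricgroups(sys.argv[1]) else 1)

Run as, say, python check_metricgroups.py metricgroups.json (hypothetical invocation); it exits non-zero if any generated description drifts from the patterns seen above.
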
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 66a6f657bd6f..35b1a3aa728d 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -82,7 +82,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -98,12 +98,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -123,7 +123,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -133,7 +133,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -142,16 +142,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1",
@@ -163,14 +163,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_slots",
+ "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -207,7 +207,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -226,169 +226,169 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
- },
- {
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
- },
- {
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_mem_bandwidth"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
"PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_lcp"
},
{
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_DISPATCHED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_inst_mix_instructions",
+ "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
},
{
- "BriefDescription": "Total number of retired Instructions",
- "MetricExpr": "INST_RETIRED.ANY",
- "MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
- "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_mem_bandwidth"
},
{
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
"MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
},
{
"BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
"MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "MetricName": "tma_info_system_kernel_cpi"
},
{
"BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
+ "MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_socket_clks / duration_time)",
+ "MetricExpr": "1e9 * (UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_INSERTS.MISS_OPCODE@filter_opc\\=0x182@) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
+ "MetricName": "tma_info_system_mem_read_latency",
"PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cbox_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -398,7 +398,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -407,11 +407,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage",
"ScaleUnit": "100%"
},
{
@@ -437,16 +437,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_dram_bw_use",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_system_dram_bw_use",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -456,7 +456,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -466,7 +466,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -475,7 +475,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -485,7 +485,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING)) / tma_info_clks",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -494,7 +494,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -504,7 +504,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/metricgroups.json b/tools/perf/pmu-events/arch/x86/jaketown/metricgroups.json
new file mode 100644
index 000000000000..bebb85945d62
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/metricgroups.json
@@ -0,0 +1,100 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 66c37a3cbf43..6650100830c4 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -1,35 +1,36 @@
Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BF),v1.20,alderlake,core
-GenuineIntel-6-BE,v1.20,alderlaken,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.21,alderlake,core
+GenuineIntel-6-BE,v1.21,alderlaken,core
GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core
-GenuineIntel-6-(3D|47),v27,broadwell,core
-GenuineIntel-6-56,v9,broadwellde,core
-GenuineIntel-6-4F,v20,broadwellx,core
-GenuineIntel-6-55-[56789ABCDEF],v1.17,cascadelakex,core
-GenuineIntel-6-9[6C],v1.03,elkhartlake,core
+GenuineIntel-6-(3D|47),v28,broadwell,core
+GenuineIntel-6-56,v10,broadwellde,core
+GenuineIntel-6-4F,v21,broadwellx,core
+GenuineIntel-6-55-[56789ABCDEF],v1.19,cascadelakex,core
+GenuineIntel-6-9[6C],v1.04,elkhartlake,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
GenuineIntel-6-B6,v1.00,grandridge,core
GenuineIntel-6-A[DE],v1.01,graniterapids,core
GenuineIntel-6-(3C|45|46),v33,haswell,core
GenuineIntel-6-3F,v27,haswellx,core
-GenuineIntel-6-(7D|7E|A7),v1.17,icelake,core
-GenuineIntel-6-6[AC],v1.20,icelakex,core
+GenuineIntel-6-7[DE],v1.19,icelake,core
+GenuineIntel-6-6[AC],v1.21,icelakex,core
GenuineIntel-6-3A,v24,ivybridge,core
GenuineIntel-6-3E,v23,ivytown,core
GenuineIntel-6-2D,v23,jaketown,core
GenuineIntel-6-(57|85),v10,knightslanding,core
-GenuineIntel-6-A[AC],v1.01,meteorlake,core
+GenuineIntel-6-A[AC],v1.03,meteorlake,core
GenuineIntel-6-1[AEF],v3,nehalemep,core
GenuineIntel-6-2E,v3,nehalemex,core
+GenuineIntel-6-A7,v1.01,rocketlake,core
GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-(8F|CF),v1.12,sapphirerapids,core
+GenuineIntel-6-(8F|CF),v1.14,sapphirerapids,core
GenuineIntel-6-AF,v1.00,sierraforest,core
GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
-GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v55,skylake,core
-GenuineIntel-6-55-[01234],v1.29,skylakex,core
-GenuineIntel-6-86,v1.20,snowridgex,core
-GenuineIntel-6-8[CD],v1.10,tigerlake,core
+GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
+GenuineIntel-6-55-[01234],v1.31,skylakex,core
+GenuineIntel-6-86,v1.21,snowridgex,core
+GenuineIntel-6-8[CD],v1.13,tigerlake,core
GenuineIntel-6-2C,v4,westmereep-dp,core
GenuineIntel-6-25,v3,westmereep-sp,core
GenuineIntel-6-2F,v3,westmereex,core
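
Each Family-model field in mapfile.csv is a regular expression matched against the CPU's CPUID string of the form vendor-family-model, optionally extended with a stepping (which entries like the skylakex GenuineIntel-6-55-[01234] row rely on to disambiguate models). The sketch below shows the lookup idea against the rows above; the anchoring, stepping handling, and case-insensitivity are assumptions for illustration, not a copy of perf's actual matcher.

    import csv
    import re

    def lookup(mapfile, cpuid):
        # First row whose Family-model regex matches the CPUID string wins;
        # "(-|$)" keeps e.g. "...-2D" from matching a longer model field.
        with open(mapfile, newline="") as f:
            for row in csv.DictReader(f):
                if re.match(row["Family-model"] + r"(-|$)", cpuid, re.IGNORECASE):
                    return row["Filename"], row["Version"]
        return None

    # e.g. lookup("mapfile.csv", "GenuineIntel-6-2D")   -> ("jaketown", "v23")
    #      lookup("mapfile.csv", "GenuineIntel-6-55-7") -> ("cascadelakex", "v1.19")
    #      lookup("mapfile.csv", "GenuineIntel-6-A7")   -> ("rocketlake", "v1.01"),
    #      exercising the new rocketlake row split out of the old icelake entry.
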
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
index bf24d3f25a3d..e1ae7c92f38e 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
@@ -1,5 +1,115 @@
[
{
+ "BriefDescription": "L1D.HWPF_MISS",
+ "EventCode": "0x51",
+ "EventName": "L1D.HWPF_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of L1D misses that are outstanding",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache lines filling L2",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1f",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.SILENT",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All accesses to L2 cache [This event is alias to L2_RQSTS.REFERENCES]",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.ALL",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.HIT",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xdf",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_RQSTS.MISS]",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.MISS",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "L2 code requests",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
@@ -18,6 +128,139 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x27",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand requests to L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "PublicDescription": "Counts demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2_RQSTS.ALL_HWPF",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_HWPF",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf0",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "RFO requests to L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x24",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand Data Read miss L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x21",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.HIT",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xdf",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2_RQSTS.HWPF_MISS",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.HWPF_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x30",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_REQUEST.MISS]",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.MISS",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All accesses to L2 cache [This event is alias to L2_REQUEST.ALL]",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x22",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "EventCode": "0x23",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
@@ -54,6 +297,72 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an instruction cache or TLB miss.",
+ "EventCode": "0x35",
+ "EventName": "MEM_BOUND_STALLS_IFETCH.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+ "EventCode": "0x35",
+ "EventName": "MEM_BOUND_STALLS_IFETCH.L2_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which hit in the LLC.",
+ "EventCode": "0x35",
+ "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which missed all the caches.",
+ "EventCode": "0x35",
+ "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x68",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an L1 demand load miss.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.L2_HIT",
+ "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which hit in the LLC.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the local caches.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x68",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Retired load instructions.",
"Data_LA": "1",
"EventCode": "0xd0",
@@ -76,6 +385,352 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "All retired memory instructions.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x83",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with locked access.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with locked access.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x41",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x42",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions that hit the STLB.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired store instructions that hit the STLB.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x11",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired store instructions that miss the STLB.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x12",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Completed demand load uops that miss the L1 d-cache.",
+ "EventCode": "0x43",
+ "EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xfd",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+ "Data_LA": "1",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+ "Data_LA": "1",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LOCAL_DRAM",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit the L1 data cache.",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss in the L1 data cache.",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit in the L2 cache.",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that miss in the L2 cache.",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1c",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of loads that hit in a write combining buffer (WCB), excluding the first load that caused the WCB to allocate.",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+ "SampleAfterValue": "20003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+ "SampleAfterValue": "20003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+ "SampleAfterValue": "20003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+ "SampleAfterValue": "20003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired.",
"Data_LA": "1",
"EventCode": "0xd0",
@@ -99,6 +754,18 @@
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
"Data_LA": "1",
"EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x400",
+ "PEBS": "2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
@@ -123,6 +790,18 @@
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
"Data_LA": "1",
"EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x800",
+ "PEBS": "2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
@@ -192,6 +871,46 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of load uops retired that performed one or more locks",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x21",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory uops retired that were splits.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x43",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired split load uops.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired split store uops.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x42",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES",
"Data_LA": "1",
"EventCode": "0xd0",
@@ -200,5 +919,97 @@
"SampleAfterValue": "1000003",
"UMask": "0x6",
"Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Retired memory uops for any access",
+ "EventCode": "0xe5",
+ "EventName": "MEM_UOP_RETIRED.ANY",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x8003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Any memory transaction that reached the SQ.",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand and prefetch data reads",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DATA_RD",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
+ "EventCode": "0x2c",
+ "EventName": "SQ_MISC.BUS_LOCK",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
}
]
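
The table entries above pin down each event's hardware encoding (EventCode plus UMask, with SampleAfterValue as the default sampling period), and on x86 these map directly onto the raw config of perf_event_open(2): event select in bits 0-7, umask in bits 8-15. A minimal sketch of using one of them, assuming the standard raw encoding and the hybrid cpu_core PMU type exported in sysfs (the sysfs path, the PERF_TYPE_RAW fallback, and the program as a whole are assumptions of this sketch, not part of the patch):

/*
 * Sketch: count L2_RQSTS.MISS (EventCode 0x24, UMask 0x3f) from the
 * table above on the cpu_core PMU. Error handling trimmed for brevity.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
    struct perf_event_attr attr;
    uint64_t count = 0;
    int type = PERF_TYPE_RAW; /* fallback on non-hybrid parts */
    FILE *f = fopen("/sys/bus/event_source/devices/cpu_core/type", "r");
    int fd;

    if (f) {
        fscanf(f, "%d", &type);
        fclose(f);
    }

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = type;
    attr.config = 0x24 | (0x3fULL << 8); /* L2_RQSTS.MISS */

    /* pid = 0, cpu = -1: count this thread on any CPU */
    fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

    /* ... run the workload of interest ... */

    read(fd, &count, sizeof(count));
    printf("L2_RQSTS.MISS: %llu\n", (unsigned long long)count);
    return 0;
}

The perf tool reaches the same encoding symbolically: once this JSON lands, perf stat -e cpu_core/L2_RQSTS.MISS/ (or the explicit cpu_core/event=0x24,umask=0x3f/) should resolve through it.
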
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
new file mode 100644
index 000000000000..616489f0974a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
@@ -0,0 +1,143 @@
+[
+ {
+ "BriefDescription": "This event counts the cycles the floating point divider is busy.",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.FPDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts all microcode FP assists.",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.FP",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "ASSISTS.SSE_AVX_MIX",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.SSE_AVX_MIX",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.PORT_0",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.PORT_1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x18",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 RANGE SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xfc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+ "SampleAfterValue": "20003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point divide uops retired (x87 and sse, including x87 sqrt).",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.FPDIV",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ }
+]
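
Since each FP_ARITH_INST_RETIRED.* count above represents a fixed number of computational operations (1 per scalar count, 2 per 128-bit double count, 4 per 128-bit single or 256-bit double count, 8 per 256-bit single count), a FLOP/s figure can be derived by weighting the raw counts. A sketch of that arithmetic; the weights are taken from the event descriptions, and no extra FMA factor is applied because FM(N)ADD/SUB instructions already count twice at the event level:

/*
 * Sketch: turn FP_ARITH_INST_RETIRED.* counts into FLOP/s using the
 * per-count weights stated in the event descriptions above.
 */
#include <stdint.h>

static double flops_per_sec(uint64_t scalar, uint64_t pd128, uint64_t ps128,
                            uint64_t pd256, uint64_t ps256, double seconds)
{
    uint64_t ops = scalar      /* SCALAR: 1 op per count */
                 + 2 * pd128   /* 128B_PACKED_DOUBLE */
                 + 4 * ps128   /* 128B_PACKED_SINGLE */
                 + 4 * pd256   /* 256B_PACKED_DOUBLE */
                 + 8 * ps256;  /* 256B_PACKED_SINGLE */

    return ops / seconds;
}
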
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
index 66e5609699ea..0f064518d1c0 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
@@ -1,5 +1,260 @@
[
{
+ "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Clears due to Unknown Branches.",
+ "EventCode": "0x60",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles the Microcode Sequencer is busy.",
+ "EventCode": "0x87",
+ "EventName": "DECODE.MS_BUSY",
+ "SampleAfterValue": "500009",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
+ "EventCode": "0x61",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired ANT branches",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ANY_ANT",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x9",
+ "PEBS": "1",
+ "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted)",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x12",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600106",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x608006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x601006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600206",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x610006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x602006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600406",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x620006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x604006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x600806",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted Retired ANT branches",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.MISP_ANT",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x9",
+ "PEBS": "1",
+ "PublicDescription": "ANT retired branches that got just mispredicted",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "FRONTEND_RETIRED.MS_FLOWS",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.MS_FLOWS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x8",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x17",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.",
"EventCode": "0x80",
"EventName": "ICACHE.ACCESSES",
@@ -16,6 +271,131 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALLS",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "ICACHE_DATA.STALL_PERIODS",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALL_PERIODS",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.HIT",
+ "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
+ "CounterMask": "6",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
+ "CounterMask": "6",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_CYCLES_ANY",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of switches from DSB or MITE to the MS",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
@@ -23,5 +403,35 @@
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
+ "CounterMask": "6",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
+ "CounterMask": "1",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "Invert": "1",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
}
]
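
Several entries above (IDQ.DSB_CYCLES_OK, IDQ.MITE_CYCLES_OK, IDQ.MS_SWITCHES, ICACHE_DATA.STALL_PERIODS, IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK) rely on the CounterMask, EdgeDetect and Invert fields, which occupy fixed bit positions in the x86 raw config. A sketch of that encoding, assuming the standard layout; confirm against /sys/bus/event_source/devices/cpu_core/format on the target machine:

/*
 * Sketch: fold the CounterMask/Invert/EdgeDetect JSON fields into the
 * standard x86 raw config layout: event select in bits 0-7, umask in
 * bits 8-15, edge at bit 18, inv at bit 23, cmask in bits 24-31.
 */
#include <stdint.h>

static uint64_t x86_raw_config(uint64_t event, uint64_t umask,
                               uint64_t cmask, int inv, int edge)
{
    return event |
           (umask << 8) |
           ((uint64_t)edge << 18) |
           ((uint64_t)inv << 23) |
           (cmask << 24);
}

/*
 * IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK (EventCode 0x9c, UMask 0x1,
 * CounterMask 1, Invert 1)  -> x86_raw_config(0x9c, 0x1, 1, 1, 0)
 * ICACHE_DATA.STALL_PERIODS (EventCode 0x80, UMask 0x4, CounterMask 1,
 * EdgeDetect 1)             -> x86_raw_config(0x80, 0x4, 1, 0, 1)
 */
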
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
index 20c2efe70eeb..67e949b4c789 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
@@ -1,5 +1,101 @@
[
{
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+ "CounterMask": "2",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "CounterMask": "6",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ANY_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.L1_BOUND_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xf4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DL1 miss.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.L1_MISS_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x81",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.OTHER_AT_RET",
+ "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc0",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.PGWALK_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xa0",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ST_ADDR_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x84",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "3",
+ "EventCode": "0x47",
+ "EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding.",
+ "CounterMask": "5",
+ "EventCode": "0x47",
+ "EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding.",
+ "CounterMask": "9",
+ "EventCode": "0x47",
+ "EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Data_LA": "1",
"EventCode": "0xcd",
@@ -115,43 +211,29 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3FBFC00001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
+ "BriefDescription": "Counts misaligned loads that are 4K page splits.",
+ "EventCode": "0x13",
+ "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "EventCode": "0x2A,0x2B",
- "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3FBFC00001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.L3_MISS",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3FBFC00002",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
+ "BriefDescription": "Counts misaligned stores that are 4K page splits.",
+ "EventCode": "0x13",
+ "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "EventCode": "0x2A,0x2B",
- "EventName": "OCR.DEMAND_RFO.L3_MISS",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3FBFC00002",
+ "BriefDescription": "Counts demand data read requests that miss the L3 cache.",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"SampleAfterValue": "100003",
- "UMask": "0x1",
+ "UMask": "0x10",
"Unit": "cpu_core"
}
]
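
The MEMORY_ACTIVITY.STALLS_* entries above pair an execution-stall qualifier with a demand-miss qualifier (note that each CounterMask matches its UMask). One rough way to use them is as a stall fraction; the sketch below uses made-up placeholder counts, not measured data:

# Illustrative only: share of core cycles stalled behind L3-miss demand
# loads, i.e. MEMORY_ACTIVITY.STALLS_L3_MISS over total unhalted cycles.
# Both counter values are made-up placeholders.
stalls_l3_miss = 1_200_000_000   # MEMORY_ACTIVITY.STALLS_L3_MISS (cpu_core)
unhalted_cycles = 4_000_000_000  # CPU_CLK_UNHALTED.THREAD (cpu_core)
print(f"L3-miss stall fraction: {stalls_l3_miss / unhalted_cycles:.1%}")
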
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
index 14e648bf11c5..2ec57f487525 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
@@ -1,41 +1,50 @@
[
{
- "BriefDescription": "Counts demand data reads that have any type of response.",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+ "BriefDescription": "Counts streaming stores that have any type of response.",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10001",
+ "MSRValue": "0x10800",
"SampleAfterValue": "100003",
"UMask": "0x1",
- "Unit": "cpu_atom"
+ "Unit": "cpu_core"
},
{
- "BriefDescription": "Counts demand data reads that have any type of response.",
- "EventCode": "0x2A,0x2B",
- "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10002",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY_COUNT",
+ "Invert": "1",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
- "UMask": "0x1",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.C01_MS_SCB",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "EventCode": "0x2A,0x2B",
- "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10002",
- "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles the uncore cannot take further requests",
+ "CounterMask": "1",
+ "EventCode": "0x2d",
+ "EventName": "XQ.FULL_CYCLES",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
+ "SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
}
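
The OCR.STREAMING_WR.ANY_RESPONSE entry above keeps the off-core response pattern used throughout these files: MSRValue is the response-type bit pattern programmed through the OFFCORE_RSP MSRs named by MSRIndex. A sketch of the resulting raw event, assuming perf's usual offcore_rsp extra-register term and taking the first of the two listed event codes:

# Sketch only: build the raw perf event for OCR.STREAMING_WR.ANY_RESPONSE,
# assuming MSRValue maps to the offcore_rsp term and the first listed
# EventCode (0x2A) is the one used.
event_code = "0x2A,0x2B".split(",")[0].lower()
msr_value = "0x10800"   # OCR.STREAMING_WR.ANY_RESPONSE response pattern
event = f"cpu_core/event={event_code},umask=0x1,offcore_rsp={msr_value}/"
print("perf stat -a -e '%s' -- sleep 1" % event)
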
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
index 639789478073..eeaa7a97f71c 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
@@ -1,5 +1,33 @@
[
{
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.DIV_ACTIVE",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event counts the cycles the integer divider is busy.",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.IDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1b",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
@@ -18,6 +46,104 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Conditional branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
+ "PublicDescription": "Counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x11",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Not taken branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Taken conditional branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xbf",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Far branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "PublicDescription": "Counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Indirect near branch instructions retired (excluding returns)",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf9",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Return instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PublicDescription": "Counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Taken branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
@@ -36,6 +162,174 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x44",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x11",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x51",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x50",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x41",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Miss-predicted near indirect branch instructions retired (excluding returns)",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Mispredicted indirect CALL retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x42",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xc0",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x60",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RET_COST",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x48",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
"EventName": "CPU_CLK_UNHALTED.CORE",
"SampleAfterValue": "2000003",
@@ -50,6 +344,33 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
+ "SampleAfterValue": "25003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
"SampleAfterValue": "2000003",
@@ -65,6 +386,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Reference cycles when the core is not in halt state.",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
@@ -104,6 +434,133 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "CounterMask": "1",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "16",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "CounterMask": "5",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Total execution stalls.",
+ "CounterMask": "4",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "5",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x21",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "CounterMask": "2",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Instruction decoders utilized in a cycle",
+ "EventCode": "0x75",
+ "EventName": "INST_DECODED.DECODERS",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of instructions retired",
"EventName": "INST_RETIRED.ANY",
"PEBS": "1",
@@ -138,12 +595,240 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+ "BriefDescription": "INST_RETIRED.MACRO_FUSED",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.MACRO_FUSED",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Precise instruction retired with PEBS precise-distribution",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PEBS": "1",
+ "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled. Use on Fixed Counter 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+ "CounterMask": "1",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.ALL_RECOVERY_CYCLES",
+ "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x7",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "TMA slots where uops got dropped",
+ "EventCode": "0xad",
+ "EventName": "INT_MISC.UOP_DROPPING",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.128BIT",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.128BIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x13",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.256BIT",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.256BIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xac",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "integer ADD, SUB, SAD 128-bit vector instructions.",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.ADD_128",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "integer ADD, SUB, SAD 256-bit vector instructions.",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.ADD_256",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.MUL_256",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.MUL_256",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.SHUFFLES",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.SHUFFLES",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.VNNI_128",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.VNNI_128",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "INT_VEC_RETIRED.VNNI_256",
+ "EventCode": "0xe7",
+ "EventName": "INT_VEC_RETIRED.VNNI_256",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
+ "CounterMask": "6",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_OK",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "EventCode": "0xa8",
+ "EventName": "LSD.UOPS",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
- "UMask": "0x82",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+ "SampleAfterValue": "20003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+ "SampleAfterValue": "20003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SLOW",
+ "SampleAfterValue": "20003",
+ "UMask": "0x6f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.SCOREBOARD",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
"Unit": "cpu_core"
},
{
@@ -156,6 +841,32 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "TMA slots wasted due to incorrect speculations.",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BAD_SPEC_SLOTS",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"EventName": "TOPDOWN.SLOTS",
"PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
@@ -181,6 +892,30 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Fast Nukes such as Memory Ordering Machine clears and MRN nukes",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.ALL",
@@ -188,6 +923,30 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to due to certain allocation restrictions",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to mrbl stall. A 'marble' refers to a physical register file entry, also known as the physical destination (PDST).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to iq/jeu scoreboards or ms scb",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.ALL",
@@ -195,6 +954,79 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTClear",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ms",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.CISC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stall",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.DECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8d",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x72",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to TOPDOWN_FE_BOUND.ITLB_MISS]",
+ "Deprecated": "1",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ITLB",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to itlb miss [This event is alias to TOPDOWN_FE_BOUND.ITLB]",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ITLB_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to predecode wrong",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL",
"EventCode": "0x72",
"EventName": "TOPDOWN_RETIRING.ALL",
@@ -203,6 +1035,269 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Number of non dec-by-all uops decoded by decoder",
+ "EventCode": "0x76",
+ "EventName": "UOPS_DECODED.DEC0_UOPS",
+ "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on port 0",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on port 1",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_1",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on ports 2, 3 and 10",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_2_3_10",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on ports 4 and 9",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_4_9",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on ports 5 and 11",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_5_11",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on port 6",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_6",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Uops executed on ports 7 and 8",
+ "EventCode": "0xb2",
+ "EventName": "UOPS_DISPATCHED.PORT_7_8",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of uops executed on the core.",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CounterMask": "2",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CounterMask": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "CounterMask": "4",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.STALLS",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PublicDescription": "Counts the number of x87 uops executed.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of uops issued by the front end every cycle.",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2. Uops_issued correlates to the number of ROB entries. If uop takes 2 ROB slots it counts as 2 uops_issued.",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Uops that RAT issues to RS",
+ "EventCode": "0xae",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "UOPS_ISSUED.CYCLES",
+ "CounterMask": "1",
+ "EventCode": "0xae",
+ "EventName": "UOPS_ISSUED.CYCLES",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread",
+ "CounterMask": "1",
+ "EventCode": "0xae",
+ "EventName": "UOPS_ISSUED.STALLS",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired uops except the last uop of each instruction.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.HEAVY",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of integer divide uops retired.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.IDIV",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "UOPS_RETIRED.MS",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x8",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
@@ -210,5 +1305,25 @@
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts the number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops retired, includes those in ms flows",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.X87",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
}
]
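
With BR_INST_RETIRED.ALL_BRANCHES and BR_MISP_RETIRED.ALL_BRANCHES defined for both hybrid units above, a per-core-type misprediction rate falls out directly. The sketch below uses made-up placeholder counts, not measured data:

# Illustrative only: branch misprediction rate per hybrid core type from
# the pipeline events above. All counts are made-up placeholders.
counts = {
    "cpu_core": {"branches": 9_000_000_000, "mispredicts": 45_000_000},
    "cpu_atom": {"branches": 2_000_000_000, "mispredicts": 30_000_000},
}
for unit, c in counts.items():
    rate = c["mispredicts"] / c["branches"]
    print(f"{unit}: {rate:.2%} of retired branches mispredicted")
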
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json
new file mode 100644
index 000000000000..188843be4caf
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-cache.json
@@ -0,0 +1,18 @@
+[
+ {
+ "BriefDescription": "Number of all entries allocated. Includes also retries.",
+ "EventCode": "0x35",
+ "EventName": "UNC_HAC_CBO_TOR_ALLOCATION.ALL",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "HAC_CBO"
+ },
+ {
+ "BriefDescription": "Asserted on coherent DRD + DRdPref allocations into the queue. Cacheable only",
+ "EventCode": "0x35",
+ "EventName": "UNC_HAC_CBO_TOR_ALLOCATION.DRD",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HAC_CBO"
+ }
+]
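
Both HAC_CBO entries above share event code 0x35 and differ only in umask, and PerPkg=1 means each is counted once per package rather than per thread. A sketch of the DRD share of TOR allocations, using made-up placeholder counts:

# Illustrative only: fraction of HAC CBo TOR allocations that were
# coherent data reads (DRD + DRdPref). Counts are made-up placeholders.
tor_all = 800_000_000   # UNC_HAC_CBO_TOR_ALLOCATION.ALL (umask 0x8)
tor_drd = 500_000_000   # UNC_HAC_CBO_TOR_ALLOCATION.DRD (umask 0x1)
print(f"DRD share of TOR allocations: {tor_drd / tor_all:.1%}")
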
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json
new file mode 100644
index 000000000000..08b5c7574cfc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json
@@ -0,0 +1,42 @@
+[
+ {
+ "BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches",
+ "EventCode": "0x81",
+ "EventName": "UNC_HAC_ARB_REQ_TRK_REQUEST.DRD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "HAC_ARB"
+ },
+ {
+ "BriefDescription": "Number of all CMI transactions",
+ "EventCode": "0x8a",
+ "EventName": "UNC_HAC_ARB_TRANSACTIONS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HAC_ARB"
+ },
+ {
+ "BriefDescription": "Number of all CMI reads",
+ "EventCode": "0x8a",
+ "EventName": "UNC_HAC_ARB_TRANSACTIONS.READS",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "HAC_ARB"
+ },
+ {
+ "BriefDescription": "Number of all CMI writes not including Mflush",
+ "EventCode": "0x8a",
+ "EventName": "UNC_HAC_ARB_TRANSACTIONS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "HAC_ARB"
+ },
+ {
+ "BriefDescription": "Total number of all outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "EventCode": "0x81",
+ "EventName": "UNC_HAC_ARB_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HAC_ARB"
+ }
+]
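
The three UNC_HAC_ARB_TRANSACTIONS umasks above come close to a reads-plus-writes decomposition of ALL, except that WRITES excludes Mflush, so the parts may undercount the total. A quick consistency check on made-up placeholder counts:

# Illustrative only: compare the CMI read/write breakdown against the ALL
# count; WRITES excludes Mflush, so a small residue is expected.
cmi_all = 1_000_000      # UNC_HAC_ARB_TRANSACTIONS.ALL
cmi_reads = 600_000      # UNC_HAC_ARB_TRANSACTIONS.READS
cmi_writes = 350_000     # UNC_HAC_ARB_TRANSACTIONS.WRITES (no Mflush)
residue = cmi_all - (cmi_reads + cmi_writes)
print(f"unaccounted CMI transactions (e.g. Mflush): {residue}")
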
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json
new file mode 100644
index 000000000000..c9d248d1042e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-memory.json
@@ -0,0 +1,126 @@
+[
+ {
+ "BriefDescription": "Counts every CAS read command sent from the Memory Controller 0 to DRAM (sum of all channels).",
+ "EventCode": "0xff",
+ "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every CAS read command sent from the Memory Controller 0 to DRAM (sum of all channels). Each CAS commands can be for 32B or 64B of data.",
+ "UMask": "0x20",
+ "Unit": "imc_free_running_0"
+ },
+ {
+ "BriefDescription": "Counts every read and write request entering the Memory Controller 0.",
+ "EventCode": "0xff",
+ "EventName": "UNC_MC0_TOTAL_REQCOUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every read and write request entering the Memory Controller 0 (sum of all channels). All requests are counted as one, whether they are 32B or 64B Read/Write or partial/full line writes. Some write requests to the same address may merge to a single write command to DRAM. Therefore, the total request count may be higher than total DRAM BW.",
+ "UMask": "0x10",
+ "Unit": "imc_free_running_0"
+ },
+ {
+ "BriefDescription": "Counts every CAS write command sent from the Memory Controller 0 to DRAM (sum of all channels).",
+ "EventCode": "0xff",
+ "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every CAS write command sent from the Memory Controller 0 to DRAM (sum of all channels). Each CAS commands can be for 32B or 64B of data.",
+ "UMask": "0x30",
+ "Unit": "imc_free_running_0"
+ },
+ {
+ "BriefDescription": "Counts every CAS read command sent from the Memory Controller 1 to DRAM (sum of all channels).",
+ "EventCode": "0xff",
+ "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every CAS read command sent from the Memory Controller 1 to DRAM (sum of all channels). Each CAS commands can be for 32B or 64B of data.",
+ "UMask": "0x20",
+ "Unit": "imc_free_running_1"
+ },
+ {
+ "BriefDescription": "Counts every read and write request entering the Memory Controller 1.",
+ "EventCode": "0xff",
+ "EventName": "UNC_MC1_TOTAL_REQCOUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every read and write request entering the Memory Controller 1 (sum of all channels). All requests are counted as one, whether they are 32B or 64B Read/Write or partial/full line writes. Some write requests to the same address may merge to a single write command to DRAM. Therefore, the total request count may be higher than total DRAM BW.",
+ "UMask": "0x10",
+ "Unit": "imc_free_running_1"
+ },
+ {
+ "BriefDescription": "Counts every CAS write command sent from the Memory Controller 1 to DRAM (sum of all channels).",
+ "EventCode": "0xff",
+ "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every CAS write command sent from the Memory Controller 1 to DRAM (sum of all channels). Each CAS commands can be for 32B or 64B of data.",
+ "UMask": "0x30",
+ "Unit": "imc_free_running_1"
+ },
+ {
+ "BriefDescription": "ACT command for a read request sent to DRAM",
+ "EventCode": "0x24",
+ "EventName": "UNC_M_ACT_COUNT_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "ACT command sent to DRAM",
+ "EventCode": "0x26",
+ "EventName": "UNC_M_ACT_COUNT_TOTAL",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "ACT command for a write request sent to DRAM",
+ "EventCode": "0x25",
+ "EventName": "UNC_M_ACT_COUNT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read CAS command sent to DRAM",
+ "EventCode": "0x22",
+ "EventName": "UNC_M_CAS_COUNT_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write CAS command sent to DRAM",
+ "EventCode": "0x23",
+ "EventName": "UNC_M_CAS_COUNT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
+ "EventCode": "0x28",
+ "EventName": "UNC_M_PRE_COUNT_IDLE",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PRE command sent to DRAM for a read/write request",
+ "EventCode": "0x27",
+ "EventName": "UNC_M_PRE_COUNT_PAGE_MISS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of bytes read from DRAM, in 32B chunks. Counter increments by 1 after receiving 32B chunk data.",
+ "EventCode": "0x3A",
+ "EventName": "UNC_M_RD_DATA",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Total number of read and write byte transfers to/from DRAM, in 32B chunks. Counter increments by 1 after sending or receiving 32B chunk data.",
+ "EventCode": "0x3C",
+ "EventName": "UNC_M_TOTAL_DATA",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of bytes written to DRAM, in 32B chunks. Counter increments by 1 after sending 32B chunk data.",
+ "EventCode": "0x3B",
+ "EventName": "UNC_M_WR_DATA",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ }
+]
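A brief note on how the iMC counters above combine: UNC_M_RD_DATA and UNC_M_WR_DATA tick once per 32B chunk, so multiplying each delta by 32 and dividing by the elapsed time yields DRAM read/write bandwidth, and UNC_M_TOTAL_DATA should roughly equal their sum. A minimal C sketch of that arithmetic, assuming hypothetical counter deltas collected over a one-second window (the values are illustrative placeholders, not measurements):

```c
/*
 * Sketch: derive DRAM bandwidth from the 32B-chunk data counters above.
 * The deltas are assumed to come from two counter reads one second apart
 * (e.g. via perf stat -I); the numbers here are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t rd_chunks = 400000000ULL;	/* UNC_M_RD_DATA delta */
	uint64_t wr_chunks = 150000000ULL;	/* UNC_M_WR_DATA delta */
	double seconds = 1.0;			/* sampling interval   */

	/* Each count is one 32-byte chunk moved to or from DRAM. */
	double rd_gbs = (double)rd_chunks * 32.0 / seconds / 1e9;
	double wr_gbs = (double)wr_chunks * 32.0 / seconds / 1e9;

	printf("DRAM read  bandwidth: %.2f GB/s\n", rd_gbs);
	printf("DRAM write bandwidth: %.2f GB/s\n", wr_gbs);
	printf("DRAM total bandwidth: %.2f GB/s\n", rd_gbs + wr_gbs);
	return 0;
}
```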
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
index 556e4292fcc8..056c2a885a32 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
@@ -1,5 +1,40 @@
[
{
+ "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
+ "CounterMask": "1",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to load DTLB misses.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
@@ -9,6 +44,95 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Page walks completed due to a demand data load to a 1G page.",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 2M or 4M page.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
+ "EventCode": "0x12",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all pages sizes. Will result in a DTLB write from STLB.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
+ "CounterMask": "1",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 1G page.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
@@ -18,6 +142,86 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Page walks completed due to a demand data store to a 1G page.",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
+ "EventCode": "0x13",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will result in an ITLB write from STLB.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
+ "CounterMask": "1",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
"EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
@@ -34,5 +238,58 @@
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 2M or 4M page.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks outstanding for iside in PMH every cycle.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for iside in PMH every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals. Walks could be counted by edge detecting on this event, but would count restarted suspended walks.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
+ "EventCode": "0x11",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x90",
+ "Unit": "cpu_atom"
}
]
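A note on the WALK_PENDING/WALK_COMPLETED pairs above: WALK_PENDING is an occupancy count (it accumulates the number of in-flight walks each cycle), so dividing its delta by the matching WALK_COMPLETED delta approximates the mean page-walk duration in cycles. A minimal C sketch under that assumption, with illustrative placeholder values rather than measured data:

```c
/*
 * Sketch: estimate average demand-load page-walk latency from the
 * occupancy/completion event pair. Counter deltas are assumed inputs
 * (placeholder values below); this is the usual occupancy-over-
 * completions derivation, not an API provided by perf itself.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pending   = 52000000ULL;	/* DTLB_LOAD_MISSES.WALK_PENDING   */
	uint64_t completed = 1300000ULL;	/* DTLB_LOAD_MISSES.WALK_COMPLETED */

	if (completed)
		printf("avg page walk: %.1f cycles\n",
		       (double)pending / (double)completed);
	return 0;
}
```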
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/cache.json b/tools/perf/pmu-events/arch/x86/rocketlake/cache.json
new file mode 100644
index 000000000000..b0f54a6650fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/cache.json
@@ -0,0 +1,894 @@
+[
+ {
+ "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.L2_STALL",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of L1D misses that are outstanding",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "L2 cache lines filling L2",
+ "EventCode": "0xF1",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1f"
+ },
+ {
+ "BriefDescription": "Modified cache lines that are evicted by L2 cache when triggered by an L2 cache fill.",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Non-modified cache lines that are silently dropped by L2 cache when triggered by an L2 cache fill.",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.SILENT",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
+ "EventCode": "0xf2",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "L2 code requests",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "Counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe4"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe1"
+ },
+ {
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x27"
+ },
+ {
+ "BriefDescription": "Demand requests to L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "PublicDescription": "Counts demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe7"
+ },
+ {
+ "BriefDescription": "RFO requests to L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe2"
+ },
+ {
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc4"
+ },
+ {
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x24"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc1"
+ },
+ {
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x21"
+ },
+ {
+ "BriefDescription": "All requests that miss L2 cache.",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.MISS",
+ "PublicDescription": "Counts all requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f"
+ },
+ {
+ "BriefDescription": "All L2 requests.",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "PublicDescription": "Counts all L2 requests.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff"
+ },
+ {
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc2"
+ },
+ {
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x22"
+ },
+ {
+ "BriefDescription": "SW prefetch requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.SWPF_HIT",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc8"
+ },
+ {
+ "BriefDescription": "SW prefetch requests that miss L2 cache.",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.SWPF_MISS",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x28"
+ },
+ {
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "EventCode": "0xF0",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x41"
+ },
+ {
+ "BriefDescription": "Retired load instructions.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x81"
+ },
+ {
+ "BriefDescription": "Retired store instructions.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Counts all retired store instructions.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x82"
+ },
+ {
+ "BriefDescription": "All retired memory instructions.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x83"
+ },
+ {
+ "BriefDescription": "Retired load instructions with locked access.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with locked access.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21"
+ },
+ {
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x41"
+ },
+ {
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x42"
+ },
+ {
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "Retired store instructions that miss the STLB.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x12"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
+ "Data_LA": "1",
+ "EventCode": "0xd2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or Bus Lock.",
+ "Data_LA": "1",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+ "SampleAfterValue": "100021",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C0004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C0004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that hit a cacheline in the L3 where a snoop was sent.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that hit a cacheline in the L3 where a snoop was sent.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit a cacheline in the L3 where a snoop was sent.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C0400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C0400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C0010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C0010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C0020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C0020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that hit a cacheline in the L3 where a snoop was sent.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C0020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetches to the L3 only that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L3.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C2380",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop hit in another core, data forwarding is not required.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C8000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was sent but no other cores had the data.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x2003C8000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was not needed to satisfy the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1003C8000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that hit a cacheline in the L3 where a snoop was sent.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_HIT.SNOOP_SENT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1E003C8000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that hit a cacheline in the L3 where a snoop was sent or not.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.L3_HIT.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FC03C0800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Demand and prefetch data reads",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts memory transactions sent to the uncore.",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PublicDescription": "Counts memory transactions sent to the uncore including requests initiated by the core, all L3 prefetches, reads resulting from page walks, and snoop responses.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of outstanding data read requests pending.",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of outstanding data read requests pending. Data read requests include cacheable demand reads and L2 prefetches, but do not include RFOs, code reads or prefetches to the L3. Reads due to page walks resulting from any request type will also be counted. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 outstanding data read request is pending.",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "PublicDescription": "Cycles where at least 1 outstanding data read request is pending. Data read requests include cacheable demand reads and L2 prefetches, but do not include RFOs, code reads or prefetches to the L3. Reads due to page walks resulting from any request type will also be counted. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 outstanding Demand RFO request is pending.",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "PublicDescription": "Cycles where at least 1 outstanding Demand RFO request is pending. RFOs are initiated by a core as part of a data store operation. Demand RFO requests include RFOs, locks, and ItoM transactions. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
+ "EventCode": "0xF4",
+ "EventName": "SQ_MISC.BUS_LOCK",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles the queue waiting for offcore responses is full.",
+ "EventCode": "0xf4",
+ "EventName": "SQ_MISC.SQ_FULL",
+ "PublicDescription": "Counts the cycles for which the thread is active and the queue waiting for responses from the uncore cannot take any more entries.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ }
+]
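The OFFCORE_REQUESTS.DEMAND_DATA_RD description above suggests pairing it with OFFCORE_REQUESTS_OUTSTANDING to derive average uncore latency: since the OUTSTANDING event accumulates the number of in-flight demand reads each cycle, the ratio of the two deltas approximates the mean L2-miss-to-completion latency in cycles. A minimal C sketch with illustrative placeholder values, not measured data:

```c
/*
 * Sketch: average demand data read latency in the uncore, computed as
 * occupancy over requests per the event descriptions above. The counter
 * deltas are hypothetical placeholders, not measured values.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t outstanding = 9000000ULL; /* OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD */
	uint64_t requests    = 60000ULL;   /* OFFCORE_REQUESTS.DEMAND_DATA_RD             */

	if (requests)
		printf("avg uncore read latency: %.1f cycles\n",
		       (double)outstanding / (double)requests);
	return 0;
}
```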
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json b/tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json
new file mode 100644
index 000000000000..85c26c889088
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/floating-point.json
@@ -0,0 +1,105 @@
+[
+ {
+ "BriefDescription": "Counts all microcode FP assists.",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.FP",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x60"
+ },
+ {
+ "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired; some instructions will count twice as noted below. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xc7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xfc"
+ }
+]
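
The per-event element widths spelled out in the descriptions above (1 for scalar, 2/4/8/16 for the packed flavors) are what FLOPs metrics weight the raw counts by; DPP and FM(N)ADD/SUB already count twice in hardware, so no extra factor is needed. A minimal sketch, with a made-up counts dict standing in for values read via perf stat:

    FLOPS_PER_COUNT = {
        "FP_ARITH_INST_RETIRED.SCALAR_SINGLE": 1,
        "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE": 1,
        "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 2,
        "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE": 4,
        "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE": 4,
        "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE": 8,
        "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE": 8,
        "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE": 16,
    }

    def total_flops(counts: dict) -> int:
        # counts: event name -> raw count (hypothetical perf stat output)
        return sum(FLOPS_PER_COUNT[n] * v for n, v in counts.items()
                   if n in FLOPS_PER_COUNT)

    print(total_flops({"FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE": 1_000_000}))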
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/frontend.json b/tools/perf/pmu-events/arch/x86/rocketlake/frontend.json
new file mode 100644
index 000000000000..2b539a08d2bf
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/frontend.json
@@ -0,0 +1,377 @@
+[
+ {
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xab",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PublicDescription": "Counts the number of Decode Stream Buffer (DSB a.k.a. Uop Cache)-to-MITE speculative transitions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
+ "EventCode": "0xab",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced DSB miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x1",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x12",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500106",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x508006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x501006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500206",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x510006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x502006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500406",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x520006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x504006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x500806",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "PublicDescription": "Counts instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "PublicDescription": "Counts instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity. Accounts for both cacheable and uncacheable accesses.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALLS",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
+ "CounterMask": "5",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
+ "CounterMask": "5",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_CYCLES_ANY",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x30"
+ },
+ {
+ "BriefDescription": "Number of switches from DSB or MITE to the MS",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x30"
+ },
+ {
+ "BriefDescription": "Uops delivered to IDQ while MS is busy",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x30"
+ },
+ {
+ "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
+ "CounterMask": "5",
+ "EventCode": "0x9c",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "Invert": "1",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ }
+]
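
The FRONTEND_RETIRED.* entries above all share EventCode 0xc6 and differ only in the value programmed into MSR 0x3F7 (MSR_PEBS_FRONTEND). Assuming the layout documented for recent Intel cores (bits 7:0 sub-event select, bits 19:8 bubble-length threshold, bits 22:20 bubble width), the MSRValues decode back to the descriptions; a small sketch:

    def decode_pebs_frontend(msr_value: int) -> dict:
        # Assumed MSR_PEBS_FRONTEND layout; not taken from this patch.
        return {
            "sub_event": msr_value & 0xFF,
            "latency_ge": (msr_value >> 8) & 0xFFF,
            "bubble_width_ge": (msr_value >> 20) & 0x7,
        }

    # LATENCY_GE_128: threshold 128 cycles, width 5 (full allocation width)
    print(decode_pebs_frontend(0x508006))
    # LATENCY_GE_2_BUBBLES_GE_1: threshold 2 cycles, width 1
    print(decode_pebs_frontend(0x100206))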
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/memory.json b/tools/perf/pmu-events/arch/x86/rocketlake/memory.json
new file mode 100644
index 000000000000..e8d2ec1c029b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/memory.json
@@ -0,0 +1,394 @@
+[
+ {
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+ "CounterMask": "2",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "CounterMask": "6",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6"
+ },
+ {
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PublicDescription": "Counts the number of times HLE abort was triggered.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+ "PublicDescription": "Counts the number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED_MEM",
+ "PublicDescription": "Counts the number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+ "PublicDescription": "Counts the number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of times an HLE execution successfully committed",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "PublicDescription": "Counts the number of times HLE commit succeeded.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times an HLE execution started.",
+ "EventCode": "0xc8",
+ "EventName": "HLE_RETIRED.START",
+ "PublicDescription": "Counts the number of times we entered an HLE region. Does not count nested transactions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "1009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "503",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "101",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "2003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC08000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that was not supplied by the L3 cache.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FFFC00800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data read requests that miss the L3 cache.",
+ "EventCode": "0xb0",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles where at least one demand data read request known to have missed the L3 cache is pending.",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Cycles where at least one demand data read request known to have missed the L3 cache is pending. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known to have missed the L3 cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted.",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PublicDescription": "Counts the number of times RTM abort was triggered.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution successfully committed",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times an RTM execution started.",
+ "EventCode": "0xc9",
+ "EventName": "RTM_RETIRED.START",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed inside a transactional region",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC2",
+ "PublicDescription": "Counts Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC3",
+ "PublicDescription": "Counts Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional reads",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY_READ",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional writes.",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Counts the number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PublicDescription": "Counts the number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ }
+]
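
For the MEM_TRANS_RETIRED.LOAD_LATENCY_GT_* events above, MSRValue is the latency threshold itself (programmed into MSR 0x3F6), and the suggested SampleAfterValue generally shrinks as the threshold grows, since fewer loads exceed larger thresholds. perf exposes the threshold as the ldlat term; the command strings printed by this sketch are illustrative only:

    # (MSRValue, SampleAfterValue) pairs copied from memory.json above.
    THRESHOLDS = [(0x4, 100003), (0x8, 50021), (0x10, 20011), (0x20, 100007),
                  (0x40, 2003), (0x80, 1009), (0x100, 503), (0x200, 101)]

    for ldlat, sample_after in THRESHOLDS:
        print(f"latency > {ldlat:3d} cycles: "
              f"perf record -e cpu/event=0xcd,umask=0x1,ldlat={ldlat}/P "
              f"-c {sample_after}")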
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json b/tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json
new file mode 100644
index 000000000000..a151ba9cccb0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json
@@ -0,0 +1,113 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
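
metricgroups.json is a flat name-to-description map: it documents the MetricGroup tags that the rocketlake metric definitions reference, so tools can show a short blurb for each group. A sketch of how a consumer might read it (the path handling is an assumption, not perf code):

    import json

    with open("metricgroups.json") as f:  # assumed local copy
        groups = json.load(f)

    # Pick out the per-level TMA groups and print their descriptions.
    for name in sorted(n for n in groups if n.startswith("tma_L")):
        print(f"{name}: {groups[name]}")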
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/other.json b/tools/perf/pmu-events/arch/x86/rocketlake/other.json
new file mode 100644
index 000000000000..cfb590632918
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/other.json
@@ -0,0 +1,242 @@
+[
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+ "PublicDescription": "Counts Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+ "PublicDescription": "Counts Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_CODE_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000004",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts L1 data cache prefetch requests and software prefetches (except PREFETCHW) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L1D_AND_SWPF.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000400",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch data reads (which bring data to L2) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000010",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts hardware prefetch RFOs (which bring data to L2) that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.HWPF_L2_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000020",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x18000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184008000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts miscellaneous requests, such as I/O and un-cacheable accesses that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.OTHER.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184008000",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that have any type of response.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that DRAM supplied the request.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.STREAMING_WR.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ }
+]
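
The OCR entries above are off-core response events: the EventCode/UMask pair selects the OCR counter, and MSRValue is the request/response filter written to MSR 0x1a6 or 0x1a7 (the two MSRIndex values). Perf exposes that filter on Intel core PMUs as the offcore_rsp format attribute (see /sys/bus/event_source/devices/cpu/format), so, as a rough sketch under that assumption, an entry can be turned into an event spec like this (illustrative helper, not perf code):

import json

def ocr_to_perf_spec(ev):
    # "0xB7, 0xBB" lists the two OCR event codes; use the first one,
    # paired with the filter value destined for MSR 0x1a6/0x1a7.
    code = ev["EventCode"].split(",")[0].strip()
    return f"cpu/event={code},umask={ev['UMask']},offcore_rsp={ev['MSRValue']}/"

with open("other.json") as f:
    events = json.load(f)
ev = next(e for e in events if e["EventName"] == "OCR.DEMAND_DATA_RD.ANY_RESPONSE")
print(ocr_to_perf_spec(ev))
# -> cpu/event=0xB7,umask=0x1,offcore_rsp=0x10001/
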
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json b/tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json
new file mode 100644
index 000000000000..375b78044f14
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json
@@ -0,0 +1,801 @@
+[
+ {
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
+ "CounterMask": "1",
+ "EventCode": "0x14",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9"
+ },
+ {
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware Examples include AD (page Access Dirty), FP and AVX related assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "All branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PublicDescription": "Counts all branch instructions retired.",
+ "SampleAfterValue": "400009"
+ },
+ {
+ "BriefDescription": "Conditional branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
+ "PublicDescription": "Counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "Not taken branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Taken conditional branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Far branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "PublicDescription": "Counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Indirect near branch instructions retired (excluding returns)",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Return instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PublicDescription": "Counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Taken branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "All mispredicted branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "50021"
+ },
+ {
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x11"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "All miss-predicted indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PublicDescription": "Counts all miss-predicted indirect branch instructions retired (excluding RETs. TSX aborts is considered indirect branch).",
+ "SampleAfterValue": "50021",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Mispredicted indirect CALL instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "PEBS": "1",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
+ "SampleAfterValue": "25003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "PublicDescription": "Counts core crystal clock cycles when the thread is unhalted.",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "CounterMask": "1",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "16",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc"
+ },
+ {
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "CounterMask": "5",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "20",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x14"
+ },
+ {
+ "BriefDescription": "Total execution stalls.",
+ "CounterMask": "4",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "CounterMask": "2",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
+ "EventCode": "0x87",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Instruction decoders utilized in a cycle",
+ "EventCode": "0x55",
+ "EventName": "INST_DECODED.DECODERS",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
+ "EventName": "INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "Number of all retired NOP instructions.",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.NOP",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Precise instruction retired event with a reduced effect of PEBS shadow in IP distribution",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PEBS": "1",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled. Use on Fixed Counter 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles without actually retired instructions.",
+ "CounterMask": "1",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "This event counts cycles without actually retired instructions.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+ "CounterMask": "1",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.ALL_RECOVERY_CYCLES",
+ "PublicDescription": "Counts cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Clears speculative count",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.CLEARS_COUNT",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "EventCode": "0x0d",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots where uops got dropped",
+ "EventCode": "0x0d",
+ "EventName": "INT_MISC.UOP_DROPPING",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "False dependencies due to partial compare on address.",
+ "EventCode": "0x07",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies due to partial compare on address.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.SWPF",
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
+ "CounterMask": "5",
+ "EventCode": "0xa8",
+ "EventName": "LSD.CYCLES_OK",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "EventCode": "0xa8",
+ "EventName": "LSD.UOPS",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Increments whenever there is an update to the LBR array.",
+ "EventCode": "0xcc",
+ "EventName": "MISC_RETIRED.LBR_INSERTS",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR to be enabled properly.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+ "EventCode": "0xcc",
+ "EventName": "MISC_RETIRED.PAUSE_INST",
+ "PublicDescription": "Counts number of retired PAUSE instructions. This event is not supported on first SKL and KBL products.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.SCOREBOARD",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "EventCode": "0x5e",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "Invert": "1",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
+ "PublicDescription": "Counts the number of Top-down Microarchitecture Analysis (TMA) method's slots where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
+ "EventName": "TOPDOWN.SLOTS",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN.SLOTS_P",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
+ "SampleAfterValue": "10000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops decoded out of instructions exclusively fetched by decoder 0",
+ "EventCode": "0x56",
+ "EventName": "UOPS_DECODED.DEC0",
+ "PublicDescription": "Uops exclusively fetched by decoder 0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 0",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_0",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 1",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_1",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 2 and 3",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_2_3",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 2 and 3.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 4 and 9",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_4_9",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 5 and 9.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 5",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_5",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 6",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_6",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Number of uops executed on port 7 and 8",
+ "EventCode": "0xa1",
+ "EventName": "UOPS_DISPATCHED.PORT_7_8",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to ports 7 and 8.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Number of uops executed on the core.",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CounterMask": "1",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CounterMask": "2",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CounterMask": "3",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "CounterMask": "4",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "EventCode": "0xb1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PublicDescription": "Counts the number of x87 uops executed.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Uops that RAT issues to RS",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles when RAT does not issue Uops to RS for the thread",
+ "CounterMask": "1",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+ "EventCode": "0x0e",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to 'Mixing Intel AVX and Intel SSE Code' section of the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Retirement slots used.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.SLOTS",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts the number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ }
+]
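
The CounterMask, Invert and EdgeDetect fields in the entries above correspond to the cmask (bits 24-31), inv (bit 23) and edge (bit 18) fields of the x86 IA32_PERFEVTSELx register, which is also the layout perf accepts for raw rNNNN events on the core PMU. A small illustrative encoder under that assumption (field defaults are guesses for absent keys, not perf code):

def raw_config(ev):
    # Pack event select (bits 0-7), unit mask (bits 8-15), edge detect
    # (bit 18), invert (bit 23) and counter mask (bits 24-31).
    cfg = int(ev["EventCode"], 16)
    cfg |= int(ev.get("UMask", "0"), 16) << 8
    cfg |= int(ev.get("EdgeDetect", "0")) << 18
    cfg |= int(ev.get("Invert", "0")) << 23
    cfg |= int(ev.get("CounterMask", "0")) << 24
    return cfg

# UOPS_EXECUTED.CYCLES_GE_2 above: event 0xb1, umask 0x1, cmask 2.
ev = {"EventCode": "0xb1", "UMask": "0x1", "CounterMask": "2"}
print(f"perf stat -e r{raw_config(ev):x}")  # -> perf stat -e r20001b1
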
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
new file mode 100644
index 000000000000..1bb9cededa56
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
@@ -0,0 +1,1571 @@
+[
+ {
+ "BriefDescription": "C10 residency percent per package",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C10_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C8 residency percent per package",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C8_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C9 residency percent per package",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C9_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ"
+ },
+ {
+ "BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
+ "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
+ "MetricGroup": "smi",
+ "MetricName": "smi_cycles",
+ "MetricThreshold": "smi_cycles > 0.1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Number of SMI interrupts.",
+ "MetricExpr": "msr@smi@",
+ "MetricGroup": "smi",
+ "MetricName": "smi_num",
+ "ScaleUnit": "1SMI#"
+ },
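
The MetricExpr strings are evaluated by perf's expression parser over the collected event counts; names such as msr@aperf@ or cstate_pkg@c6\-residency@ refer to events of other PMUs. A toy evaluation of smi_cycles above with invented counts, just to show the shape of the expression:

# Mirrors "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)"
# with ScaleUnit "100%"; the counts below are made up for illustration.
aperf, cycles, smi = 2.0e9, 1.9e9, 3
smi_cycles = (aperf - cycles) / aperf if smi > 0 else 0
print(f"smi_cycles = {smi_cycles * 100:.1f}%")  # -> 5.0%
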
+ {
+ "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_4k_aliasing",
+ "MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset. False match is possible; which incur a few cycles load re-issue. However; the short re-issue duration is often hidden by the out-of-order core and HW optimizations; hence a user may safely ignore a high value of this metric unless it manages to propagate up into parent nodes of the hierarchy (e.g. to L1_Bound).",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
+ "MetricName": "tma_alu_op_utilization",
+ "MetricThreshold": "tma_alu_op_utilization > 0.6",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
+ "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
+ "MetricName": "tma_assists",
+ "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
+ "PublicDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists. Assists are long sequences of uops that are required in certain corner-cases for operations that cannot be handled natively by the execution pipeline. For example; when working with very small floating point values (so-called Denormals); the FP units are not set up to perform these operations natively. Instead; a sequence of instructions to perform the computation on the Denormals is injected into the pipeline. Since these microcode sequences might be dozens of uops long; Assists can be extremely deleterious to performance and they can be avoided in many cases. Sample with: ASSISTS.ANY",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_backend_bound",
+ "MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1;Default",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_bad_speculation",
+ "MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1;Default",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
+ "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_branch_instructions",
+ "MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * tma_bad_speculation",
+ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
+ "MetricName": "tma_branch_mispredicts",
+ "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
+ "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+ "MetricName": "tma_branch_resteers",
+ "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers. Branch Resteers estimates the Frontend delay in fetching operations from corrected path; following all sorts of miss-predicted branches. For example; branchy code with lots of miss-predictions might get categorized under Branch Resteers. Note the value of this node may overlap with its siblings. Sample with: BR_MISP_RETIRED.ALL_BRANCHES",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
+ "MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
+ "MetricName": "tma_cisc",
+ "MetricThreshold": "tma_cisc > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
+ "PublicDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction. A CISC instruction has multiple uops that are required to perform the instruction's functionality as in the case of read-modify-write as an example. Since these instructions require multiple uops they may or may not imply sub-optimal use of machine resources.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
+ "MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
+ "MetricName": "tma_clears_resteers",
+ "MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(29 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
+ "MetricName": "tma_contested_accesses",
+ "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses. Contested accesses occur when data written by one Logical Processor are read by another Logical Processor on a different Physical Core. Examples of contested accesses include synchronizations such as locks; true data sharing such as modified locked variables; and false sharing. Sample with: MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM_PS;MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS_PS. Related metrics: tma_data_sharing, tma_false_sharing, tma_machine_clears, tma_remote_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck",
+ "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)",
+ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricName": "tma_core_bound",
+ "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
+ "ScaleUnit": "100%"
+ },
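Editor's note: Core Bound is defined as a residual; whatever part of Backend Bound is not explained by Memory Bound, clamped at zero so noise in the children cannot drive it negative. A tiny sketch with hypothetical fractions:

tma_backend_bound = 0.35   # hypothetical Level-1 value
tma_memory_bound = 0.22    # hypothetical Level-2 sibling

# max(0, ...) mirrors the MetricExpr; the clamp guards against
# measurement noise pushing the residual below zero.
tma_core_bound = max(0, tma_backend_bound - tma_memory_bound)  # 0.13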
+ {
+ "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
+ "MetricName": "tma_data_sharing",
+ "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses. Data shared by multiple Logical Processors (even just read shared) may cause increased access latency due to cache coherency. Excessive data sharing can drastically harm multithreaded performance. Sample with: MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT_PS. Related metrics: tma_contested_accesses, tma_false_sharing, tma_machine_clears, tma_remote_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
+ "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
+ "MetricName": "tma_decoder0_alone",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+ "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
+ "ScaleUnit": "100%"
+ },
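Editor's note: event terms such as cpu@INST_DECODED.DECODERS\,cmask\=1@ embed a PMU event plus modifiers inside a metric expression; the backslash escapes the comma and equals sign that would otherwise split the term. A hypothetical Python helper, purely to make the convention concrete:

def unescape_metric_event(term: str) -> str:
    # Hypothetical illustration only: strip the \, and \= escapes
    # used inside metric-expression event terms.
    return term.replace("\\,", ",").replace("\\=", "=")

print(unescape_metric_event(r"cpu@INST_DECODED.DECODERS\,cmask\=1@"))
# -> cpu@INST_DECODED.DECODERS,cmask=1@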
+ {
+ "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
+ "MetricName": "tma_divider",
+ "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric represents fraction of cycles where the Divider unit was active. Divide and square root instructions are performed by the Divider unit and can take considerably longer latency than integer or Floating Point addition; subtraction; or multiplication. Sample with: ARITH.DIVIDER_ACTIVE",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
+ "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_dram_bound",
+ "MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads. Better caching can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_MISS_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
+ "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
+ "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
+ "MetricName": "tma_dsb",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+ "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
+ "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
+ "MetricName": "tma_dsb_switches",
+ "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
+ "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
+ "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
+ "MetricName": "tma_dtlb_load",
+ "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+ "ScaleUnit": "100%"
+ },
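Editor's note: the min/max structure above caps an STLB-cost estimate (an assumed 7 cycles per STLB hit, plus active page-walk cycles) by the cycles that can plausibly be charged to address translation rather than to cache misses. A Python sketch with invented numbers:

stlb_hits = 120_000          # DTLB_LOAD_MISSES.STLB_HIT, cmask=1 (hypothetical)
walk_active = 300_000        # DTLB_LOAD_MISSES.WALK_ACTIVE
mem_any = 2_000_000          # CYCLE_ACTIVITY.CYCLES_MEM_ANY
l1d_miss = 1_400_000         # CYCLE_ACTIVITY.CYCLES_L1D_MISS
clks = 10_000_000            # tma_info_thread_clks

estimate = 7 * stlb_hits + walk_active         # worst-case TLB cycles
cap = max(mem_any - l1d_miss, 0)               # cycles not already L1D-miss bound
tma_dtlb_load = min(estimate, cap) / clks
print(f"tma_dtlb_load = {tma_dtlb_load:.1%}")  # 6.0% with these inputs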
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
+ "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
+ "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
+ "MetricName": "tma_dtlb_store",
+ "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
+ "MetricExpr": "32.5 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+ "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
+ "MetricName": "tma_false_sharing",
+ "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing. False Sharing is a multithreading hiccup; where multiple Logical Processors contend on different data-elements mapped into the same cache line. Sample with: OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM. Related metrics: tma_contested_accesses, tma_data_sharing, tma_machine_clears, tma_remote_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
+ "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
+ "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
+ "MetricName": "tma_fb_full",
+ "MetricThreshold": "tma_fb_full > 0.3",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues",
+ "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
+ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
+ "MetricName": "tma_fetch_bandwidth",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
+ "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_thread_slots",
+ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
+ "MetricName": "tma_fetch_latency",
+ "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring instructions that that are decoder into two or up to ([SNB+] four; [ADL+] five) uops",
+ "MetricExpr": "tma_heavy_operations - tma_microcode_sequencer",
+ "MetricGroup": "TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueD0",
+ "MetricName": "tma_few_uops_instructions",
+ "MetricThreshold": "tma_few_uops_instructions > 0.05 & tma_heavy_operations > 0.1",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring instructions that that are decoder into two or up to ([SNB+] four; [ADL+] five) uops. This highly-correlates with the number of uops in such instructions. Related metrics: tma_decoder0_alone",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
+ "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_fp_arith",
+ "MetricThreshold": "tma_fp_arith > 0.2 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
+ "MetricName": "tma_fp_scalar",
+ "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
+ "MetricName": "tma_fp_vector",
+ "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+ "MetricName": "tma_fp_vector_128b",
+ "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+ "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+ "MetricName": "tma_fp_vector_256b",
+ "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+ "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+ "MetricName": "tma_fp_vector_512b",
+ "MetricThreshold": "tma_fp_vector_512b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+ "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots",
+ "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_frontend_bound",
+ "MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1;Default",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
+ "ScaleUnit": "100%"
+ },
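Editor's note: at Level 1 the four topdown slot counters are normalized so the categories sum to one, and the dropped-uop correction is subtracted from Frontend Bound. A Python sketch with hypothetical counter values; once a perf build carries these definitions, the metric can be requested by name (e.g. perf stat -M tma_frontend_bound):

fe, bad_spec, ret, be = 3.0e9, 1.0e9, 4.0e9, 2.0e9  # topdown-* counters (hypothetical)
uop_dropping = 5.0e7      # INT_MISC.UOP_DROPPING
slots = 1.0e10            # tma_info_thread_slots

# Normalize to a fraction of all slots, then remove dropped uops.
tma_frontend_bound = fe / (fe + bad_spec + ret + be) - uop_dropping / slots
print(f"tma_frontend_bound = {tma_frontend_bound:.1%}")  # 29.5%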
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
+ "MetricExpr": "tma_microcode_sequencer + tma_retiring * (UOPS_DECODED.DEC0 - cpu@UOPS_DECODED.DEC0\\,cmask\\=1@) / IDQ.MITE_UOPS",
+ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
+ "MetricName": "tma_heavy_operations",
+ "MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
+ "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+ "MetricName": "tma_icache_misses",
+ "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
+ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are non-taken conditionals",
+ "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "tma_info_branches_cond_nt"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are taken conditionals",
+ "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "tma_info_branches_cond_tk"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
+ },
+ {
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches"
+ },
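Editor's note: the Info.Branches fractions partition retired branches, so the "other" bucket is defined as the remainder. A sketch with invented fractions:

callret, cond_nt, cond_tk, jump = 0.10, 0.35, 0.40, 0.10  # hypothetical shares
other = 1 - (cond_nt + cond_tk + callret + jump)          # 0.05
# By construction the five buckets always sum to one.
assert abs(callret + cond_nt + cond_tk + jump + other - 1.0) < 1e-9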
+ {
+ "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
+ "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_core_core_clks"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
+ "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_core_coreipc"
+ },
+ {
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
+ },
+ {
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
+ "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 5 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+ "MetricGroup": "DSBmiss",
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
+ },
+ {
+ "BriefDescription": "Average number of Uops issued by front-end when it issued something",
+ "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "tma_info_frontend_fetch_upc"
+ },
+ {
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
+ },
+ {
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
+ },
+ {
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
+ },
+ {
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
+ },
+ {
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
+ "MetricGroup": "Fed;LSD",
+ "MetricName": "tma_info_frontend_lsd_coverage"
+ },
+ {
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_inst_mix_instructions",
+ "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
+ "MetricGroup": "Flops;InsType",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
+ "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
+ "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
+ "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
+ "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Branches;Fed;InsType",
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
+ },
+ {
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+ "MetricGroup": "Flops;InsType",
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
+ },
+ {
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+ "MetricGroup": "InsType",
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
+ },
+ {
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+ "MetricGroup": "InsType",
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
+ },
+ {
+ "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
+ "MetricGroup": "Prefetches",
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
+ },
+ {
+ "BriefDescription": "Instruction per taken branch",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 11",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+ },
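Editor's note: the cache fill-bandwidth metrics all share one shape: line replacements times 64 bytes per cache line, scaled to GB and divided by wall-clock time. A worked sketch with made-up counts:

l1d_replacement = 1.5e9   # L1D.REPLACEMENT over the window (hypothetical)
duration_time = 2.0       # seconds

bw = 64 * l1d_replacement / 1e9 / duration_time  # GB/s
print(f"L1D fill bandwidth ~= {bw:.0f} GB/s")    # 48 GB/s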
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l1mpki_load"
+ },
+ {
+ "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
+ "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l2hpki_all"
+ },
+ {
+ "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l2hpki_load"
+ },
+ {
+ "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Backend;CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l2mpki"
+ },
+ {
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
+ "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem;Offcore",
+ "MetricName": "tma_info_memory_l2mpki_all"
+ },
+ {
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l2mpki_load"
+ },
+ {
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ },
+ {
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
+ },
+ {
+ "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+ },
+ {
+ "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
+ },
+ {
+ "BriefDescription": "Average Latency for L3 cache miss demand Loads",
+ "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
+ },
+ {
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
+ },
+ {
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ },
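Editor's note: the same idea at the uncore; 64 bytes per tracked request, with the /1e6 followed by /1e3 scaling being just /1e9 split in two steps. A sketch with hypothetical counts:

trk = 8.0e8               # UNC_ARB_TRK_REQUESTS.ALL (hypothetical)
coh = 2.0e8               # UNC_ARB_COH_TRK_REQUESTS.ALL
duration_time = 2.0       # seconds

dram_bw = 64 * (trk + coh) / 1e9 / duration_time    # GB/s
print(f"DRAM bandwidth use ~= {dram_bw:.0f} GB/s")  # 32 GB/s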
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+ "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ },
+ {
+ "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_request_latency"
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_power_license0_utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_power_license1_utilization",
+ "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_power_license2_utilization",
+ "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+ "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_system_smt_2t_utilization"
+ },
+ {
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricExpr": "UNC_CLOCK.SOCKET",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_socket_clks"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "TOPDOWN.SLOTS",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline;Ret;Retire",
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
+ },
+ {
+ "BriefDescription": "Instruction per taken branch",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;FetchBW",
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 7.5"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
+ "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+ "MetricName": "tma_itlb_misses",
+ "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
+ "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+ "MetricName": "tma_l1_bound",
+ "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache. The L1 data cache typically has the shortest latency. However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
+ "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_l2_bound",
+ "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads. Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
+ "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_l3_bound",
+ "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core. Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+ "MetricExpr": "9 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
+ "MetricName": "tma_l3_hit_latency",
+ "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+ "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
+ "MetricName": "tma_lcp",
+ "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation)",
+ "MetricExpr": "max(0, tma_retiring - tma_heavy_operations)",
+ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
+ "MetricName": "tma_light_operations",
+ "MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
+ "MetricName": "tma_load_op_utilization",
+ "MetricThreshold": "tma_load_op_utilization > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations. Sample with: UOPS_DISPATCHED.PORT_2_3",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles where the (first level) DTLB was missed by load accesses, that later on hit in second-level TLB (STLB)",
+ "MetricExpr": "tma_dtlb_load - tma_load_stlb_miss",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
+ "MetricName": "tma_load_stlb_hit",
+ "MetricThreshold": "tma_load_stlb_hit > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
+ "MetricName": "tma_load_stlb_miss",
+ "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
+ "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
+ "MetricName": "tma_lock_latency",
+ "MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations. Due to the microarchitecture handling of locks; they are classified as L1_Bound regardless of what memory source satisfied them. Sample with: MEM_INST_RETIRED.LOCK_LOADS_PS. Related metrics: tma_store_latency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
+ "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2",
+ "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
+ "MetricName": "tma_lsd",
+ "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+ "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit. LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears",
+ "MetricExpr": "max(0, tma_bad_speculation - tma_branch_mispredicts)",
+ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
+ "MetricName": "tma_machine_clears",
+ "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
+ "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
+ "MetricName": "tma_mem_bandwidth",
+ "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
+ "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
+ "MetricName": "tma_mem_latency",
+ "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * tma_backend_bound",
+ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricName": "tma_memory_bound",
+ "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
+ "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_memory_operations",
+ "MetricThreshold": "tma_memory_operations > 0.1 & tma_light_operations > 0.6",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
+ "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
+ "MetricName": "tma_microcode_sequencer",
+ "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
+ "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit. The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
+ "MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
+ "MetricName": "tma_mispredicts_resteers",
+ "MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
+ "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
+ "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
+ "MetricName": "tma_mite",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+ "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles where (only) 4 uops were delivered by the MITE pipeline",
+ "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
+ "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
+ "MetricName": "tma_mite_4wide",
+ "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+ "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
+ "MetricName": "tma_mixing_vectors",
+ "MetricThreshold": "tma_mixing_vectors > 0.05",
+ "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
+ "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
+ "MetricName": "tma_ms_switches",
+ "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
+ "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
+ "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_nop_instructions",
+ "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
+ "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+ "MetricName": "tma_other_light_ops",
+ "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
+ "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
+ "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_0",
+ "MetricThreshold": "tma_port_0 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
+ "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_1",
+ "MetricThreshold": "tma_port_1 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_core_clks",
+ "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_5",
+ "MetricThreshold": "tma_port_5 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
+ "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
+ "MetricName": "tma_port_6",
+ "MetricThreshold": "tma_port_6 > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+ "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
+ "MetricName": "tma_ports_utilization",
+ "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related). Two distinct categories can be attributed into this metric: (1) heavy data-dependency among contiguous instructions would manifest in this metric - such cases are often referred to as low Instruction Level Parallelism (ILP). (2) Contention on some hardware execution unit other than Divider. For example; when there are too many multiply operations.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_0",
+ "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise). Long-latency instructions like divides may contribute to this metric.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_1",
+ "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). This can be due to heavy data-dependency among software instructions; or over oversubscribing a particular hardware resource. In some other cases with high 1_Port_Utilized and L1_Bound; this metric can point to L1 data-cache latency bottleneck that may not necessarily manifest with complete execution starvation (due to the short L1 latency e.g. walking a linked list) - looking at the assembly can be helpful. Sample with: EXE_ACTIVITY.1_PORTS_UTIL. Related metrics: tma_l1_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_2",
+ "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Sample with: EXE_ACTIVITY.2_PORTS_UTIL. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
+ "MetricName": "tma_ports_utilized_3m",
+ "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
+ "MetricName": "tma_retiring",
+ "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
+ "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
+ "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+ "MetricName": "tma_serializing_operation",
+ "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
+ "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
+ "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+ "MetricName": "tma_slow_pause",
+ "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_split_loads",
+ "MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary. Sample with: MEM_INST_RETIRED.SPLIT_LOADS_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents rate of split store accesses",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
+ "MetricName": "tma_split_stores",
+ "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric represents rate of split store accesses. Consider aligning your data to the 64-byte cache line granularity. Sample with: MEM_INST_RETIRED.SPLIT_STORES_PS. Related metrics: tma_port_4",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
+ "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_thread_clks",
+ "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
+ "MetricName": "tma_sq_full",
+ "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
+ "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_store_bound",
+ "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write. Even though store accesses do not typically stall out-of-order CPUs; there are few cases where stores can lead to actual stalls. This metric will be flagged should RFO stores be a bottleneck. Sample with: MEM_INST_RETIRED.ALL_STORES_PS",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_store_fwd_blk",
+ "MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores. To streamline memory operations in the pipeline; a load can avoid waiting for memory if a prior in-flight store is writing the data that the load wants to read (store forwarding process). However; in some cases the load may be blocked for a significant time pending the store forward. For example; when the prior store is writing a smaller region than the load is reading.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
+ "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
+ "MetricName": "tma_store_latency",
+ "MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses. Store accesses usually less impact out-of-order core performance; however; holding resources for longer time can lead into undesired implications (e.g. contention on L1D fill-buffer entries - see FB_Full). Related metrics: tma_fb_full, tma_lock_latency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_core_clks)",
+ "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
+ "MetricName": "tma_store_op_utilization",
+ "MetricThreshold": "tma_store_op_utilization > 0.6",
+ "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations. Sample with: UOPS_DISPATCHED.PORT_7_8",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric roughly estimates the fraction of cycles where the TLB was missed by store accesses, hitting in the second-level TLB (STLB)",
+ "MetricExpr": "tma_dtlb_store - tma_store_stlb_miss",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
+ "MetricName": "tma_store_stlb_hit",
+ "MetricThreshold": "tma_store_stlb_hit > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
+ "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
+ "MetricName": "tma_store_stlb_miss",
+ "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
+ "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_thread_clks",
+ "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
+ "MetricName": "tma_streaming_stores",
+ "MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
+ "PublicDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores. Even though store accesses do not typically stall out-of-order CPUs; there are few cases where stores can lead to actual stalls. This metric will be flagged should Streaming stores be a bottleneck. Sample with: OCR.STREAMING_WR.ANY_RESPONSE. Related metrics: tma_fb_full",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
+ "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
+ "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+ "MetricName": "tma_unknown_branches",
+ "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "This metric serves as an approximation of legacy x87 usage",
+ "MetricExpr": "tma_retiring * UOPS_EXECUTED.X87 / UOPS_EXECUTED.THREAD",
+ "MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
+ "MetricName": "tma_x87_use",
+ "MetricThreshold": "tma_x87_use > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
+ "PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid X87 high usage and preferably upgrade to modern ISA. See Tip under Tuning Hint.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Percentage of cycles in aborted transactions.",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
+ "MetricGroup": "transaction",
+ "MetricName": "tsx_aborted_cycles",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
+ "MetricGroup": "transaction",
+ "MetricName": "tsx_cycles_per_elision",
+ "ScaleUnit": "1cycles / elision"
+ },
+ {
+ "BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
+ "MetricGroup": "transaction",
+ "MetricName": "tsx_cycles_per_transaction",
+ "ScaleUnit": "1cycles / transaction"
+ },
+ {
+ "BriefDescription": "Percentage of cycles within a transaction region.",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
+ "MetricGroup": "transaction",
+ "MetricName": "tsx_transactional_cycles",
+ "ScaleUnit": "100%"
+ }
+]
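As a quick cross-check of how these metric expressions compose, here is a minimal Python sketch (not part of the patch) that evaluates the tma_info_system_dram_bw_use MetricExpr above from raw counter values. The counts and the measurement window are made up for illustration; in practice they come from perf.

    # Hedged sketch: evaluate the DRAM bandwidth MetricExpr
    #   64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL)
    #      / 1e6 / duration_time / 1e3
    # Each tracked request moves one 64-byte cache line, so requests * 64
    # gives bytes; the divisors normalize by wall time and convert to GB.
    def dram_bw_use_gbps(counts, duration_time):
        reqs = (counts["UNC_ARB_TRK_REQUESTS.ALL"]
                + counts["UNC_ARB_COH_TRK_REQUESTS.ALL"])
        return 64 * reqs / 1e6 / duration_time / 1e3

    counts = {"UNC_ARB_TRK_REQUESTS.ALL": 3.2e9,      # hypothetical count
              "UNC_ARB_COH_TRK_REQUESTS.ALL": 1.1e9}  # hypothetical count
    print(dram_bw_use_gbps(counts, duration_time=10.0))  # 27.52 GB/s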
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json
new file mode 100644
index 000000000000..8027590f1776
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-interconnect.json
@@ -0,0 +1,74 @@
+[
+ {
+ "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, etc.",
+ "EventCode": "0x84",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle counts number of any coherent request at memory controller that were issued by any core. This event is not supported on ICL products but is supported on RKL products.",
+ "EventCode": "0x85",
+ "EventName": "UNC_ARB_DAT_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle counts number of coherent reads pending on data return from memory controller that were issued by any core. This event is not supported on ICL products but is supported on RKL products.",
+ "EventCode": "0x85",
+ "EventName": "UNC_ARB_DAT_OCCUPANCY.RD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries . Such entry is defined as valid when it is allocated till deallocation. Doesn't include prefetches. This event is not supported on ICL products but is supported on RKL products.",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_REQ_TRK_OCCUPANCY.DRD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic. This event is not supported on ICL products but is supported on RKL products.",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries . Such entry is defined as valid when it is allocated till deallocation. Doesn't include prefetches. This event is not supported on ICL products but is supported on RKL products.",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.RD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Total number of all outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches. This event is not supported on ICL products but is supported on RKL products.",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.RD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ }
+]
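The occupancy/request pairs above are what the tma_info_system_mem_* metrics in the metrics file reduce via Little's law: occupancy summed per cycle divided by completed requests approximates average latency, and occupancy divided by the cycles in which it was non-zero approximates average parallelism. A hedged Python sketch with made-up counts:

    # Sketch of the MetricExprs earlier in this patch; all values are
    # hypothetical. Occupancy events add the number of in-flight entries
    # once per cycle, so their unit is entry-cycles.
    trk_occupancy_rd = 8.0e9   # UNC_ARB_TRK_OCCUPANCY.RD
    dat_occupancy_rd = 6.0e9   # UNC_ARB_DAT_OCCUPANCY.RD
    trk_requests_rd = 1.0e8    # UNC_ARB_TRK_REQUESTS.RD
    dat_occ_cycles = 2.0e9     # UNC_ARB_DAT_OCCUPANCY.RD with cmask=1

    # tma_info_system_mem_read_latency: cycles each read spends tracked
    mem_read_latency = (trk_occupancy_rd + dat_occupancy_rd) / trk_requests_rd

    # tma_info_system_mem_parallel_reads: reads in flight while any pend
    mem_parallel_reads = dat_occupancy_rd / dat_occ_cycles

    print(mem_read_latency, mem_parallel_reads)  # 140.0 3.0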
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json
new file mode 100644
index 000000000000..c6596ba09195
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/uncore-other.json
@@ -0,0 +1,9 @@
+[
+ {
+ "BriefDescription": "UNC_CLOCK.SOCKET",
+ "EventCode": "0xff",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "PerPkg": "1",
+ "Unit": "CLOCK"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json
new file mode 100644
index 000000000000..b28f62ce1f39
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/virtual-memory.json
@@ -0,0 +1,165 @@
+[
+ {
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
+ "CounterMask": "1",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
+ "CounterMask": "1",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
+ "CounterMask": "1",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "STLB flush attempts",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+ "SampleAfterValue": "100007",
+ "UMask": "0x20"
+ }
+]
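
The WALK_COMPLETED events in this file serve as numerators for the per-instruction TLB miss ratios defined later in this series (see the dtlb_2nd_level_*_mpi metrics added to the Sapphire Rapids metrics file below). A small sketch of that ratio, with made-up counter values for illustration only:

    def mpi(walks_completed: int, instructions: int) -> float:
        # e.g. DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY
        return walks_completed / instructions

    # Hypothetical counts from a perf stat run, purely illustrative.
    print(mpi(walks_completed=12_500, instructions=1_000_000_000))  # 1.25e-05
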
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/metricgroups.json b/tools/perf/pmu-events/arch/x86/sandybridge/metricgroups.json
new file mode 100644
index 000000000000..bebb85945d62
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/metricgroups.json
@@ -0,0 +1,100 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
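
Each key in metricgroups.json is a group name that can appear in a metric's semicolon-separated MetricGroup string (for example "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group" in the metrics below); the value is the human-readable description available when listing metric groups. A sketch of the lookup, assuming a local copy of the file at an illustrative path:

    import json

    with open("metricgroups.json") as f:  # path is illustrative
        descriptions = json.load(f)

    metric_group = "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group"
    for group in metric_group.split(";"):
        print(f"{group}: {descriptions.get(group, '(no description)')}")
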
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
index 4b8bc19392a4..8898b6fd0dea 100644
--- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
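
Spelled out, UNCORE_FREQ divides the socket clock count (UNC_CLOCK.SOCKET, exposed here as tma_info_system_socket_clks) by the number of dies and the elapsed wall time. With made-up sample values:

    # Illustrative numbers only: one die, 1 s of wall time.
    socket_clks = 3.2e9    # UNC_CLOCK.SOCKET
    num_dies = 1
    duration_time = 1.0    # seconds
    uncore_freq_ghz = socket_clks / num_dies / duration_time / 1e9
    print(uncore_freq_ghz)  # 3.2
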
@@ -82,7 +82,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -98,12 +98,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_clks",
+ "MetricExpr": "12 * (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -123,7 +123,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "ARITH.FPU_DIV_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -133,7 +133,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "(1 - MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS)) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -142,16 +142,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Related metrics: tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(7 * DTLB_LOAD_MISSES.STLB_HIT + DTLB_LOAD_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1",
@@ -163,14 +163,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_slots",
+ "MetricExpr": "4 * min(CPU_CLK_UNHALTED.THREAD, IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -207,7 +207,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -226,168 +226,168 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
- },
- {
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
- },
- {
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
- },
- {
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_mem_bandwidth"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
"PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_lcp"
},
{
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_DISPATCHED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_inst_mix_instructions",
+ "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
},
{
- "BriefDescription": "Total number of retired Instructions",
- "MetricExpr": "INST_RETIRED.ANY",
- "MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
- "PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_mem_bandwidth"
},
{
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
"MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
},
{
"BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
"MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "MetricName": "tma_info_system_kernel_cpi"
},
{
"BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Average number of parallel requests to external memory",
"MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
"MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_parallel_requests",
+ "MetricName": "tma_info_system_mem_parallel_requests",
"PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
},
{
"BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
"MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
"MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency"
- },
- {
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "MetricName": "tma_info_system_mem_request_latency"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "UNC_CLOCK.SOCKET",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_clks",
+ "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -397,7 +397,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
"MetricConstraint": "NO_GROUP_EVENTS_SMT",
- "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_clks",
+ "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -406,11 +406,11 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage",
"ScaleUnit": "100%"
},
{
@@ -436,16 +436,16 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_dram_bw_use",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_system_dram_bw_use",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -455,7 +455,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -465,7 +465,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -474,7 +474,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -484,7 +484,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING)) / tma_info_clks",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_DISPATCH) + cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=1@ - (cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_DISPATCHED.THREAD\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_L1D_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -493,7 +493,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -503,7 +503,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "RESOURCE_STALLS.SB / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SB / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
index b72a36999930..e8bf7c9c44e1 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
@@ -32,18 +32,20 @@
"UMask": "0x3"
},
{
- "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS",
+ "BriefDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding.",
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
{
- "BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS",
+ "BriefDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding.",
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
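
The CounterMask on these events thresholds the per-cycle count: the counter increments only on cycles where the event's per-cycle count is at least the mask value. In the raw x86 perf encoding the counter mask occupies bits 24-31, alongside event select (bits 0-7) and unit mask (bits 8-15). A sketch extending the earlier encoding helper, using the STALLS_L2_MISS entry above (the composed value is illustrative):

    def raw_config(event_code: str, umask: str, cmask: int = 0) -> int:
        # x86 encoding: bits 0-7 event select, bits 8-15 unit mask,
        # bits 24-31 counter mask (cmask)
        return int(event_code, 16) | (int(umask, 16) << 8) | (cmask << 24)

    # MEMORY_ACTIVITY.STALLS_L2_MISS from the hunk above:
    print(hex(raw_config("0x47", "0x5", cmask=5)))  # 0x5000547
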
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/metricgroups.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/metricgroups.json
new file mode 100644
index 000000000000..e6f7934320bf
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/metricgroups.json
@@ -0,0 +1,118 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IntVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IoBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_assists_group": "Metrics contributing to tma_assists category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_int_operations_group": "Metrics contributing to tma_int_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_bandwidth_group": "Metrics contributing to tma_mem_bandwidth category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
index 72e9bdfa9f80..6dcf3b763af4 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
@@ -706,7 +706,7 @@
"BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of specualtive operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index 620fc5bd2217..c207c851a9f9 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -29,11 +29,262 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
+ "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY",
+ "MetricName": "cpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "CPU operating frequency (in GHz)",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
+ "MetricName": "cpu_operating_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Percentage of time spent in the active CPU power state C0",
+ "MetricExpr": "tma_info_system_cpu_utilization",
+ "MetricName": "cpu_utilization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2nd_level_2mb_large_page_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the Data Translation Lookaside Buffer (DTLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2nd_level_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2nd_level_store_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "itlb_2nd_level_large_page_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "itlb_2nd_level_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY",
+ "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L1_HIT / INST_RETIRED.ANY",
+ "MetricName": "l1d_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L1D.REPLACEMENT / INST_RETIRED.ANY",
+ "MetricName": "l1d_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_code_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed data read request missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_LINES_IN.ALL / INST_RETIRED.ANY",
+ "MetricName": "l2_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD / INST_RETIRED.ANY",
+ "MetricName": "llc_code_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA + UNC_CHA_TOR_INSERTS.IA_MISS_DRD + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF) / INST_RETIRED.ANY",
+ "MetricName": "llc_data_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) in nano seconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to local memory in nano seconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_latency_for_local_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to remote memory in nano seconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_latency_for_remote_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to DRAM in nano seconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_to_dram_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) addressed to Intel(R) Optane(TM) Persistent Memory(PMEM) in nano seconds",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM) * #num_packages)) * duration_time",
+ "MetricName": "llc_demand_data_read_miss_to_pmem_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.WRITES_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to remote memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_REMOTE * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_remote_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_remote_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "The ratio of number of completed memory load instructions to the total number completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "loads_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "DDR memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.RD * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.WR * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Memory write bandwidth (MB/sec) caused by directory updates; includes DDR and Intel(R) Optane(TM) Persistent Memory(PMEM).",
+ "MetricExpr": "(UNC_CHA_DIR_UPDATE.HA + UNC_CHA_DIR_UPDATE.TOR + UNC_M2M_DIRECTORY_UPDATE.ANY) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_extra_write_bw_due_to_directory_updates",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Memory read that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL) / (UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE)",
+ "MetricName": "numa_reads_addressed_to_local_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE) / (UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE + UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE)",
+ "MetricName": "numa_reads_addressed_to_remote_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_decoded_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MITE_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_legacy_decode_pipeline",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to Instruction Decode Queue",
+ "MetricExpr": "IDQ.MS_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS + LSD.UOPS)",
+ "MetricName": "percent_uops_delivered_from_microcode_sequencer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_PMM_RPQ_INSERTS * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS) * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Intel(R) Optane(TM) Persistent Memory(PMEM) memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_PMM_WPQ_INSERTS * 64 / 1e6 / duration_time",
+ "MetricName": "pmem_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -49,8 +300,14 @@
"ScaleUnit": "1SMI#"
},
{
+ "BriefDescription": "The ratio of number of completed memory store instructions to the total number completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_STORES / INST_RETIRED.ANY",
+ "MetricName": "stores_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5_11 + UOPS_DISPATCHED.PORT_6) / (5 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5_11 + UOPS_DISPATCHED.PORT_6) / (5 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -58,7 +315,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the Advanced Matrix Extensions (AMX) execution engine was busy with tile (arithmetic) operations",
- "MetricExpr": "EXE.AMX_BUSY / tma_info_core_clks",
+ "MetricExpr": "EXE.AMX_BUSY / tma_info_core_core_clks",
"MetricGroup": "Compute;HPC;Server;TopdownL5;tma_L5_group;tma_ports_utilized_0_group",
"MetricName": "tma_amx_busy",
"MetricThreshold": "tma_amx_busy > 0.5 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -66,7 +323,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * cpu@ASSISTS.ANY\\,umask\\=0x1B@ / tma_info_slots",
+ "MetricExpr": "100 * cpu@ASSISTS.ANY\\,umask\\=0x1B@ / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -75,7 +332,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops as a result of handing SSE to AVX* or AVX* to SSE transition Assists.",
- "MetricExpr": "63 * ASSISTS.SSE_AVX_MIX / tma_info_slots",
+ "MetricExpr": "63 * ASSISTS.SSE_AVX_MIX / tma_info_thread_slots",
"MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
"MetricName": "tma_avx_assists",
"MetricThreshold": "tma_avx_assists > 0.1",
@@ -83,37 +340,40 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
+ "DefaultMetricgroupName": "TopdownL2",
+ "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "BadSpec;BrMispredicts;Default;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "MetricgroupNoGroup": "TopdownL2;Default",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -131,7 +391,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - tma_branch_mispredicts / tma_bad_speculation) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -141,7 +401,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(76 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -150,18 +410,19 @@
},
{
"BriefDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck",
+ "DefaultMetricgroupName": "TopdownL2",
"MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)",
- "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "MetricGroup": "Backend;Compute;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL2",
+ "MetricgroupNoGroup": "TopdownL2;Default",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "75.5 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -170,16 +431,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -189,7 +450,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks)",
+ "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -198,43 +459,43 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "80 * tma_info_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "80 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -243,30 +504,32 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues",
+ "DefaultMetricgroupName": "TopdownL2",
"MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
- "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
+ "MetricGroup": "Default;FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35",
- "MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2;Default",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
- "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
+ "DefaultMetricgroupName": "TopdownL2",
+ "MetricExpr": "topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots",
+ "MetricGroup": "Default;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
- "MetricgroupNoGroup": "TopdownL2",
+ "MetricgroupNoGroup": "TopdownL2;Default",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -281,7 +544,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) matrix uops fraction the CPU has retired (aggregated across all supported FP datatypes in AMX engine)",
- "MetricExpr": "cpu@AMX_OPS_RETIRED.BF16\\,cmask\\=1@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@AMX_OPS_RETIRED.BF16\\,cmask\\=1@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;HPC;Pipeline;Server;TopdownL4;tma_L4_group;tma_fp_arith_group",
"MetricName": "tma_fp_amx",
"MetricThreshold": "tma_fp_amx > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -290,6 +553,7 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -299,7 +563,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
- "MetricExpr": "30 * ASSISTS.FP / tma_info_slots",
+ "MetricExpr": "30 * ASSISTS.FP / tma_info_thread_slots",
"MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
"MetricName": "tma_fp_assists",
"MetricThreshold": "tma_fp_assists > 0.1",
@@ -308,7 +572,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -317,7 +581,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ + FP_ARITH_INST_RETIRED2.VECTOR) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ + FP_ARITH_INST_RETIRED2.VECTOR) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -326,7 +590,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.128B_PACKED_HALF) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.128B_PACKED_HALF) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -335,7 +599,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.256B_PACKED_HALF) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.256B_PACKED_HALF) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -344,7 +608,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.512B_PACKED_HALF) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.512B_PACKED_HALF) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_512b",
"MetricThreshold": "tma_fp_vector_512b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -353,17 +617,18 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
- "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots",
+ "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.MACRO_FUSED / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * INST_RETIRED.MACRO_FUSED / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fused_instructions",
"MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
@@ -372,17 +637,18 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
+ "DefaultMetricgroupName": "TopdownL2",
+ "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
- "MetricgroupNoGroup": "TopdownL2",
+ "MetricgroupNoGroup": "TopdownL2;Default",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / tma_info_clks",
+ "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -390,746 +656,754 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
+ },
+ {
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
- },
- {
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 6 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
- },
- {
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_DATA.STALLS / cpu@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "tma_info_flopc / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / cpu@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
- "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_write_bw"
- },
- {
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR + (cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ + FP_ARITH_INST_RETIRED2.VECTOR))",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AMX operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / AMX_OPS_RETIRED.BF16",
"MetricGroup": "Flops;FpVector;InsType;Server",
- "MetricName": "tma_info_iparith_amx_f16",
- "MetricThreshold": "tma_info_iparith_amx_f16 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_amx_f16",
+ "MetricThreshold": "tma_info_inst_mix_iparith_amx_f16 < 10",
"PublicDescription": "Instructions per FP Arithmetic AMX operation (lower number means higher occurrence rate). Operations factored per matrices' sizes of the AMX instructions."
},
{
"BriefDescription": "Instructions per Integer Arithmetic AMX operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / AMX_OPS_RETIRED.INT8",
"MetricGroup": "InsType;IntVector;Server",
- "MetricName": "tma_info_iparith_amx_int8",
- "MetricThreshold": "tma_info_iparith_amx_int8 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_amx_int8",
+ "MetricThreshold": "tma_info_inst_mix_iparith_amx_int8 < 10",
"PublicDescription": "Instructions per Integer Arithmetic AMX operation (lower number means higher occurrence rate). Operations factored per matrices' sizes of the AMX instructions."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.128B_PACKED_HALF)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.256B_PACKED_HALF)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE + FP_ARITH_INST_RETIRED2.512B_PACKED_HALF)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx512",
- "MetricThreshold": "tma_info_iparith_avx512 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
- "BriefDescription": "Instructions per a microcode Assist invocation",
- "MetricExpr": "INST_RETIRED.ANY / cpu@ASSISTS.ANY\\,umask\\=0x1B@",
- "MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_ipassist",
- "MetricThreshold": "tma_info_ipassist < 100e3",
- "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
- },
- {
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_flopc",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_flopc",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_ntaken",
- "MetricThreshold": "tma_info_ipmisp_cond_ntaken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_taken",
- "MetricThreshold": "tma_info_ipmisp_cond_taken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_ret",
- "MetricThreshold": "tma_info_ipmisp_ret < 500"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 13",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 13",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+ "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_nonsilent_pki"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+ "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_silent_pki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
- "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_nonsilent_pki"
- },
- {
- "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
- "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_silent_pki"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
- },
- {
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l3_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (4 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
+ },
+ {
+ "BriefDescription": "Instructions per a microcode Assist invocation",
+ "MetricExpr": "INST_RETIRED.ANY / cpu@ASSISTS.ANY\\,umask\\=0x1B@",
+ "MetricGroup": "Pipeline;Ret;Retire",
+ "MetricName": "tma_info_pipeline_ipassist",
+ "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+ "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+ },
+ {
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
+ },
+ {
+ "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
+ "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_strings_cycles",
+ "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1"
+ },
+ {
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "tma_info_core_flopc / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_write_bw"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR) / uncore_cha_0@event\\=0x1@",
"MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_dram_read_latency",
+ "MetricName": "tma_info_system_mem_dram_read_latency",
"PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
"MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
+ "MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
"MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@ if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_pmm_read_latency",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_socket_clks / duration_time)",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
+ "MetricName": "tma_info_system_mem_read_latency",
"PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
- },
- {
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
- },
- {
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
- },
- {
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
- },
- {
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
- },
- {
- "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
- "MetricExpr": "1 - (tma_info_cond_nt + tma_info_cond_tk + tma_info_callret + tma_info_jump)",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_other_branches"
- },
- {
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (4 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
- },
- {
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
"MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC",
- "MetricName": "tma_info_pmm_read_bw"
+ "MetricName": "tma_info_system_pmm_read_bw"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
"MetricGroup": "Mem;MemoryBW;Server;SoC",
- "MetricName": "tma_info_pmm_write_bw"
- },
- {
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "TOPDOWN.SLOTS",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
- "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
- "MetricExpr": "(tma_info_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
- "MetricGroup": "SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots_utilization"
+ "MetricName": "tma_info_system_pmm_write_bw"
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "uncore_cha_0@event\\=0x1@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
- },
- {
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
- },
- {
- "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
- "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_strings_cycles",
- "MetricThreshold": "tma_info_strings_cycles > 0.1"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Tera Integer (matrix) Operations Per Second",
"MetricExpr": "8 * AMX_OPS_RETIRED.INT8 / 1e12 / duration_time",
"MetricGroup": "Cor;HPC;IntVector;Server",
- "MetricName": "tma_info_tiops"
+ "MetricName": "tma_info_system_tiops"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
- },
- {
- "BriefDescription": "Uops Per Instruction",
- "MetricExpr": "tma_retiring * tma_info_slots / INST_RETIRED.ANY",
- "MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_system_turbo_utilization"
},
{
"BriefDescription": "Cross-socket Ultra Path Interconnect (UPI) data transmit bandwidth for data only [MB / sec]",
"MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 64 / 9 / 1e6",
"MetricGroup": "Server;SoC",
- "MetricName": "tma_info_upi_data_transmit_bw"
+ "MetricName": "tma_info_system_upi_data_transmit_bw"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "TOPDOWN.SLOTS",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline;Ret;Retire",
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "tma_retiring * tma_info_slots / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 9"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 9"
},
{
"BriefDescription": "This metric approximates arithmetic Integer (Int) matrix uops fraction the CPU has retired (aggregated across all supported Int datatypes in AMX engine)",
- "MetricExpr": "cpu@AMX_OPS_RETIRED.INT8\\,cmask\\=1@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@AMX_OPS_RETIRED.INT8\\,cmask\\=1@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;HPC;IntVector;Pipeline;Server;TopdownL4;tma_L4_group;tma_int_operations_group",
"MetricName": "tma_int_amx",
"MetricThreshold": "tma_int_amx > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1147,7 +1421,7 @@
},
{
"BriefDescription": "This metric represents 128-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
- "MetricExpr": "(INT_VEC_RETIRED.ADD_128 + INT_VEC_RETIRED.VNNI_128) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(INT_VEC_RETIRED.ADD_128 + INT_VEC_RETIRED.VNNI_128) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
"MetricName": "tma_int_vector_128b",
"MetricThreshold": "tma_int_vector_128b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1156,7 +1430,7 @@
},
{
"BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
- "MetricExpr": "(INT_VEC_RETIRED.ADD_256 + INT_VEC_RETIRED.MUL_256 + INT_VEC_RETIRED.VNNI_256) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(INT_VEC_RETIRED.ADD_256 + INT_VEC_RETIRED.MUL_256 + INT_VEC_RETIRED.VNNI_256) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
"MetricName": "tma_int_vector_256b",
"MetricThreshold": "tma_int_vector_256b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1165,7 +1439,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_TAG.STALLS / tma_info_clks",
+ "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1174,7 +1448,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((EXE_ACTIVITY.BOUND_ON_LOADS - MEMORY_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((EXE_ACTIVITY.BOUND_ON_LOADS - MEMORY_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1184,7 +1458,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_clks",
+ "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1193,7 +1467,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1202,35 +1476,36 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "33 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "33 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "DECODE.LCP / tma_info_clks",
+ "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation)",
+ "DefaultMetricgroupName": "TopdownL2",
"MetricExpr": "max(0, tma_retiring - tma_heavy_operations)",
- "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
+ "MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
- "MetricgroupNoGroup": "TopdownL2",
+ "MetricgroupNoGroup": "TopdownL2;Default",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_clks)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1247,7 +1522,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1255,7 +1530,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
- "MetricExpr": "71 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "71 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1265,7 +1540,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1274,17 +1549,18 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears",
+ "DefaultMetricgroupName": "TopdownL2",
"MetricExpr": "max(0, tma_bad_speculation - tma_branch_mispredicts)",
- "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
+ "MetricGroup": "BadSpec;Default;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL2",
+ "MetricgroupNoGroup": "TopdownL2;Default",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to memory bandwidth Allocation feature (RDT's memory bandwidth throttling).",
- "MetricExpr": "INT_MISC.MBA_STALLS / tma_info_clks",
+ "MetricExpr": "INT_MISC.MBA_STALLS / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;Server;TopdownL5;tma_L5_group;tma_mem_bandwidth_group",
"MetricName": "tma_mba_stalls",
"MetricThreshold": "tma_mba_stalls > 0.1 & (tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1292,35 +1568,36 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
+ "DefaultMetricgroupName": "TopdownL2",
+ "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Backend;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL2",
+ "MetricgroupNoGroup": "TopdownL2;Default",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_clks",
+ "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
"MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -1328,7 +1605,8 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
- "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_slots)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
"MetricThreshold": "tma_memory_operations > 0.1 & tma_light_operations > 0.6",
@@ -1336,7 +1614,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1345,25 +1623,25 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "tma_branch_mispredicts / tma_bad_speculation * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
- "MetricExpr": "160 * ASSISTS.SSE_AVX_MIX / tma_info_clks",
+ "MetricExpr": "160 * ASSISTS.SSE_AVX_MIX / tma_info_thread_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
"MetricName": "tma_mixing_vectors",
"MetricThreshold": "tma_mixing_vectors > 0.05",
@@ -1372,7 +1650,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * cpu@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_slots / UOPS_ISSUED.ANY) / tma_info_clks",
+ "MetricExpr": "3 * cpu@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY) / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1381,7 +1659,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
- "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - INST_RETIRED.MACRO_FUSED) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - INST_RETIRED.MACRO_FUSED) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_non_fused_branches",
"MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
@@ -1390,7 +1668,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_nop_instructions",
"MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
@@ -1399,6 +1677,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1408,7 +1687,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults",
- "MetricExpr": "99 * ASSISTS.PAGE_FAULT / tma_info_slots",
+ "MetricExpr": "99 * ASSISTS.PAGE_FAULT / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_assists_group",
"MetricName": "tma_page_faults",
"MetricThreshold": "tma_page_faults > 0.05",
@@ -1417,7 +1696,7 @@
},
{
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
- "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_pmm_bound",
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1426,7 +1705,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1435,7 +1714,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1444,7 +1723,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1453,7 +1732,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_clks)",
+ "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1462,7 +1741,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_clks",
+ "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1471,7 +1750,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1480,7 +1759,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1489,7 +1768,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_clks",
+ "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1498,7 +1777,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
- "MetricExpr": "(135.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 135.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(135.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 135.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1507,7 +1786,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
- "MetricExpr": "149 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "149 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1516,17 +1795,18 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1535,7 +1815,7 @@
},
{
"BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
- "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
"MetricName": "tma_shuffles",
"MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -1543,7 +1823,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_clks",
+ "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -1552,7 +1832,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1561,7 +1841,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1570,16 +1850,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(XQ.FULL_CYCLES + L1D_PEND_MISS.L2_STALLS) / tma_info_clks",
+ "MetricExpr": "(XQ.FULL_CYCLES + L1D_PEND_MISS.L2_STALLS) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1588,7 +1868,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1597,7 +1877,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
- "MetricExpr": "(MEM_STORE_RETIRED.L2_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(MEM_STORE_RETIRED.L2_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1606,7 +1886,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1623,7 +1903,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1631,7 +1911,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
- "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_clks",
+ "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
"MetricName": "tma_streaming_stores",
"MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1640,7 +1920,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_clks",
+ "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1658,30 +1938,42 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore operating frequency in GHz",
+ "MetricExpr": "UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_CLOCKTICKS) * #num_packages) / 1e9 / duration_time",
+ "MetricName": "uncore_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_transmit_bw",
+ "ScaleUnit": "1MB/s"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
index b91cebf81f50..3fa660694bc7 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
@@ -3157,6 +3157,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "All requests issued from IA cores to CXL accelerator memory regions that hit the LLC.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c0018101",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c0008101",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; DRd hits from local IA",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD",
@@ -3372,6 +3389,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "All requests issued from IA cores to CXL accelerator memory regions that miss the LLC.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c0018201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c0008201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts for DRd misses from local IA",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
@@ -3398,6 +3432,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "DRds issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8178201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8168201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts for DRds issued by IA Cores targeting DDR Mem that Missed the LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR",
@@ -3443,6 +3494,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8268201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; DRd Opt Pref misses from local IA",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF",
@@ -3452,6 +3512,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8a68201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts for DRds issued by iA Cores targeting PMM Mem that Missed the LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM",
@@ -3470,6 +3539,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "L2 data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8978201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8968201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores targeting DDR Mem that Missed the LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_DDR",
@@ -3604,6 +3690,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "LLC data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10ccd78201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10ccd68201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; LLCPrefRFO misses from local IA",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO",
@@ -3613,6 +3716,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "L2 RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8878201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8868201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCILF_DDR",
@@ -3702,6 +3822,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "RFOs issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8078201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8068201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts RFO misses from local IA",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_LOCAL",
@@ -3720,6 +3857,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "LLC RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10ccc78201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10ccc68201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; RFO prefetch misses from local IA",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_LOCAL",
@@ -4428,6 +4582,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for All requests issued from IA cores to CXL accelerator memory regions that hit the LLC.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c0018101",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c0008101",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; DRd hits from local IA",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD",
@@ -4645,6 +4816,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for All requests issued from IA cores to CXL accelerator memory regions that miss the LLC.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c0018201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c0008201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy for DRd misses from local IA",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",
@@ -4673,6 +4861,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for DRds and equivalent opcodes issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8178201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8168201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy for DRds issued by iA Cores targeting DDR Mem that Missed the LLC",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR",
@@ -4718,6 +4923,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8268201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; DRd Opt Pref misses from local IA",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF",
@@ -4727,6 +4941,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8a68201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy for DRds issued by iA Cores targeting PMM Mem that Missed the LLC",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM",
@@ -4745,6 +4968,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for L2 data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8978201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8968201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy : DRd_Prefs issued by iA Cores targeting DDR Mem that Missed the LLC",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_DDR",
@@ -4879,6 +5119,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for LLC data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10ccd78201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10ccd68201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; LLCPrefRFO misses from local IA",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO",
@@ -4888,6 +5145,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for L2 RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8878201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8868201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCILF_DDR",
@@ -4977,6 +5251,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for RFOs issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10c8078201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10c8068201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; RFO misses from local IA",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_LOCAL",
@@ -4995,6 +5286,23 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for LLC RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC",
+ "PerPkg": "1",
+ "UMask": "0x10ccc78201",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC_LOCAL",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC_LOCAL",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "UMask": "0x10ccc68201",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; RFO prefetch misses from local IA",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_LOCAL",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
index 08faf38115d9..09d840c7da4c 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
@@ -464,7 +464,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Counts the time when FM didn? do d2c for fill reads (cross tile case)",
+ "BriefDescription": "Counts the time when FM didn't do d2c for fill reads (cross tile case)",
"EventCode": "0x4a",
"EventName": "UNC_M2M_DIRECT2CORE_NOT_TAKEN_NOTFORKED",
"PerPkg": "1",
@@ -3326,7 +3326,7 @@
"EventCode": "0x50",
"EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
"PerPkg": "1",
- "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in paralle",
+ "PublicDescription": "Message Held : Parallel Success : ad and bl messages were actually slotted into the same flit in parallel",
"UMask": "0x8",
"Unit": "M3UPI"
},
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json
index 225333561295..3ff9e9b722c8 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json
@@ -2480,11 +2480,11 @@
"Unit": "iMC"
},
{
- "BriefDescription": "DRAM Precharge commands. : Precharge due to (?)",
+ "BriefDescription": "DRAM Precharge commands",
"EventCode": "0x03",
"EventName": "UNC_M_PRE_COUNT.PGT",
"PerPkg": "1",
- "PublicDescription": "DRAM Precharge commands. : Precharge due to (?) : Counts the number of DRAM Precharge commands sent on this channel.",
+ "PublicDescription": "DRAM Precharge commands. Counts the number of DRAM Precharge commands sent on this channel.",
"UMask": "0x88",
"Unit": "iMC"
},
@@ -3236,7 +3236,7 @@
"Unit": "iMC"
},
{
- "BriefDescription": "2LM Tag check hit due to memory read (bug?)",
+ "BriefDescription": "2LM Tag check hit due to memory read",
"EventCode": "0xd3",
"EventName": "UNC_M_TAGCHK.NM_RD_HIT",
"PerPkg": "1",
@@ -3244,7 +3244,7 @@
"Unit": "iMC"
},
{
- "BriefDescription": "2LM Tag check hit due to memory write (bug?)",
+ "BriefDescription": "2LM Tag check hit due to memory write",
"EventCode": "0xd3",
"EventName": "UNC_M_TAGCHK.NM_WR_HIT",
"PerPkg": "1",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
index 4d494a5cabbf..5891bd74af60 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
@@ -32,6 +32,14 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Counts once for most SIMD scalar computational floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
index 04f08e4d2402..095904c77001 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
@@ -8,6 +8,14 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"EventCode": "0xAB",
"EventName": "DSB2MITE_SWITCHES.COUNT",
@@ -245,27 +253,34 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"EventCode": "0x83",
"EventName": "ICACHE_64B.IFTAG_STALL",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]",
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
@@ -297,6 +312,24 @@
"UMask": "0x8"
},
{
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/metricgroups.json b/tools/perf/pmu-events/arch/x86/skylake/metricgroups.json
new file mode 100644
index 000000000000..a151ba9cccb0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/metricgroups.json
@@ -0,0 +1,113 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
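The new metricgroups.json only supplies the descriptions shown by perf list; the group names themselves are referenced from the MetricGroup fields in skl-metrics.json and are selected with perf stat -M. A small sketch, assuming a perf build that includes these files:

import subprocess

# List group names together with the descriptions defined above.
print(subprocess.run(["perf", "list", "metricgroup"],
                     capture_output=True, text=True).stdout)

# Compute the level-1 top-down breakdown for a workload.
print(subprocess.run(["perf", "stat", "-M", "TopdownL1", "--", "sleep", "1"],
                     capture_output=True, text=True).stderr)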
diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
index 2dfc3af08eff..cd3e737bf4a1 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
@@ -26,12 +26,21 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Conditional branch instructions retired.",
+ "BriefDescription": "Conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.CONDITIONAL]",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PublicDescription": "This event counts conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.CONDITIONAL]",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.COND]",
"Errata": "SKL091",
"EventCode": "0xC4",
"EventName": "BR_INST_RETIRED.CONDITIONAL",
"PEBS": "1",
- "PublicDescription": "This event counts conditional branch instructions retired.",
+ "PublicDescription": "This event counts conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.COND]",
"SampleAfterValue": "400009",
"UMask": "0x1"
},
@@ -343,10 +352,10 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"EventCode": "0x87",
"EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -405,9 +414,9 @@
"UMask": "0x1"
},
{
- "AnyThread": "1",
"BriefDescription": "Clears speculative count",
"CounterMask": "1",
+ "EdgeDetect": "1",
"EventCode": "0x0D",
"EventName": "INT_MISC.CLEARS_COUNT",
"PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
@@ -470,11 +479,11 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0xA8",
"EventName": "LSD.CYCLES_4_UOPS",
- "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -488,6 +497,15 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]",
+ "CounterMask": "4",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_OK",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_4_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Number of Uops delivered by the LSD.",
"EventCode": "0xA8",
"EventName": "LSD.UOPS",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index 21ef6c9be816..94cb38540b5a 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -50,7 +50,7 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
@@ -71,7 +71,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -80,7 +80,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -88,7 +88,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_slots",
+ "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -97,7 +97,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "1 - tma_frontend_bound - (UOPS_ISSUED.ANY + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "1 - tma_frontend_bound - (UOPS_ISSUED.ANY + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -107,7 +107,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -123,12 +123,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -146,7 +146,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -156,7 +156,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(18.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 16.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(18.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 16.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -177,7 +177,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "16.5 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "16.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -186,16 +186,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -205,7 +205,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound",
+ "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -214,45 +214,45 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "min(9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "22 * tma_info_average_frequency * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "22 * tma_info_system_average_frequency * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -262,11 +262,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -274,14 +274,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_slots",
+ "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -347,7 +347,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -366,7 +366,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS + UOPS_RETIRED.MACRO_FUSED - INST_RETIRED.ANY) / tma_info_slots",
+ "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS + UOPS_RETIRED.MACRO_FUSED - INST_RETIRED.ANY) / tma_info_thread_slots",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -376,7 +376,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_clks",
+ "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -384,220 +384,231 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "(BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / DSB2MITE_SWITCHES.COUNT",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
- },
- {
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ + 2",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ + 2",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
@@ -605,416 +616,404 @@
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
- },
- {
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
- },
- {
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
+ },
+ {
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
+ },
+ {
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
+ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
"MetricExpr": "UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_OCCUPANCY.DATA_READ@thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
+ "MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
"BriefDescription": "Average number of parallel requests to external memory",
"MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
"MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_parallel_requests",
+ "MetricName": "tma_info_system_mem_parallel_requests",
"PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_REQUESTS.DATA_READ) / (tma_info_socket_clks / duration_time)",
+ "MetricExpr": "1e9 * (UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_REQUESTS.DATA_READ) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
+ "MetricName": "tma_info_system_mem_read_latency",
"PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
},
{
"BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
"MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
"MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency"
+ "MetricName": "tma_info_system_mem_request_latency"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+ "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricExpr": "UNC_CLOCK.SOCKET",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_socket_clks"
},
{
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization"
},
{
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING) / (2 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
},
{
"BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
+ "MetricExpr": "4 * tma_info_core_core_clks",
"MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
- "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
- "MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
- },
- {
- "BriefDescription": "Socket actual clocks when any core is active on that socket",
- "MetricExpr": "UNC_CLOCK.SOCKET",
- "MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
- },
- {
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
- },
- {
- "BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
- "MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1023,7 +1022,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1033,7 +1032,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks)",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1042,7 +1041,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1051,20 +1050,20 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "6.5 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "6.5 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -1079,7 +1078,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1097,7 +1096,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1105,7 +1104,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (9 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (9 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1125,20 +1124,20 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1162,7 +1161,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1171,19 +1170,19 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
@@ -1198,7 +1197,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1234,7 +1233,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1243,7 +1242,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1252,7 +1251,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -1261,7 +1260,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -1279,7 +1278,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1288,7 +1287,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1297,7 +1296,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -1306,7 +1305,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_clks)",
+ "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1315,7 +1314,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_clks",
+ "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1324,7 +1323,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_1 - UOPS_EXECUTED.CORE_CYCLES_GE_2) / 2 if #SMT_on else EXE_ACTIVITY.1_PORTS_UTIL) / tma_info_core_clks",
+ "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_1 - UOPS_EXECUTED.CORE_CYCLES_GE_2) / 2 if #SMT_on else EXE_ACTIVITY.1_PORTS_UTIL) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1333,7 +1332,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_2 - UOPS_EXECUTED.CORE_CYCLES_GE_3) / 2 if #SMT_on else EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_core_clks",
+ "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_2 - UOPS_EXECUTED.CORE_CYCLES_GE_3) / 2 if #SMT_on else EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1342,7 +1341,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_clks",
+ "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1350,7 +1349,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1360,7 +1359,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1370,7 +1369,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1379,7 +1378,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1388,16 +1387,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1406,7 +1405,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1416,7 +1415,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 9 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1425,7 +1424,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1441,7 +1440,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1449,7 +1448,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "9 * BACLEARS.ANY / tma_info_clks",
+ "MetricExpr": "9 * BACLEARS.ANY / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1467,28 +1466,28 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
index 64dd36387209..384b3c551a1f 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -32,6 +32,14 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single and 256-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and packed double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
@@ -48,6 +56,22 @@
"UMask": "0x80"
},
{
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision FP instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, 1 for each element. Applies to SSE* and AVX* packed single precision and double precision FP instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB count twice as they perform 2 calculations per element.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Counts once for most SIMD scalar computational floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "PublicDescription": "Counts once for most SIMD scalar computational single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3"
+ },
+ {
"BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
@@ -64,6 +88,13 @@
"UMask": "0x2"
},
{
+ "BriefDescription": "Number of any Vector retired FP arithmetic instructions",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xfc"
+ },
+ {
"BriefDescription": "Cycles with any input/output SSE or FP assist",
"CounterMask": "1",
"EventCode": "0xCA",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index 04f08e4d2402..095904c77001 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -8,6 +8,14 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"EventCode": "0xAB",
"EventName": "DSB2MITE_SWITCHES.COUNT",
@@ -245,27 +253,34 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"EventCode": "0x83",
"EventName": "ICACHE_64B.IFTAG_STALL",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]",
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
@@ -297,6 +312,24 @@
"UMask": "0x8"
},
{
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_ANY",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES_OK",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
+ },
+ {
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/metricgroups.json b/tools/perf/pmu-events/arch/x86/skylakex/metricgroups.json
new file mode 100644
index 000000000000..bc6a9a4d27a9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/metricgroups.json
@@ -0,0 +1,114 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IoBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
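
The new metricgroups.json maps every group tag that may appear in a metric's semicolon-separated "MetricGroup" field to a one-line description. A hypothetical validation pass that cross-checks the two files, assuming the script runs from the top of a kernel tree checkout (paths are an assumption, not part of the patch):

import json

base = "tools/perf/pmu-events/arch/x86/skylakex/"  # assumed checkout layout
groups = set(json.load(open(base + "metricgroups.json")))
metrics = json.load(open(base + "skx-metrics.json"))

used = set()
for m in metrics:
    # MetricGroup is a semicolon-separated tag list, e.g. "Cor;Pipeline"
    used.update(g for g in m.get("MetricGroup", "").split(";") if g)
print("groups used but not described:", sorted(used - groups))
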
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index 0f06e314fe36..66d686cc933e 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -26,12 +26,21 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Conditional branch instructions retired.",
+ "BriefDescription": "Conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.CONDITIONAL]",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PublicDescription": "This event counts conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.CONDITIONAL]",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.COND]",
"Errata": "SKL091",
"EventCode": "0xC4",
"EventName": "BR_INST_RETIRED.CONDITIONAL",
"PEBS": "1",
- "PublicDescription": "This event counts conditional branch instructions retired.",
+ "PublicDescription": "This event counts conditional branch instructions retired. [This event is alias to BR_INST_RETIRED.COND]",
"SampleAfterValue": "400009",
"UMask": "0x1"
},
@@ -352,10 +361,10 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"EventCode": "0x87",
"EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -414,6 +423,16 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Clears speculative count",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.CLEARS_COUNT",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
"EventCode": "0x0D",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
@@ -469,11 +488,11 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0xA8",
"EventName": "LSD.CYCLES_4_UOPS",
- "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -487,6 +506,15 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]",
+ "CounterMask": "4",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_OK",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_4_UOPS]",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Number of Uops delivered by the LSD.",
"EventCode": "0xA8",
"EventName": "LSD.UOPS",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index eb6f12c0343d..fa4209809c57 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -50,11 +50,220 @@
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
+ "BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY",
+ "MetricName": "cpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "CPU operating frequency (in GHz)",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
+ "MetricName": "cpu_operating_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Percentage of time spent in the active CPU power state C0",
+ "MetricExpr": "tma_info_system_cpu_utilization",
+ "MetricName": "cpu_utilization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "dtlb_2mb_large_page_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the Data Translation Lookaside Buffer (DTLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_load_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "dtlb_store_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
+ "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
+ "MetricExpr": "(UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART0 + UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART1 + UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART2 + UNC_IIO_PAYLOAD_BYTES_IN.MEM_WRITE.PART3) * 4 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
+ "MetricName": "itlb_large_page_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions",
+ "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricName": "itlb_mpi",
+ "PublicDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY",
+ "MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L1_HIT / INST_RETIRED.ANY",
+ "MetricName": "l1d_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L1D.REPLACEMENT / INST_RETIRED.ANY",
+ "MetricName": "l1d_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+      "BriefDescription": "Ratio of number of code read requests missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_code_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_hits_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+      "BriefDescription": "Ratio of number of completed data read requests missing L2 cache to the total number of completed instructions",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricName": "l2_demand_data_read_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "L2_LINES_IN.ALL / INST_RETIRED.ANY",
+ "MetricName": "l2_mpi",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x12CC0233@ / INST_RETIRED.ANY",
+ "MetricName": "llc_code_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+      "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) in nanoseconds",
+ "MetricExpr": "1e9 * (cha@UNC_CHA_TOR_OCCUPANCY.IA_MISS\\,config1\\=0x40433@ / cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40433@) / (UNC_CHA_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency",
+ "ScaleUnit": "1ns"
+ },
+ {
+      "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to local memory in nanoseconds",
+ "MetricExpr": "1e9 * (cha@UNC_CHA_TOR_OCCUPANCY.IA_MISS\\,config1\\=0x40432@ / cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@) / (UNC_CHA_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_local_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+      "BriefDescription": "Average latency of a last level cache (LLC) demand and prefetch data read miss (read memory access) addressed to remote memory in nanoseconds",
+ "MetricExpr": "1e9 * (cha@UNC_CHA_TOR_OCCUPANCY.IA_MISS\\,config1\\=0x40431@ / cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@) / (UNC_CHA_CLOCKTICKS / (#num_cores / #num_packages * #num_packages)) * duration_time",
+ "MetricName": "llc_data_read_demand_plus_prefetch_miss_latency_for_remote_requests",
+ "ScaleUnit": "1ns"
+ },
+ {
+ "BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x12D40433@ / INST_RETIRED.ANY",
+ "MetricName": "llc_data_read_mpi_demand_plus_prefetch",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to local memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.WRITES_LOCAL * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_local_memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "Bandwidth (MB/sec) of read requests that miss the last level cache (LLC) and go to remote memory.",
+ "MetricExpr": "UNC_CHA_REQUESTS.READS_REMOTE * 64 / 1e6 / duration_time",
+ "MetricName": "llc_miss_remote_memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+      "BriefDescription": "The ratio of number of completed memory load instructions to the total number of completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "loads_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
+ "BriefDescription": "DDR memory read bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.RD * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_read",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory bandwidth (MB/sec)",
+ "MetricExpr": "(UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_total",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "BriefDescription": "DDR memory write bandwidth (MB/sec)",
+ "MetricExpr": "UNC_M_CAS_COUNT.WR * 64 / 1e6 / duration_time",
+ "MetricName": "memory_bandwidth_write",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+      "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses; does not include LLC prefetches.",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@ / (cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@ + cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@)",
+ "MetricName": "numa_reads_addressed_to_local_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+      "BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses; does not include LLC prefetches.",
+ "MetricExpr": "cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@ / (cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40432@ + cha@UNC_CHA_TOR_INSERTS.IA_MISS\\,config1\\=0x40431@)",
+ "MetricName": "numa_reads_addressed_to_remote_dram",
+ "ScaleUnit": "100%"
+ },
+ {
+      "BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to the Instruction Decode Queue",
+ "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_decoded_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+      "BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to the Instruction Decode Queue",
+ "MetricExpr": "IDQ.MITE_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_legacy_decode_pipeline",
+ "ScaleUnit": "100%"
+ },
+ {
+      "BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to the Instruction Decode Queue",
+ "MetricExpr": "IDQ.MS_UOPS / UOPS_ISSUED.ANY",
+ "MetricName": "percent_uops_delivered_from_microcode_sequencer",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -70,8 +279,14 @@
"ScaleUnit": "1SMI#"
},
{
+      "BriefDescription": "The ratio of number of completed memory store instructions to the total number of completed instructions",
+ "MetricExpr": "MEM_INST_RETIRED.ALL_STORES / INST_RETIRED.ANY",
+ "MetricName": "stores_per_instr",
+ "ScaleUnit": "1per_instr"
+ },
+ {
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -80,7 +295,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_slots",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -88,7 +303,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_slots",
+ "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -97,7 +312,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "1 - tma_frontend_bound - (UOPS_ISSUED.ANY + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "1 - tma_frontend_bound - (UOPS_ISSUED.ANY + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -107,7 +322,7 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
- "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_slots",
+ "MetricExpr": "(UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (INT_MISC.RECOVERY_CYCLES_ANY / 2 if #SMT_on else INT_MISC.RECOVERY_CYCLES)) / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -123,12 +338,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -146,7 +361,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -156,7 +371,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(44 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 44 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 44 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -177,7 +392,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "44 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -186,16 +401,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -205,7 +420,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound",
+ "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -214,45 +429,45 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+      "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivers higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties; hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "min(9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(110 * tma_info_average_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM + OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_average_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_clks",
+ "MetricExpr": "(110 * tma_info_system_average_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM + OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_system_average_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -262,11 +477,11 @@
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "tma_info_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -274,14 +489,14 @@
"MetricExpr": "tma_frontend_bound - tma_fetch_latency",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+      "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers a suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_slots",
+ "MetricExpr": "4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -356,7 +571,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_slots",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / tma_info_thread_slots",
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -375,7 +590,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS + UOPS_RETIRED.MACRO_FUSED - INST_RETIRED.ANY) / tma_info_slots",
+ "MetricExpr": "(UOPS_RETIRED.RETIRE_SLOTS + UOPS_RETIRED.MACRO_FUSED - INST_RETIRED.ANY) / tma_info_thread_slots",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -385,7 +600,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_clks",
+ "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -393,686 +608,692 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_core_ipmispredict",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "(BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
- "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_clks))",
+ "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / 2 * (1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK) if #core_wide < 1 else (CPU_CLK_UNHALTED.THREAD_ANY / 2 if #SMT_on else tma_info_thread_clks))",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;TopdownL1;tma_L1_group",
+ "MetricName": "tma_info_core_ipmispredict",
+ "MetricgroupNoGroup": "TopdownL1"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 4 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 4 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / DSB2MITE_SWITCHES.COUNT",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
- },
- {
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ + 2",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ + 2",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
- "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3) * 4 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_read_bw"
- },
- {
- "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
- "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e9 / duration_time",
- "MetricGroup": "IoBW;Mem;Server;SoC",
- "MetricName": "tma_info_io_write_bw"
- },
- {
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx512",
- "MetricThreshold": "tma_info_iparith_avx512 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
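
The iparith family above is one shape repeated per operand type: retired instructions divided by the matching FP_ARITH_INST_RETIRED count, so lower values mean denser FP code. As the descriptions note, an FMA retires as one instruction but performs two operations, which can make these ratios undercount. A small sketch with invented counts:

    def instructions_per(inst_retired, event_count):
        # Generic "instructions per X" ratio used by the tma_info_inst_mix metrics.
        return inst_retired / event_count if event_count else float("inf")

    inst = 10_000_000_000
    scalar_dp = 400_000_000  # FP_ARITH_INST_RETIRED.SCALAR_DOUBLE
    print(f"iparith_scalar_dp = {instructions_per(inst, scalar_dp):.1f}")  # < 10 flags FP-heavy code
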
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "tma_info_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 9",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 9",
+        "PublicDescription": "Instructions per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+        "BriefDescription": "Rate of non-silent evictions from the L2 cache per kilo instruction",
+ "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_nonsilent_pki"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+        "BriefDescription": "Rate of silent evictions from the L2 cache per kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+ "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_inst_mix_instructions",
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_core_l2_evictions_silent_pki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+ },
+ {
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
- },
- {
- "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
- "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_nonsilent_pki"
- },
- {
- "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
- "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / tma_info_instructions",
- "MetricGroup": "L2Evicts;Mem;Server",
- "MetricName": "tma_info_l2_evictions_silent_pki"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
- },
- {
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least one such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least one such miss. Per-Logical Processor)"
},
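
load_miss_real_latency and mlp are two readings of the same occupancy counter: L1D_PEND_MISS.PENDING sums the number of in-flight L1D misses cycle by cycle, so dividing it by miss-active cycles gives parallelism while dividing it by completed misses gives average latency. A sketch with invented counts:

    pending        = 6_000_000_000   # L1D_PEND_MISS.PENDING (occupancy sum)
    pending_cycles = 1_500_000_000   # L1D_PEND_MISS.PENDING_CYCLES
    l1_miss        = 120_000_000     # MEM_LOAD_RETIRED.L1_MISS
    fb_hit         = 300_000_000     # MEM_LOAD_RETIRED.FB_HIT

    mlp = pending / pending_cycles          # avg misses in flight: 4.00
    latency = pending / (l1_miss + fb_hit)  # avg cycles per miss: ~14.3
    print(f"MLP={mlp:.2f}, load-miss latency={latency:.1f} cycles")
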
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
},
{
- "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
- "MetricExpr": "1e9 * (UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS) / imc_0@event\\=0x0@",
- "MetricGroup": "Mem;MemoryLat;Server;SoC",
- "MetricName": "tma_info_mem_dram_read_latency",
- "PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
- "MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
- "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_socket_clks / duration_time)",
- "MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
- "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
},
{
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
},
{
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ },
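
dram_bw_use converts memory-controller CAS counts to bytes: every read or write CAS transfers one 64-byte burst. A sketch with invented values:

    cas_rd, cas_wr = 2_000_000_000, 800_000_000  # UNC_M_CAS_COUNT.RD / .WR
    seconds = 2.0                                # duration_time

    dram_bw = 64 * (cas_rd + cas_wr) / 1e9 / seconds
    print(f"DRAM bandwidth use: {dram_bw:.1f} GB/s")  # 89.6
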
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
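
The long FP_ARITH_INST_RETIRED sum in gflops (and in ipflop earlier) weights each retired instruction by the operations it performs: scalar counts one, packed counts one per lane. A sketch of the same weighting with invented counts; the dict keys mirror the event umask names:

    WEIGHTS = {
        "SCALAR_SINGLE": 1, "SCALAR_DOUBLE": 1,
        "128B_PACKED_DOUBLE": 2, "128B_PACKED_SINGLE": 4,
        "256B_PACKED_DOUBLE": 4, "256B_PACKED_SINGLE": 8,
        "512B_PACKED_DOUBLE": 8, "512B_PACKED_SINGLE": 16,
    }

    def gflops(counts, seconds):
        return sum(WEIGHTS[k] * v for k, v in counts.items()) / 1e9 / seconds

    sample = {k: 100_000_000 for k in WEIGHTS}  # invented counter values
    print(f"{gflops(sample, 2.0):.1f} GFLOP/s") # 2.2
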
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3) * 4 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_read_bw"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING) / (2 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e9 / duration_time",
+ "MetricGroup": "IoBW;Mem;Server;SoC",
+ "MetricName": "tma_info_system_io_write_bw"
+ },
+ {
+        "BriefDescription": "Instructions per Far Branch (Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
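
The kernel_* metrics rely on the :k event modifier, which restricts counting to ring 0, so kernel utilization is simply the kernel share of unhalted cycles. A sketch with invented counts:

    cycles_k = 300_000_000      # CPU_CLK_UNHALTED.THREAD_P:k
    cycles_all = 4_000_000_000  # CPU_CLK_UNHALTED.THREAD
    inst_k = 250_000_000        # INST_RETIRED.ANY_P:k

    print(f"kernel utilization = {cycles_k / cycles_all:.2%}")  # threshold: > 5%
    print(f"kernel CPI = {cycles_k / inst_k:.2f}")
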
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
+ "MetricExpr": "1e9 * (UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS) / imc_0@event\\=0x0@",
+ "MetricGroup": "Mem;MemoryLat;Server;SoC",
+ "MetricName": "tma_info_system_mem_dram_read_latency",
+ "PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+        "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+] memory-controller only)"
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
- "MetricExpr": "(CORE_POWER.LVL0_TURBO_LICENSE / 2 / tma_info_core_clks if #SMT_on else CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_clks)",
+ "MetricExpr": "(CORE_POWER.LVL0_TURBO_LICENSE / 2 / tma_info_core_core_clks if #SMT_on else CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks)",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license0_utilization",
+ "MetricName": "tma_info_system_power_license0_utilization",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
- "MetricExpr": "(CORE_POWER.LVL1_TURBO_LICENSE / 2 / tma_info_core_clks if #SMT_on else CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_clks)",
+ "MetricExpr": "(CORE_POWER.LVL1_TURBO_LICENSE / 2 / tma_info_core_core_clks if #SMT_on else CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks)",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license1_utilization",
- "MetricThreshold": "tma_info_power_license1_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license1_utilization",
+ "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
- "MetricExpr": "(CORE_POWER.LVL2_TURBO_LICENSE / 2 / tma_info_core_clks if #SMT_on else CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_clks)",
+ "MetricExpr": "(CORE_POWER.LVL2_TURBO_LICENSE / 2 / tma_info_core_core_clks if #SMT_on else CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks)",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license2_utilization",
- "MetricThreshold": "tma_info_power_license2_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license2_utilization",
+ "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
- },
- {
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "4 * tma_info_core_clks",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
- },
- {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
"MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
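
smt_2t_utilization reads as: of the core's reference cycles with any thread active (REF_XCLK_ANY counts per thread, hence the divide by 2), subtract the share where only one thread was active; the remainder is the both-siblings-active fraction. A sketch with invented counts:

    smt_on = True
    one_thread_active = 1_200_000_000  # CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE
    ref_xclk_any = 4_000_000_000       # CPU_CLK_UNHALTED.REF_XCLK_ANY

    smt_2t = 1 - one_thread_active / (ref_xclk_any / 2) if smt_on else 0
    print(f"both siblings active: {smt_2t:.2f}")  # 0.40
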
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
"MetricGroup": "SoC",
- "MetricName": "tma_info_socket_clks"
- },
- {
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
+ "MetricName": "tma_info_system_socket_clks"
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "MetricName": "tma_info_system_turbo_utilization"
+ },
+ {
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+        "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggests high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
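
thread_ipc and thread_cpi are defined as exact reciprocals over the same logical-processor clock (CPU_CLK_UNHALTED.THREAD). A sketch with invented counts:

    inst_retired = 8_000_000_000  # INST_RETIRED.ANY
    thread_clks = 4_000_000_000   # CPU_CLK_UNHALTED.THREAD

    ipc = inst_retired / thread_clks
    print(f"IPC = {ipc:.2f}, CPI = {1 / ipc:.2f}")  # 2.00 / 0.50
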
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * tma_info_core_core_clks",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 6"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 6"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1081,7 +1302,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1091,7 +1312,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks)",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1100,7 +1321,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1109,20 +1330,20 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "17 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "17 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
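
A ScaleUnit of "100%" marks a TopDown node as a cycle (or slot) fraction rendered as a percentage; tma_lcp, for instance, is the LCP-stall share of thread clocks, and named metrics can typically be collected with perf stat -M tma_lcp. A sketch with invented counts:

    ild_stall_lcp = 90_000_000   # ILD_STALL.LCP
    thread_clks = 4_000_000_000  # CPU_CLK_UNHALTED.THREAD

    tma_lcp = ild_stall_lcp / thread_clks
    print(f"tma_lcp = {100 * tma_lcp:.1f}%")  # 2.2%; flagged when > 5% under a hot frontend
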
{
@@ -1137,7 +1358,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1155,7 +1376,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1163,7 +1384,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
- "MetricExpr": "59.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "59.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_dram",
"MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1172,7 +1393,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1192,20 +1413,20 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1229,7 +1450,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1238,19 +1459,19 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
@@ -1265,7 +1486,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "2 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1301,7 +1522,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1310,7 +1531,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1319,7 +1540,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 2 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_2 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_2",
"MetricThreshold": "tma_port_2 > 0.6",
@@ -1328,7 +1549,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 3 ([SNB+]Loads and Store-address; [ICL+] Loads)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_3 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_load_op_utilization_group",
"MetricName": "tma_port_3",
"MetricThreshold": "tma_port_3 > 0.6",
@@ -1346,7 +1567,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1355,7 +1576,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1364,7 +1585,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 7 ([HSW+]simple Store-address)",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_7 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_store_op_utilization_group",
"MetricName": "tma_port_7",
"MetricThreshold": "tma_port_7 > 0.6",
@@ -1373,7 +1594,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_clks)",
+ "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1382,7 +1603,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_clks",
+ "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1391,7 +1612,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_1 - UOPS_EXECUTED.CORE_CYCLES_GE_2) / 2 if #SMT_on else EXE_ACTIVITY.1_PORTS_UTIL) / tma_info_core_clks",
+ "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_1 - UOPS_EXECUTED.CORE_CYCLES_GE_2) / 2 if #SMT_on else EXE_ACTIVITY.1_PORTS_UTIL) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1400,7 +1621,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_2 - UOPS_EXECUTED.CORE_CYCLES_GE_3) / 2 if #SMT_on else EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_core_clks",
+ "MetricExpr": "((UOPS_EXECUTED.CORE_CYCLES_GE_2 - UOPS_EXECUTED.CORE_CYCLES_GE_3) / 2 if #SMT_on else EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1409,7 +1630,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).",
- "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_clks",
+ "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1418,7 +1639,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(89.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1427,7 +1648,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
- "MetricExpr": "127 * tma_info_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "127 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_dram",
"MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1436,7 +1657,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_slots",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / tma_info_thread_slots",
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1446,7 +1667,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1456,7 +1677,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1465,7 +1686,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1474,16 +1695,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_clks",
+ "MetricExpr": "(OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 if #SMT_on else OFFCORE_REQUESTS_BUFFER.SQ_FULL) / tma_info_core_core_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1492,7 +1713,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1502,7 +1723,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
"MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 11 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 11 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1511,7 +1732,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_4 / tma_info_core_core_clks",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1527,7 +1748,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1535,7 +1756,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "9 * BACLEARS.ANY / tma_info_clks",
+ "MetricExpr": "9 * BACLEARS.ANY / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1553,30 +1774,42 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Uncore operating frequency in GHz",
+ "MetricExpr": "UNC_CHA_CLOCKTICKS / (#num_cores / #num_packages * #num_packages) / 1e9 / duration_time",
+ "MetricName": "uncore_frequency",
+ "ScaleUnit": "1GHz"
+ },
+ {
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_transmit_bw",
+ "ScaleUnit": "1MB/s"
}
]
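The TSX metric rewrites above replace bare `cpu@cycles\-t@` references with a `has_event()` guard. Below is a minimal sketch, not perf's actual expression evaluator, of the intended behavior: on parts without TSX the `cycles-t` event is absent, so the metric degrades to 0 instead of failing to parse. The `counts` dict is a hypothetical stand-in for perf's measured event values.

```python
# Sketch of "(cycles\-t / cycles if has_event(cycles\-t) else 0)".
# counts is a hypothetical mapping of event name -> measured count.
def tsx_transactional_cycles(counts):
    if "cycles-t" not in counts:   # has_event(cycles-t) is false: no TSX
        return 0.0                 # metric reads 0 rather than erroring
    return counts["cycles-t"] / counts["cycles"]

print(tsx_transactional_cycles({"cycles": 1_000_000, "cycles-t": 250_000}))  # 0.25
print(tsx_transactional_cycles({"cycles": 1_000_000}))                       # 0.0
```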
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json
index 26a5a20bf37a..3eece8a728b5 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json
@@ -6504,7 +6504,7 @@
"EventCode": "0x52",
"EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
"PerPkg": "1",
- "PublicDescription": "ad and bl messages were actually slotted into the same flit in paralle",
+ "PublicDescription": "ad and bl messages were actually slotted into the same flit in parallel",
"UMask": "0x8",
"Unit": "M3UPI"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
index 6f8ff2262ce7..7a40aa0f1018 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
@@ -1952,7 +1952,7 @@
"EventCode": "0x81",
"EventName": "UNC_M_WPQ_OCCUPANCY",
"PerPkg": "1",
- "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts?",
+ "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
"Unit": "iMC"
},
{
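The UNC_M_WPQ_OCCUPANCY description above spells out a Little's-law style derivation; a back-of-the-envelope sketch of it follows. All counter values are made-up illustrative numbers, not measurements.

```python
# Average depth = accumulated occupancy / not-empty cycles;
# average residency = accumulated occupancy / allocations.
occupancy_sum    = 1_200_000  # UNC_M_WPQ_OCCUPANCY summed over the window (illustrative)
cycles_not_empty = 300_000    # cycles with at least one WPQ entry (illustrative)
allocations      = 40_000     # WPQ inserts over the same window (illustrative)

print("average occupancy:", occupancy_sum / cycles_not_empty, "entries")
print("average latency  :", occupancy_sum / allocations, "cycles")
```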
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/cache.json b/tools/perf/pmu-events/arch/x86/snowridgex/cache.json
index 0ab90e3bf76b..c6be60584522 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/cache.json
@@ -72,6 +72,7 @@
"BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
"EventCode": "0x34",
"EventName": "MEM_BOUND_STALLS.IFETCH",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
"SampleAfterValue": "200003",
"UMask": "0x38"
},
@@ -437,6 +438,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
"MSRIndex": "0x1a6,0x1a7",
@@ -446,6 +448,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_HITM",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -455,6 +458,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -464,6 +468,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -473,6 +478,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_MISS",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -482,6 +488,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/memory.json b/tools/perf/pmu-events/arch/x86/snowridgex/memory.json
index 18621909d1a9..c02eb0e836ad 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/memory.json
@@ -96,6 +96,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -105,6 +106,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.L3_MISS_LOCAL",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/other.json b/tools/perf/pmu-events/arch/x86/snowridgex/other.json
index 00ae180ded25..fefbc383b840 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/other.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/other.json
@@ -1,6 +1,7 @@
[
{
"BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.SELF_LOCKS",
+ "Deprecated": "1",
"EdgeDetect": "1",
"EventCode": "0x63",
"EventName": "BUS_LOCK.ALL",
@@ -16,6 +17,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.BLOCK_CYCLES",
+ "Deprecated": "1",
"EventCode": "0x63",
"EventName": "BUS_LOCK.CYCLES_OTHER_BLOCK",
"SampleAfterValue": "200003",
@@ -23,6 +25,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event BUS_LOCK.LOCK_CYCLES",
+ "Deprecated": "1",
"EventCode": "0x63",
"EventName": "BUS_LOCK.CYCLES_SELF_BLOCK",
"SampleAfterValue": "200003",
@@ -46,6 +49,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+ "Deprecated": "1",
"EventCode": "0x34",
"EventName": "C0_STALLS.LOAD_DRAM_HIT",
"SampleAfterValue": "200003",
@@ -53,6 +57,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_L2_HIT",
+ "Deprecated": "1",
"EventCode": "0x34",
"EventName": "C0_STALLS.LOAD_L2_HIT",
"SampleAfterValue": "200003",
@@ -60,6 +65,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event MEM_BOUND_STALLS.LOAD_LLC_HIT",
+ "Deprecated": "1",
"EventCode": "0x34",
"EventName": "C0_STALLS.LOAD_LLC_HIT",
"SampleAfterValue": "200003",
@@ -207,6 +213,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.ANY_RESPONSE",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -216,6 +223,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.DRAM",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -225,6 +233,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.LOCAL_DRAM",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.LOCAL_DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -234,6 +243,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_AND_L1PF_RD.OUTSTANDING",
+ "Deprecated": "1",
"EventCode": "0XB7",
"EventName": "OCR.DEMAND_DATA_RD.OUTSTANDING",
"MSRIndex": "0x1a6",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/pipeline.json b/tools/perf/pmu-events/arch/x86/snowridgex/pipeline.json
index 9dd8c909facc..c483c0838e08 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/pipeline.json
@@ -165,6 +165,7 @@
},
{
"BriefDescription": "This event is deprecated.",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "CYCLES_DIV_BUSY.ANY",
"SampleAfterValue": "2000003"
@@ -283,6 +284,7 @@
},
{
"BriefDescription": "This event is deprecated. Refer to new event TOPDOWN_BAD_SPECULATION.FASTNUKE",
+ "Deprecated": "1",
"EventCode": "0x73",
"EventName": "TOPDOWN_BAD_SPECULATION.MONUKE",
"SampleAfterValue": "1000003",
@@ -338,6 +340,7 @@
},
{
"BriefDescription": "This event is deprecated.",
+ "Deprecated": "1",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.STORE_BUFFER",
"SampleAfterValue": "1000003",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json
index de3840078e21..7e2895f7fe3d 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json
@@ -590,7 +590,7 @@
"EventCode": "0x0C",
"EventName": "UNC_I_TxS_REQUEST_OCCUPANCY",
"PerPkg": "1",
- "PublicDescription": "Outbound Request Queue Occupancy : Accumultes the number of outstanding outbound requests from the IRP to the switch (towards the devices). This can be used in conjuection with the allocations event in order to calculate average latency of outbound requests.",
+ "PublicDescription": "Outbound Request Queue Occupancy : Accumulates the number of outstanding outbound requests from the IRP to the switch (towards the devices). This can be used in conjunction with the allocations event in order to calculate average latency of outbound requests.",
"Unit": "IRP"
},
{
@@ -5570,7 +5570,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "M2M-&gt;iMC WPQ Cycles w/Credits - Regular : Channel 0",
+ "BriefDescription": "M2M->iMC WPQ Cycles w/Credits - Regular : Channel 0",
"EventCode": "0x4D",
"EventName": "UNC_M2M_WPQ_NO_REG_CRD.CHN0",
"PerPkg": "1",
@@ -5578,7 +5578,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "M2M-&gt;iMC WPQ Cycles w/Credits - Regular : Channel 1",
+ "BriefDescription": "M2M->iMC WPQ Cycles w/Credits - Regular : Channel 1",
"EventCode": "0x4D",
"EventName": "UNC_M2M_WPQ_NO_REG_CRD.CHN1",
"PerPkg": "1",
@@ -5586,7 +5586,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "M2M-&gt;iMC WPQ Cycles w/Credits - Regular : Channel 2",
+ "BriefDescription": "M2M->iMC WPQ Cycles w/Credits - Regular : Channel 2",
"EventCode": "0x4D",
"EventName": "UNC_M2M_WPQ_NO_REG_CRD.CHN2",
"PerPkg": "1",
@@ -5594,7 +5594,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "M2M-&gt;iMC WPQ Cycles w/Credits - Special : Channel 0",
+ "BriefDescription": "M2M->iMC WPQ Cycles w/Credits - Special : Channel 0",
"EventCode": "0x4E",
"EventName": "UNC_M2M_WPQ_NO_SPEC_CRD.CHN0",
"PerPkg": "1",
@@ -5602,7 +5602,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "M2M-&gt;iMC WPQ Cycles w/Credits - Special : Channel 1",
+ "BriefDescription": "M2M->iMC WPQ Cycles w/Credits - Special : Channel 1",
"EventCode": "0x4E",
"EventName": "UNC_M2M_WPQ_NO_SPEC_CRD.CHN1",
"PerPkg": "1",
@@ -5610,7 +5610,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "M2M-&gt;iMC WPQ Cycles w/Credits - Special : Channel 2",
+ "BriefDescription": "M2M->iMC WPQ Cycles w/Credits - Special : Channel 2",
"EventCode": "0x4E",
"EventName": "UNC_M2M_WPQ_NO_SPEC_CRD.CHN2",
"PerPkg": "1",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json
index 996028071ee4..ecdd6f0f8e8f 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json
@@ -34,7 +34,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART0_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART0_FREERUN",
"UMask": "0x20",
"Unit": "iio_free_running"
},
@@ -43,7 +42,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART1_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART1_FREERUN",
"UMask": "0x21",
"Unit": "iio_free_running"
},
@@ -52,7 +50,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART2_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART2_FREERUN",
"UMask": "0x22",
"Unit": "iio_free_running"
},
@@ -61,7 +58,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART3_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART3_FREERUN",
"UMask": "0x23",
"Unit": "iio_free_running"
},
@@ -70,7 +66,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART4_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART4_FREERUN",
"UMask": "0x24",
"Unit": "iio_free_running"
},
@@ -79,7 +74,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART5_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART5_FREERUN",
"UMask": "0x25",
"Unit": "iio_free_running"
},
@@ -88,7 +82,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART6_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART6_FREERUN",
"UMask": "0x26",
"Unit": "iio_free_running"
},
@@ -97,7 +90,6 @@
"EventCode": "0xff",
"EventName": "UNC_IIO_BANDWIDTH_IN.PART7_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_IIO_BANDWIDTH_IN.PART7_FREERUN",
"UMask": "0x27",
"Unit": "iio_free_running"
},
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-memory.json
index 530e9b71b92a..b80911d498dd 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-memory.json
@@ -130,7 +130,6 @@
"EventCode": "0xff",
"EventName": "UNC_M_CLOCKTICKS_FREERUN",
"PerPkg": "1",
- "PublicDescription": "UNC_M_CLOCKTICKS_FREERUN",
"UMask": "0x10",
"Unit": "imc_free_running"
},
@@ -322,7 +321,7 @@
"EventCode": "0x02",
"EventName": "UNC_M_PRE_COUNT.PGT",
"PerPkg": "1",
- "PublicDescription": "DRAM Precharge commands. : Precharge due to page table : Counts the number of DRAM Precharge commands sent on this channel. : Prechages from Page Table",
+ "PublicDescription": "DRAM Precharge commands. : Precharge due to page table : Counts the number of DRAM Precharge commands sent on this channel. : Precharges from Page Table",
"UMask": "0x10",
"Unit": "iMC"
},
@@ -497,7 +496,7 @@
"EventCode": "0x82",
"EventName": "UNC_M_WPQ_OCCUPANCY_PCH0",
"PerPkg": "1",
- "PublicDescription": "Write Pending Queue Occupancy : Accumulates the occupancies of the Write Pending Queue each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule write out to the memory controller and to track the writes. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have posted to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the not posted filter, we can track how long writes spent in the iMC before completions were sent to the HA. The posted filter, on the other hand, provides information about how much queueing is actually happenning in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
+ "PublicDescription": "Write Pending Queue Occupancy : Accumulates the occupancies of the Write Pending Queue each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule write out to the memory controller and to track the writes. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have posted to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the not posted filter, we can track how long writes spent in the iMC before completions were sent to the HA. The posted filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
"Unit": "iMC"
},
{
@@ -505,7 +504,7 @@
"EventCode": "0x83",
"EventName": "UNC_M_WPQ_OCCUPANCY_PCH1",
"PerPkg": "1",
- "PublicDescription": "Write Pending Queue Occupancy : Accumulates the occupancies of the Write Pending Queue each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule write out to the memory controller and to track the writes. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have posted to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the not posted filter, we can track how long writes spent in the iMC before completions were sent to the HA. The posted filter, on the other hand, provides information about how much queueing is actually happenning in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
+ "PublicDescription": "Write Pending Queue Occupancy : Accumulates the occupancies of the Write Pending Queue each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule write out to the memory controller and to track the writes. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have posted to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the not posted filter, we can track how long writes spent in the iMC before completions were sent to the HA. The posted filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
"Unit": "iMC"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json
index 27fc155f1223..a61ffca2dfea 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json
@@ -149,7 +149,7 @@
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
"PerPkg": "1",
- "PublicDescription": "Number of cores in C-State : C0 and C1 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+ "PublicDescription": "Number of cores in C-State : C0 and C1 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
},
{
@@ -157,7 +157,7 @@
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
"PerPkg": "1",
- "PublicDescription": "Number of cores in C-State : C3 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+ "PublicDescription": "Number of cores in C-State : C3 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
},
{
@@ -165,7 +165,7 @@
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
"PerPkg": "1",
- "PublicDescription": "Number of cores in C-State : C6 and C7 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+ "PublicDescription": "Number of cores in C-State : C6 and C7 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/cache.json b/tools/perf/pmu-events/arch/x86/tigerlake/cache.json
index 738249a6f488..c54fb65d3259 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/cache.json
@@ -323,6 +323,16 @@
"UMask": "0x2"
},
{
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+ "Data_LA": "1",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PEBS": "1",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
"Data_LA": "1",
"EventCode": "0xd1",
@@ -511,6 +521,14 @@
"UMask": "0x4"
},
{
+ "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
+ "EventCode": "0xf4",
+ "EventName": "SQ_MISC.BUS_LOCK",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
"BriefDescription": "Cycles the superQ cannot take any more entries.",
"EventCode": "0xf4",
"EventName": "SQ_MISC.SQ_FULL",
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json b/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json
index 23b8528590b3..d7b972452c0e 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/frontend.json
@@ -8,6 +8,14 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+ "EventCode": "0x87",
+ "EventName": "DECODE.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE transitions count.",
"CounterMask": "1",
"EdgeDetect": "1",
@@ -213,10 +221,10 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_DATA.STALLS]",
"EventCode": "0x80",
"EventName": "ICACHE_16B.IFDATA_STALL",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_DATA.STALLS]",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -237,10 +245,26 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"EventCode": "0x83",
"EventName": "ICACHE_64B.IFTAG_STALL",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_DATA.STALLS",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity. [This event is alias to ICACHE_16B.IFDATA_STALL]",
+ "SampleAfterValue": "500009",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_TAG.STALLS",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
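The frontend changes above add `ICACHE_DATA.STALLS` and `ICACHE_TAG.STALLS` as aliases of the existing `ICACHE_16B.IFDATA_STALL` and `ICACHE_64B.IFTAG_STALL` events. A usage sketch: either spelling of an alias pair should program the same counter, so the two invocations below are expected to report matching counts. This assumes a Tiger Lake CPU with perf installed; subprocess is used purely for illustration.

```python
import subprocess

for ev in ("ICACHE_16B.IFDATA_STALL", "ICACHE_DATA.STALLS"):
    # Both names resolve to EventCode 0x80 / UMask 0x4 per the entries above.
    subprocess.run(["perf", "stat", "-e", ev, "--", "sleep", "1"], check=False)
```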
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/metricgroups.json b/tools/perf/pmu-events/arch/x86/tigerlake/metricgroups.json
new file mode 100644
index 000000000000..a151ba9cccb0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/metricgroups.json
@@ -0,0 +1,113 @@
+{
+ "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DSBmiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "DataSharing": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Fed": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FetchLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Flops": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpScalar": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "FpVector": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Frontend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "HPC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "IcMiss": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "InsType": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MemoryTLB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_BW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Memory_Lat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "MicroSeq": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "OS": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Offcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PGO": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Pipeline": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "PortsUtil": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Power": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Prefetches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Ret": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Retire": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SMT": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Server": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Snoop": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "SoC": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "Summary": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL1": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL2": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TmaL3mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+ "TopdownL1": "Metrics for top-down breakdown at level 1",
+ "TopdownL2": "Metrics for top-down breakdown at level 2",
+ "TopdownL3": "Metrics for top-down breakdown at level 3",
+ "TopdownL4": "Metrics for top-down breakdown at level 4",
+ "TopdownL5": "Metrics for top-down breakdown at level 5",
+ "TopdownL6": "Metrics for top-down breakdown at level 6",
+ "tma_L1_group": "Metrics for top-down breakdown at level 1",
+ "tma_L2_group": "Metrics for top-down breakdown at level 2",
+ "tma_L3_group": "Metrics for top-down breakdown at level 3",
+ "tma_L4_group": "Metrics for top-down breakdown at level 4",
+ "tma_L5_group": "Metrics for top-down breakdown at level 5",
+ "tma_L6_group": "Metrics for top-down breakdown at level 6",
+ "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+ "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
+ "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+ "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
+ "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
+ "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
+ "tma_dtlb_load_group": "Metrics contributing to tma_dtlb_load category",
+ "tma_dtlb_store_group": "Metrics contributing to tma_dtlb_store category",
+ "tma_fetch_bandwidth_group": "Metrics contributing to tma_fetch_bandwidth category",
+ "tma_fetch_latency_group": "Metrics contributing to tma_fetch_latency category",
+ "tma_fp_arith_group": "Metrics contributing to tma_fp_arith category",
+ "tma_fp_vector_group": "Metrics contributing to tma_fp_vector category",
+ "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
+ "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
+ "tma_issue2P": "Metrics related by the issue $issue2P",
+ "tma_issueBC": "Metrics related by the issue $issueBC",
+ "tma_issueBM": "Metrics related by the issue $issueBM",
+ "tma_issueBW": "Metrics related by the issue $issueBW",
+ "tma_issueD0": "Metrics related by the issue $issueD0",
+ "tma_issueFB": "Metrics related by the issue $issueFB",
+ "tma_issueFL": "Metrics related by the issue $issueFL",
+ "tma_issueL1": "Metrics related by the issue $issueL1",
+ "tma_issueLat": "Metrics related by the issue $issueLat",
+ "tma_issueMC": "Metrics related by the issue $issueMC",
+ "tma_issueMS": "Metrics related by the issue $issueMS",
+ "tma_issueMV": "Metrics related by the issue $issueMV",
+ "tma_issueRFO": "Metrics related by the issue $issueRFO",
+ "tma_issueSL": "Metrics related by the issue $issueSL",
+ "tma_issueSO": "Metrics related by the issue $issueSO",
+ "tma_issueSmSt": "Metrics related by the issue $issueSmSt",
+ "tma_issueSpSt": "Metrics related by the issue $issueSpSt",
+ "tma_issueSyncxn": "Metrics related by the issue $issueSyncxn",
+ "tma_issueTLB": "Metrics related by the issue $issueTLB",
+ "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
+ "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
+ "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+ "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+ "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
+ "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
+ "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
+ "tma_mite_group": "Metrics contributing to tma_mite category",
+ "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
+ "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
+ "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
+ "tma_retiring_group": "Metrics contributing to tma_retiring category",
+ "tma_serializing_operation_group": "Metrics contributing to tma_serializing_operation category",
+ "tma_store_bound_group": "Metrics contributing to tma_store_bound category",
+ "tma_store_op_utilization_group": "Metrics contributing to tma_store_op_utilization category"
+}
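
[Editor's note, illustration only — not part of the patch.] The file added above is a flat JSON mapping from metric group name to a human-readable description; the intent of such metricgroups.json files is to let tooling (e.g. perf list) attach a description to each group a metric belongs to. A minimal sketch of consuming one, assuming the per-architecture path used by the diff headers in this series (the exact path is an assumption, not taken from the patch):

    #!/usr/bin/env python3
    # Illustrative only: dump group descriptions from a pmu-events
    # metricgroups.json file. The path below is an assumed example.
    import json

    path = "tools/perf/pmu-events/arch/x86/tigerlake/metricgroups.json"
    with open(path) as f:
        groups = json.load(f)  # plain {group name: description} mapping

    for name, desc in sorted(groups.items()):
        print(f"{name}: {desc}")
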
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json
index a0aeeb801fd7..541bf1dd1679 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json
@@ -335,10 +335,10 @@
"UMask": "0x80"
},
{
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"EventCode": "0x87",
"EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -395,6 +395,7 @@
{
"BriefDescription": "Clears speculative count",
"CounterMask": "1",
+ "EdgeDetect": "1",
"EventCode": "0x0d",
"EventName": "INT_MISC.CLEARS_COUNT",
"PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
@@ -563,7 +564,7 @@
"BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the specualtive path as well as the out-of-order engine recovery past a branch misprediction.",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
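
[Editor's note, illustration only — not part of the patch.] In these event files, the CounterMask and EdgeDetect fields correspond to the cmask and edge modifiers in perf's event encoding, so the EdgeDetect addition above makes INT_MISC.CLEARS_COUNT count transitions rather than qualifying cycles. A rough sketch of that mapping, using the EventCode/CounterMask/EdgeDetect values shown above (the UMask value is assumed for illustration; the real conversion is done by perf's jevents, which handles many more fields):

    # Illustrative sketch: build a perf-style event string from a
    # pmu-events JSON entry. Deliberately simplified.
    def event_string(entry):
        terms = [f"event={entry['EventCode']}", f"umask={entry['UMask']}"]
        if entry.get("CounterMask"):          # JSON "CounterMask" -> cmask
            terms.append(f"cmask={entry['CounterMask']}")
        if entry.get("EdgeDetect") == "1":    # JSON "EdgeDetect" -> edge
            terms.append("edge")
        return "cpu/" + ",".join(terms) + "/"

    clears_count = {
        "EventCode": "0x0d", "UMask": "0x1",  # umask assumed for this example
        "CounterMask": "1", "EdgeDetect": "1",
    }
    print(event_string(clears_count))  # cpu/event=0x0d,umask=0x1,cmask=1,edge/
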
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
index b442ed4acfbb..c7c2d6ab1a93 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
@@ -79,7 +79,7 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_clks",
+ "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
"MetricThreshold": "tma_4k_aliasing > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -88,7 +88,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
"MetricThreshold": "tma_alu_op_utilization > 0.6",
@@ -96,7 +96,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
- "MetricExpr": "100 * ASSISTS.ANY / tma_info_slots",
+ "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_assists",
"MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
@@ -105,27 +105,29 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "DefaultMetricgroupName": "TopdownL1",
"MetricExpr": "max(1 - (tma_frontend_bound + tma_backend_bound + tma_retiring), 0)",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
- "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_branch_instructions",
"MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
@@ -138,12 +140,12 @@
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
+ "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks + tma_unknown_branches",
+ "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -161,7 +163,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Machine Clears",
- "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "(1 - BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;MachineClears;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueMC",
"MetricName": "tma_clears_resteers",
"MetricThreshold": "tma_clears_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -171,7 +173,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(49 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 48 * tma_info_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "(49 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 48 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
"MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -191,7 +193,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "48 * tma_info_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "48 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
"MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -200,16 +202,16 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder",
- "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_clks / 2",
+ "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
"MetricName": "tma_decoder0_alone",
- "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35))",
+ "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
"PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_clks",
+ "MetricExpr": "ARITH.DIVIDER_ACTIVE / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -219,7 +221,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound",
+ "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -228,43 +230,43 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
- "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_clks",
+ "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivered higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty. Sample with: FRONTEND_RETIRED.DSB_MISS_PS. Related metrics: tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses",
- "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_clks",
+ "MetricExpr": "min(7 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
"MetricName": "tma_dtlb_load",
"MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses",
- "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_clks",
+ "MetricExpr": "(7 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE) / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
"MetricName": "tma_dtlb_store",
"MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_memory_data_tlbs",
+ "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses. As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead. Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page. Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
- "MetricExpr": "54 * tma_info_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_clks",
+ "MetricExpr": "54 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
"MetricName": "tma_false_sharing",
"MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -273,11 +275,11 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -285,14 +287,14 @@
"MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
- "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35",
+ "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
"MetricgroupNoGroup": "TopdownL2",
- "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
+ "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_slots",
+ "MetricExpr": "(5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / tma_info_thread_slots",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -311,6 +313,7 @@
},
{
"BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
"MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_fp_arith",
@@ -320,7 +323,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -329,7 +332,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -338,7 +341,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -347,7 +350,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -356,7 +359,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 512-bit wide vectors",
- "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "(FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_512b",
"MetricThreshold": "tma_fp_vector_512b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -365,11 +368,12 @@
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
- "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_slots",
- "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / tma_info_thread_slots",
+ "MetricGroup": "Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -385,7 +389,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -393,688 +397,696 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_turbo_utilization * TSC / 1e9 / duration_time",
- "MetricGroup": "Power;Summary",
- "MetricName": "tma_info_average_frequency"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
+ "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_indirect",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
+ },
+ {
+ "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmisp_ret",
+ "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "tma_info_bad_spec_ipmispredict",
+ "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
+ },
+ {
+ "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_l0_core_bound_likely",
+ "MetricThreshold": "tma_info_botlnk_l0_core_bound_likely > 0.5"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
+ "MetricGroup": "DSBmiss;Fed;tma_issueFB",
+ "MetricName": "tma_info_botlnk_l2_dsb_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_dsb_misses > 10",
+ "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
+ "MetricName": "tma_info_botlnk_l2_ic_misses",
+ "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
+ "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
- "MetricName": "tma_info_big_code",
- "MetricThreshold": "tma_info_big_code > 20",
- "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_branching_overhead"
+ "MetricName": "tma_info_bottleneck_big_code",
+ "MetricThreshold": "tma_info_bottleneck_big_code > 20",
+ "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_bptkbranch"
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
+ "MetricGroup": "Ret;tma_issueBC",
+ "MetricName": "tma_info_bottleneck_branching_overhead",
+ "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
+ "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_slots / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_branch_misprediction_cost",
- "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_mispredictions, tma_mispredicts_resteers"
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
+ "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
},
{
- "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
- "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_slots)",
- "MetricGroup": "Ret;tma_issueBC",
- "MetricName": "tma_info_branching_overhead",
- "MetricThreshold": "tma_info_branching_overhead > 10",
- "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_big_code"
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
+ "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+ "MetricName": "tma_info_bottleneck_memory_bandwidth",
+ "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
+ "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Fraction of branches that are CALL or RET",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_callret"
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
+ "MetricName": "tma_info_bottleneck_memory_data_tlbs",
+ "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
+ "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
},
{
- "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Pipeline",
- "MetricName": "tma_info_clks"
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
+ "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+ "MetricName": "tma_info_bottleneck_memory_latency",
+ "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
+ "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Fed;MemoryTLB",
- "MetricName": "tma_info_code_stlb_mpki"
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
+ "MetricName": "tma_info_bottleneck_mispredictions",
+ "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
+ "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_callret"
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_nt"
+ "MetricName": "tma_info_branches_cond_nt"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
"MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
- "MetricName": "tma_info_cond_tk"
+ "MetricName": "tma_info_branches_cond_tk"
},
{
- "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT",
- "MetricName": "tma_info_core_bound_likely",
- "MetricThreshold": "tma_info_core_bound_likely > 0.5"
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_jump"
+ },
+ {
+ "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
+ "MetricExpr": "1 - (tma_info_branches_cond_nt + tma_info_branches_cond_tk + tma_info_branches_callret + tma_info_branches_jump)",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "tma_info_branches_other_branches"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
"MetricGroup": "SMT",
- "MetricName": "tma_info_core_clks"
+ "MetricName": "tma_info_core_core_clks"
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_coreipc"
+ "MetricName": "tma_info_core_coreipc"
},
{
- "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
- "MetricExpr": "1 / tma_info_ipc",
- "MetricGroup": "Mem;Pipeline",
- "MetricName": "tma_info_cpi"
- },
- {
- "BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
- "MetricGroup": "HPC;Summary",
- "MetricName": "tma_info_cpu_utilization"
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+ "MetricGroup": "Flops;Ret",
+ "MetricName": "tma_info_core_flopc"
},
{
- "BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_data_l2_mlp"
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_core_fp_arith_utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1e6 / duration_time / 1e3",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
- "MetricName": "tma_info_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "tma_info_core_ilp"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
- "MetricName": "tma_info_dsb_coverage",
- "MetricThreshold": "tma_info_dsb_coverage < 0.7 & tma_info_ipc / 5 > 0.35",
- "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_misses, tma_info_iptb, tma_lcp"
- },
- {
- "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
- "MetricGroup": "DSBmiss;Fed;tma_issueFB",
- "MetricName": "tma_info_dsb_misses",
- "MetricThreshold": "tma_info_dsb_misses > 10",
- "PublicDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_iptb, tma_lcp"
+ "MetricName": "tma_info_frontend_dsb_coverage",
+ "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 5 > 0.35",
+ "PublicDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache). Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_inst_mix_iptb, tma_lcp"
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
"MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
"MetricGroup": "DSBmiss",
- "MetricName": "tma_info_dsb_switch_cost"
- },
- {
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
- "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
- "MetricName": "tma_info_execute"
- },
- {
- "BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
- "MetricGroup": "Cor;Pipeline",
- "MetricName": "tma_info_execute_per_issue",
- "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
- },
- {
- "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_fb_hpki"
+ "MetricName": "tma_info_frontend_dsb_switch_cost"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
"MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
- "MetricName": "tma_info_fetch_upc"
+ "MetricName": "tma_info_frontend_fetch_upc"
},
{
- "BriefDescription": "Floating Point Operations Per Cycle",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_clks",
- "MetricGroup": "Flops;Ret",
- "MetricName": "tma_info_flopc"
+ "BriefDescription": "Average Latency for L1 instruction cache misses",
+ "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_frontend_icache_miss_latency"
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
- "MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_clks)",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_fp_arith_utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
+ "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_frontend_ipdsb_miss_ret",
+ "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50"
},
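The ip* metrics in this file are inverse occurrence rates: retired instructions per event, with the MetricThreshold firing when the event is frequent (ratio small). A sketch for the metric above, using hypothetical counts:

inst_retired = 5_000_000   # INST_RETIRED.ANY
dsb_misses = 125_000       # FRONTEND_RETIRED.ANY_DSB_MISS

ipdsb_miss_ret = inst_retired / dsb_misses   # 40 instructions per DSB miss
flagged = ipdsb_miss_ret < 50                # threshold from above -> True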
{
- "BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
- "MetricGroup": "Cor;Flops;HPC",
- "MetricName": "tma_info_gflops",
- "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+ "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "tma_info_inst_mix_instructions / BACLEARS.ANY",
+ "MetricGroup": "Fed",
+ "MetricName": "tma_info_frontend_ipunknown_branch"
},
{
- "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
- "MetricName": "tma_info_ic_misses",
- "MetricThreshold": "tma_info_ic_misses > 5",
- "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
+ "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code"
},
{
- "BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_16B.IFDATA_STALL / cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@",
- "MetricGroup": "Fed;FetchLat;IcMiss",
- "MetricName": "tma_info_icache_miss_latency"
+ "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
+ "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "IcMiss",
+ "MetricName": "tma_info_frontend_l2mpki_code_all"
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
- "MetricName": "tma_info_ilp"
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
+ "MetricGroup": "Fed;LSD",
+ "MetricName": "tma_info_frontend_lsd_coverage"
},
{
- "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_big_code",
- "MetricGroup": "Fed;FetchBW;Frontend",
- "MetricName": "tma_info_instruction_fetch_bw",
- "MetricThreshold": "tma_info_instruction_fetch_bw > 20"
+ "BriefDescription": "Branch instructions per taken branch.",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "tma_info_inst_mix_bptkbranch"
},
{
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1;tma_L1_group",
- "MetricName": "tma_info_instructions",
+ "MetricName": "tma_info_inst_mix_instructions",
"PublicDescription": "Total number of retired Instructions. Sample with: INST_RETIRED.PREC_DIST"
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_iparith",
- "MetricThreshold": "tma_info_iparith < 10",
+ "MetricName": "tma_info_inst_mix_iparith",
+ "MetricThreshold": "tma_info_inst_mix_iparith < 10",
"PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx128",
- "MetricThreshold": "tma_info_iparith_avx128 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx128",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx256",
- "MetricThreshold": "tma_info_iparith_avx256 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx256",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;FpVector;InsType",
- "MetricName": "tma_info_iparith_avx512",
- "MetricThreshold": "tma_info_iparith_avx512 < 10",
+ "MetricName": "tma_info_inst_mix_iparith_avx512",
+ "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
"PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_dp",
- "MetricThreshold": "tma_info_iparith_scalar_dp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
- "MetricName": "tma_info_iparith_scalar_sp",
- "MetricThreshold": "tma_info_iparith_scalar_sp < 10",
+ "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
+ "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
"PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
},
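The iparith family divides retired instructions by retired FP-arithmetic instructions of a given width; as the descriptions note, an FMA retires as one instruction doing two operations, so these ratios may undercount FP work. An illustration with invented counts (the dictionary keys mirror the event names in the expressions above):

counts = {
    "INST_RETIRED.ANY": 10_000_000,
    "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE": 400_000,
    "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 50_000,
    "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE": 150_000,
}
inst = counts["INST_RETIRED.ANY"]
iparith_scalar_dp = inst / counts["FP_ARITH_INST_RETIRED.SCALAR_DOUBLE"]       # 25.0
iparith_avx128 = inst / (counts["FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE"]
                         + counts["FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE"])  # 50.0
# Neither ratio is below its "< 10" threshold, so neither would be flagged.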
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
- "MetricName": "tma_info_ipbranch",
- "MetricThreshold": "tma_info_ipbranch < 8"
- },
- {
- "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
- "MetricGroup": "Ret;Summary",
- "MetricName": "tma_info_ipc"
+ "MetricName": "tma_info_inst_mix_ipbranch",
+ "MetricThreshold": "tma_info_inst_mix_ipbranch < 8"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
- "MetricName": "tma_info_ipcall",
- "MetricThreshold": "tma_info_ipcall < 200"
- },
- {
- "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
- "MetricGroup": "DSBmiss;Fed",
- "MetricName": "tma_info_ipdsb_miss_ret",
- "MetricThreshold": "tma_info_ipdsb_miss_ret < 50"
- },
- {
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
- "MetricGroup": "Branches;OS",
- "MetricName": "tma_info_ipfarbranch",
- "MetricThreshold": "tma_info_ipfarbranch < 1e6"
+ "MetricName": "tma_info_inst_mix_ipcall",
+ "MetricThreshold": "tma_info_inst_mix_ipcall < 200"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
"MetricGroup": "Flops;InsType",
- "MetricName": "tma_info_ipflop",
- "MetricThreshold": "tma_info_ipflop < 10"
+ "MetricName": "tma_info_inst_mix_ipflop",
+ "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipload",
- "MetricThreshold": "tma_info_ipload < 3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_ntaken",
- "MetricThreshold": "tma_info_ipmisp_cond_ntaken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_cond_taken",
- "MetricThreshold": "tma_info_ipmisp_cond_taken < 200"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_indirect",
- "MetricThreshold": "tma_info_ipmisp_indirect < 1e3"
- },
- {
- "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "tma_info_ipmisp_ret",
- "MetricThreshold": "tma_info_ipmisp_ret < 500"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "tma_info_ipmispredict",
- "MetricThreshold": "tma_info_ipmispredict < 200"
+ "MetricName": "tma_info_inst_mix_ipload",
+ "MetricThreshold": "tma_info_inst_mix_ipload < 3"
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
- "MetricName": "tma_info_ipstore",
- "MetricThreshold": "tma_info_ipstore < 8"
+ "MetricName": "tma_info_inst_mix_ipstore",
+ "MetricThreshold": "tma_info_inst_mix_ipstore < 8"
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
- "MetricName": "tma_info_ipswpf",
- "MetricThreshold": "tma_info_ipswpf < 100"
+ "MetricName": "tma_info_inst_mix_ipswpf",
+ "MetricThreshold": "tma_info_inst_mix_ipswpf < 100"
},
{
"BriefDescription": "Instruction per taken branch",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
- "MetricName": "tma_info_iptb",
- "MetricThreshold": "tma_info_iptb < 11",
- "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_lcp"
+ "MetricName": "tma_info_inst_mix_iptb",
+ "MetricThreshold": "tma_info_inst_mix_iptb < 11",
+ "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
},
{
- "BriefDescription": "Instructions per speculative Unknown Branch Misprediction (BAClear) (lower number means higher occurrence rate)",
- "MetricExpr": "tma_info_instructions / BACLEARS.ANY",
- "MetricGroup": "Fed",
- "MetricName": "tma_info_ipunknown_branch"
- },
- {
- "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
- "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_jump"
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
},
{
- "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_cpi"
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
},
{
- "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "OS",
- "MetricName": "tma_info_kernel_utilization",
- "MetricThreshold": "tma_info_kernel_utilization > 0.05"
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_core_l3_cache_access_bw"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
"MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw"
+ "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "tma_info_l1d_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l1d_cache_fill_bw_1t"
+ "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_fb_hpki"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki"
+ "MetricName": "tma_info_memory_l1mpki"
},
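All the *mpki/*hpki metrics share one shape: an event count scaled to per-thousand retired instructions. A one-line helper with hypothetical numbers:

def mpki(events: int, instructions: int) -> float:
    # misses (or hits) per kilo instruction, as in the expressions above
    return 1e3 * events / instructions

print(mpki(30_000, 10_000_000))   # 3.0 -> e.g. tma_info_memory_l1mpki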
{
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l1mpki_load"
- },
- {
- "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw"
- },
- {
- "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "tma_info_l2_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l2_cache_fill_bw_1t"
+ "MetricName": "tma_info_memory_l1mpki_load"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_all"
+ "MetricName": "tma_info_memory_l2hpki_all"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2hpki_load"
+ "MetricName": "tma_info_memory_l2hpki_load"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
"MetricGroup": "Backend;CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki"
+ "MetricName": "tma_info_memory_l2mpki"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem;Offcore",
- "MetricName": "tma_info_l2mpki_all"
- },
- {
- "BriefDescription": "L2 cache true code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * FRONTEND_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code"
- },
- {
- "BriefDescription": "L2 cache speculative code cacheline misses per kilo instruction",
- "MetricExpr": "1e3 * L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY",
- "MetricGroup": "IcMiss",
- "MetricName": "tma_info_l2mpki_code_all"
+ "MetricName": "tma_info_memory_l2mpki_all"
},
{
"BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l2mpki_load"
- },
- {
- "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw"
+ "MetricName": "tma_info_memory_l2mpki_load"
},
{
- "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_access_bw",
- "MetricGroup": "Mem;MemoryBW;Offcore",
- "MetricName": "tma_info_l3_cache_access_bw_1t"
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "CacheMisses;Mem",
+ "MetricName": "tma_info_memory_l3mpki"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "tma_info_memory_load_miss_real_latency"
},
{
- "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "tma_info_l3_cache_fill_bw",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "tma_info_l3_cache_fill_bw_1t"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBW;MemoryBound",
+ "MetricName": "tma_info_memory_mlp",
+ "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
},
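Both load_miss_real_latency and mlp above derive from the occupancy counter L1D_PEND_MISS.PENDING, which each cycle adds the number of misses currently outstanding. Dividing by completed misses gives average latency; dividing by cycles with at least one outstanding miss gives average parallelism. With invented counts:

pending = 8_000_000          # L1D_PEND_MISS.PENDING (occupancy sum over cycles)
pending_cycles = 2_000_000   # cycles with >= 1 outstanding L1D load miss
l1_miss = 350_000            # MEM_LOAD_RETIRED.L1_MISS
fb_hit = 50_000              # MEM_LOAD_RETIRED.FB_HIT (merged into an in-flight miss)

load_miss_real_latency = pending / (l1_miss + fb_hit)   # 20.0 cycles per miss
mlp = pending / pending_cycles                           # 4.0 misses in flight on average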
{
- "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Mem",
- "MetricName": "tma_info_l3mpki"
+ "BriefDescription": "Average Parallel L2 cache miss data reads",
+ "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_oro_data_l2_mlp"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l2_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
- "MetricName": "tma_info_load_l2_mlp"
+ "MetricName": "tma_info_memory_oro_load_l2_mlp"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
- "MetricName": "tma_info_load_l3_miss_latency"
+ "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "tma_info_load_miss_real_latency"
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+ },
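The cache fill/access bandwidth metrics all assume each counted event moves one 64-byte cache line; the _1t variants above simply reuse the per-core value per thread. A worked example with invented numbers:

l1d_replacements = 500_000_000   # L1D.REPLACEMENT over the interval
seconds = 2.0                    # duration_time

l1d_fill_bw = 64 * l1d_replacements / 1e9 / seconds
print(f"{l1d_fill_bw:.1f} GB/s")   # 16.0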
+ {
+ "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_code_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "1e3 * DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
"MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_load_stlb_mpki"
+ "MetricName": "tma_info_memory_tlb_load_stlb_mpki"
},
{
- "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
- "MetricExpr": "LSD.UOPS / UOPS_ISSUED.ANY",
- "MetricGroup": "Fed;LSD",
- "MetricName": "tma_info_lsd_coverage"
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_core_clks)",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_page_walks_utilization",
+ "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
- "MetricGroup": "Mem;MemoryBW;SoC",
- "MetricName": "tma_info_mem_parallel_reads",
- "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+ "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
- "MetricGroup": "Mem;MemoryLat;SoC",
- "MetricName": "tma_info_mem_read_latency",
- "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "tma_info_pipeline_execute"
},
{
- "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
- "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / arb@event\\=0x81\\,umask\\=0x1@",
- "MetricGroup": "Mem;SoC",
- "MetricName": "tma_info_mem_request_latency"
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "tma_info_pipeline_retire"
},
{
- "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
- "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_info_memory_bandwidth",
- "MetricThreshold": "tma_info_memory_bandwidth > 20",
- "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+ "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+ "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
+ "MetricGroup": "Power;Summary",
+ "MetricName": "tma_info_system_average_frequency"
},
{
- "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
- "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
- "MetricName": "tma_info_memory_data_tlbs",
- "MetricThreshold": "tma_info_memory_data_tlbs > 20",
- "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricGroup": "HPC;Summary",
+ "MetricName": "tma_info_system_cpu_utilization"
},
{
- "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
- "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_info_memory_latency",
- "MetricThreshold": "tma_info_memory_latency > 20",
- "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1e6 / duration_time / 1e3",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
},
{
- "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
- "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
- "MetricName": "tma_info_mispredictions",
- "MetricThreshold": "tma_info_mispredictions > 20",
- "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_mispredicts_resteers"
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "tma_info_system_gflops",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
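The gflops expression weights each retired FP instruction by its lane count: 1 for scalar, 2 for 128-bit packed double, 4 for 128-bit packed single, 8 for 256-bit packed single, 16 for 512-bit packed single. An arithmetic sketch with invented counts:

flops = (1 * 1_000_000      # scalar ops (single + double, umask 0x03)
         + 2 * 200_000      # 128-bit packed double: 2 lanes/instruction
         + 4 * 300_000      # 128-bit packed single: 4 lanes
         + 8 * 100_000      # 256-bit packed single: 8 lanes
         + 16 * 50_000)     # 512-bit packed single: 16 lanes
gflops = flops / 1e9 / 0.5  # duration_time = 0.5 s (hypothetical)
print(gflops)               # 0.0084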
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBW;MemoryBound",
- "MetricName": "tma_info_mlp",
- "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+ "MetricGroup": "Branches;OS",
+ "MetricName": "tma_info_system_ipfarbranch",
+ "MetricThreshold": "tma_info_system_ipfarbranch < 1e6"
},
{
- "BriefDescription": "Fraction of branches of other types (not individually covered by other metrics in Info.Branches group)",
- "MetricExpr": "1 - (tma_info_cond_nt + tma_info_cond_tk + tma_info_callret + tma_info_jump)",
- "MetricGroup": "Bad;Branches",
- "MetricName": "tma_info_other_branches"
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_cpi"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * tma_info_core_clks)",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_page_walks_utilization",
- "MetricThreshold": "tma_info_page_walks_utilization > 0.5"
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "OS",
+ "MetricName": "tma_info_system_kernel_utilization",
+ "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
+ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "tma_info_system_mem_parallel_reads",
+ "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
+ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "tma_info_system_mem_read_latency",
+ "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
+ },
+ {
+ "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / arb@event\\=0x81\\,umask\\=0x1@",
+ "MetricGroup": "Mem;SoC",
+ "MetricName": "tma_info_system_mem_request_latency"
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
- "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license0_utilization",
+ "MetricName": "tma_info_system_power_license0_utilization",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
- "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license1_utilization",
- "MetricThreshold": "tma_info_power_license1_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license1_utilization",
+ "MetricThreshold": "tma_info_system_power_license1_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
},
{
"BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
- "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_clks",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / tma_info_core_core_clks",
"MetricGroup": "Power",
- "MetricName": "tma_info_power_license2_utilization",
- "MetricThreshold": "tma_info_power_license2_utilization > 0.5",
+ "MetricName": "tma_info_system_power_license2_utilization",
+ "MetricThreshold": "tma_info_system_power_license2_utilization > 0.5",
"PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
},
{
- "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
- "MetricExpr": "tma_retiring * tma_info_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
- "MetricGroup": "Pipeline;Ret",
- "MetricName": "tma_info_retire"
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+ "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
+ "MetricGroup": "SMT",
+ "MetricName": "tma_info_system_smt_2t_utilization"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
- "MetricExpr": "TOPDOWN.SLOTS",
- "MetricGroup": "TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots"
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "tma_info_system_turbo_utilization"
},
{
- "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
- "MetricExpr": "(tma_info_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
- "MetricGroup": "SMT;TmaL1;tma_L1_group",
- "MetricName": "tma_info_slots_utilization"
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Pipeline",
+ "MetricName": "tma_info_thread_clks"
},
{
- "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0)",
- "MetricGroup": "SMT",
- "MetricName": "tma_info_smt_2t_utilization"
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / tma_info_thread_ipc",
+ "MetricGroup": "Mem;Pipeline",
+ "MetricName": "tma_info_thread_cpi"
},
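tma_info_thread_cpi and tma_info_thread_ipc (defined just below) are reciprocals by construction, e.g. with made-up counts:

inst_retired = 9_000_000   # INST_RETIRED.ANY
thread_clks = 6_000_000    # CPU_CLK_UNHALTED.THREAD (tma_info_thread_clks)

ipc = inst_retired / thread_clks   # tma_info_thread_ipc = 1.5
cpi = 1 / ipc                      # tma_info_thread_cpi ~= 0.667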
{
- "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
- "MetricExpr": "1e3 * DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY",
- "MetricGroup": "Mem;MemoryTLB",
- "MetricName": "tma_info_store_stlb_mpki"
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "tma_info_thread_execute_per_issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
},
{
- "BriefDescription": "Average Frequency Utilization relative nominal frequency",
- "MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
- "MetricGroup": "Power",
- "MetricName": "tma_info_turbo_utilization"
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricGroup": "Ret;Summary",
+ "MetricName": "tma_info_thread_ipc"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "TOPDOWN.SLOTS",
+ "MetricGroup": "TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots"
+ },
+ {
+ "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
+ "MetricExpr": "(tma_info_thread_slots / (TOPDOWN.SLOTS / 2) if #SMT_on else 1)",
+ "MetricGroup": "SMT;TmaL1;tma_L1_group",
+ "MetricName": "tma_info_thread_slots_utilization"
},
{
"BriefDescription": "Uops Per Instruction",
- "MetricExpr": "tma_retiring * tma_info_slots / INST_RETIRED.ANY",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;Ret;Retire",
- "MetricName": "tma_info_uoppi",
- "MetricThreshold": "tma_info_uoppi > 1.05"
+ "MetricName": "tma_info_thread_uoppi",
+ "MetricThreshold": "tma_info_thread_uoppi > 1.05"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "tma_retiring * tma_info_slots / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW",
- "MetricName": "tma_info_uptb",
- "MetricThreshold": "tma_info_uptb < 7.5"
+ "MetricName": "tma_info_thread_uptb",
+ "MetricThreshold": "tma_info_thread_uptb < 7.5"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_clks",
+ "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1083,7 +1095,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
- "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_clks, 0)",
+ "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1093,7 +1105,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks)",
+ "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1102,7 +1114,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_clks",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1111,20 +1123,20 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricExpr": "17.5 * tma_info_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_clks",
+ "MetricExpr": "17.5 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "ILD_STALL.LCP / tma_info_clks",
+ "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
- "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb",
+ "PublicDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb",
"ScaleUnit": "100%"
},
{
@@ -1139,7 +1151,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_clks)",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_2_3 / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -1156,7 +1168,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_clks",
+ "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1165,7 +1177,7 @@
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_clks",
+ "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
"MetricThreshold": "tma_lock_latency > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1174,10 +1186,10 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
- "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit. LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
"ScaleUnit": "100%"
},
@@ -1193,20 +1205,20 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_clks",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
- "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_clks - tma_mem_bandwidth",
+ "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1221,6 +1233,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * MEM_INST_RETIRED.ANY / INST_RETIRED.ANY",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -1229,7 +1242,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "tma_retiring * tma_info_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_slots",
+ "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -1238,28 +1251,28 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_clks",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
"MetricThreshold": "tma_mispredicts_resteers > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
- "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_branch_misprediction_cost, tma_info_mispredictions",
+ "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage. Sample with: INT_MISC.CLEAR_RESTEER_CYCLES. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_info_bottleneck_mispredictions",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_clks / 2",
+ "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
- "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35)",
+ "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
"PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles where (only) 4 uops were delivered by the MITE pipeline",
- "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_clks",
+ "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
"MetricName": "tma_mite_4wide",
- "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35))",
+ "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
"ScaleUnit": "100%"
},
{
@@ -1273,7 +1286,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
- "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_clks",
+ "MetricExpr": "3 * IDQ.MS_SWITCHES / tma_info_thread_clks",
"MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
"MetricName": "tma_ms_switches",
"MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1282,7 +1295,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
- "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_slots)",
+ "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_nop_instructions",
"MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
@@ -1291,6 +1304,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1300,7 +1314,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -1309,7 +1323,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -1318,7 +1332,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_5 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_5",
"MetricThreshold": "tma_port_5 > 0.6",
@@ -1327,7 +1341,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_clks",
+ "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -1336,7 +1350,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
- "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_clks)",
+ "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1345,7 +1359,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_clks",
+ "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1354,7 +1368,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1363,7 +1377,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1372,7 +1386,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_clks",
+ "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1381,17 +1395,18 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_slots",
- "MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
+ "DefaultMetricgroupName": "TopdownL1",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
- "MetricgroupNoGroup": "TopdownL1",
+ "MetricgroupNoGroup": "TopdownL1;Default",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_clks",
+ "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -1400,7 +1415,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_clks",
+ "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -1409,7 +1424,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles handling memory load split accesses - load that cross 64-byte cache line boundary",
- "MetricExpr": "tma_info_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_clks",
+ "MetricExpr": "tma_info_memory_load_miss_real_latency * LD_BLOCKS.NO_SR / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_split_loads",
"MetricThreshold": "tma_split_loads > 0.2 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1418,7 +1433,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_clks",
+ "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1427,16 +1442,16 @@
},
{
"BriefDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors)",
- "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_clks",
+ "MetricExpr": "L1D_PEND_MISS.L2_STALL / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_dram_bw_use, tma_info_memory_bandwidth, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_clks",
+ "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -1445,7 +1460,7 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_clks",
+ "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1454,7 +1469,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU spent handling L1D store misses",
- "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_clks",
+ "MetricExpr": "(L2_RQSTS.RFO_HIT * 10 * (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) + (1 - MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES) * min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO)) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_issueSL;tma_store_bound_group",
"MetricName": "tma_store_latency",
"MetricThreshold": "tma_store_latency > 0.1 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1463,7 +1478,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Store operations",
- "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_clks)",
+ "MetricExpr": "(UOPS_DISPATCHED.PORT_4_9 + UOPS_DISPATCHED.PORT_7_8) / (4 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_store_op_utilization",
"MetricThreshold": "tma_store_op_utilization > 0.6",
@@ -1480,7 +1495,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_clks",
+ "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -1488,7 +1503,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to Streaming store memory accesses; Streaming store optimize out a read request required by RFO stores",
- "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_clks",
+ "MetricExpr": "9 * OCR.STREAMING_WR.ANY_RESPONSE / tma_info_thread_clks",
"MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueSmSt;tma_store_bound_group",
"MetricName": "tma_streaming_stores",
"MetricThreshold": "tma_streaming_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -1497,7 +1512,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "10 * BACLEARS.ANY / tma_info_clks",
+ "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
@@ -1515,28 +1530,28 @@
},
{
"BriefDescription": "Percentage of cycles in aborted transactions.",
- "MetricExpr": "max(cpu@cycles\\-t@ - cpu@cycles\\-ct@, 0) / cycles",
+ "MetricExpr": "(max(cycles\\-t - cycles\\-ct, 0) / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_aborted_cycles",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of elisions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@el\\-start@",
+ "MetricExpr": "(cycles\\-t / el\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_elision",
"ScaleUnit": "1cycles / elision"
},
{
"BriefDescription": "Number of cycles within a transaction divided by the number of transactions.",
- "MetricExpr": "cpu@cycles\\-t@ / cpu@tx\\-start@",
+ "MetricExpr": "(cycles\\-t / tx\\-start if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_cycles_per_transaction",
"ScaleUnit": "1cycles / transaction"
},
{
"BriefDescription": "Percentage of cycles within a transaction region.",
- "MetricExpr": "cpu@cycles\\-t@ / cycles",
+ "MetricExpr": "(cycles\\-t / cycles if has_event(cycles\\-t) else 0)",
"MetricGroup": "transaction",
"MetricName": "tsx_transactional_cycles",
"ScaleUnit": "100%"
diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c
index e74defb5284f..a630c617e879 100644
--- a/tools/perf/pmu-events/empty-pmu-events.c
+++ b/tools/perf/pmu-events/empty-pmu-events.c
@@ -420,3 +420,8 @@ int pmu_for_each_sys_metric(pmu_metric_iter_fn fn __maybe_unused, void *data __m
{
return 0;
}
+
+const char *describe_metricgroup(const char *group __maybe_unused)
+{
+ return NULL;
+}
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index f57a8f274025..82c42c46d886 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -37,6 +37,8 @@ _pending_metrics = []
_pending_metrics_tblname = None
# Global BigCString shared by all structures.
_bcs = None
+# Map from the name of a metric group to a description of the group.
+_metricgroups = {}
# Order specific JsonEvent attributes will be visited.
_json_event_attributes = [
# cmp_sevent related attributes.
@@ -51,9 +53,9 @@ _json_event_attributes = [
# Attributes that are in pmu_metric rather than pmu_event.
_json_metric_attributes = [
- 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 'desc',
- 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 'aggr_mode',
- 'event_grouping'
+ 'pmu', 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
+ 'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
+ 'default_metricgroup_name', 'aggr_mode', 'event_grouping'
]
# Attributes that are bools or enum int values, encoded as '0', '1',...
_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
@@ -305,6 +307,7 @@ class JsonEvent:
self.metric_name = jd.get('MetricName')
self.metric_group = jd.get('MetricGroup')
self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
+ self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
self.metric_expr = None
if 'MetricExpr' in jd:
@@ -391,11 +394,11 @@ def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
except BaseException as err:
print(f"Exception processing {path}")
raise
- metrics: list[Tuple[str, metric.Expression]] = []
+ metrics: list[Tuple[str, str, metric.Expression]] = []
for event in events:
event.topic = topic
if event.metric_name and '-' not in event.metric_name:
- metrics.append((event.metric_name, event.metric_expr))
+ metrics.append((event.pmu, event.metric_name, event.metric_expr))
updates = metric.RewriteMetricsInTermsOfOthers(metrics)
if updates:
for event in events:
@@ -512,6 +515,17 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
if not item.is_file() or not item.name.endswith('.json'):
return
+ if item.name == 'metricgroups.json':
+ metricgroup_descriptions = json.load(open(item.path))
+ for mgroup in metricgroup_descriptions:
+ assert len(mgroup) > 1, parents
+ description = f"{metricgroup_descriptions[mgroup]}\\000"
+ mgroup = f"{mgroup}\\000"
+ _bcs.add(mgroup)
+ _bcs.add(description)
+ _metricgroups[mgroup] = description
+ return
+
topic = get_topic(item.name)
for event in read_json_events(item.path, topic):
if event.name:
@@ -548,7 +562,7 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
# Ignore other directories. If the file name does not have a .json
# extension, ignore it. It could be a readme.txt for instance.
- if not item.is_file() or not item.name.endswith('.json'):
+ if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json':
return
add_events_table_entries(item, get_topic(item.name))
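For reference, the metricgroups.json handled above is a flat object mapping group names to descriptions; the interning step can be pictured as follows (a sketch with invented entries, mirroring preprocess_one_file()):

    # Illustrative only: metricgroups.json maps group names to descriptions.
    metricgroup_descriptions = {
        "TopdownL1": "Top-down level-1 pipeline-slot breakdown metrics",
        "transaction": "TSX transactional execution metrics",
    }
    # As in preprocess_one_file(), both strings are interned into the big
    # C string with an explicit NUL ('\000') terminator appended.
    for mgroup, description in metricgroup_descriptions.items():
        _bcs.add(f"{mgroup}\\000")
        _bcs.add(f"{description}\\000")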
@@ -911,6 +925,38 @@ int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
}
""")
+def print_metricgroups() -> None:
+ _args.output_file.write("""
+static const int metricgroups[][2] = {
+""")
+ for mgroup in sorted(_metricgroups):
+ description = _metricgroups[mgroup]
+ _args.output_file.write(
+ f'\t{{ {_bcs.offsets[mgroup]}, {_bcs.offsets[description]} }}, /* {mgroup} => {description} */\n'
+ )
+ _args.output_file.write("""
+};
+
+const char *describe_metricgroup(const char *group)
+{
+ int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;
+
+ while (low <= high) {
+ int mid = (low + high) / 2;
+ const char *mgroup = &big_c_string[metricgroups[mid][0]];
+ int cmp = strcmp(mgroup, group);
+
+ if (cmp == 0) {
+ return &big_c_string[metricgroups[mid][1]];
+ } else if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+ return NULL;
+}
+""")
def main() -> None:
global _args
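The generated describe_metricgroup() above is a plain binary search over name-sorted (name offset, description offset) pairs; in Python terms the lookup is equivalent to the following (a sketch, not generated output):

    import bisect

    # Sketch: the generated C table modeled as a name-sorted list of
    # (group name, description) pairs; the entries are illustrative.
    METRICGROUPS = [("TmaL1", "..."), ("TopdownL1", "...")]

    def describe_metricgroup(group: str) -> str | None:
        names = [name for name, _ in METRICGROUPS]
        i = bisect.bisect_left(names, group)
        if i < len(names) and names[i] == group:
            return METRICGROUPS[i][1]
        return None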
@@ -953,7 +999,7 @@ such as "arm/cortex-a34".''',
_args = ap.parse_args()
_args.output_file.write("""
-#include "pmu-events/pmu-events.h"
+#include <pmu-events/pmu-events.h>
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
@@ -993,7 +1039,7 @@ struct compact_pmu_event {
print_mapping_table(archs)
print_system_mapping_table()
-
+ print_metricgroups()
if __name__ == '__main__':
main()
diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py
index 8ec0ba884673..85a3545f5b6a 100644
--- a/tools/perf/pmu-events/metric.py
+++ b/tools/perf/pmu-events/metric.py
@@ -408,6 +408,12 @@ def source_count(event: Event) -> Function:
return Function('source_count', event)
+def has_event(event: Event) -> Function:
+ # pylint: disable=redefined-builtin
+ # pylint: disable=invalid-name
+ return Function('has_event', event)
+
+
class Metric:
"""An individual metric that will specifiable on the perf command line."""
groups: Set[str]
@@ -539,7 +545,7 @@ def ParsePerfJson(orig: str) -> Expression:
r'Event(r"\1")', py)
py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py)
py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py)
- keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count']
+ keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event']
for kw in keywords:
py = re.sub(rf'Event\(r"{kw}"\)', kw, py)
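With has_event registered both as a Function helper and as a parser keyword, guard expressions like the TSX ones earlier round-trip through the parser; for instance (an illustrative use of the module's own entry point):

    # Illustrative: has_event(...) now parses as a Function node rather
    # than being wrapped as Event(r"has_event").
    expr = ParsePerfJson(r'cycles\-t / cycles if has_event(cycles\-t) else 0')
    print(expr.ToPerfJson())  # a normalized form of the same guarded expression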
@@ -552,28 +558,34 @@ def ParsePerfJson(orig: str) -> Expression:
return _Constify(eval(compile(parsed, orig, 'eval')))
-def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, Expression]]
- )-> Dict[str, Expression]:
+def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, str, Expression]]
+ )-> Dict[Tuple[str, str], Expression]:
"""Shorten metrics by rewriting in terms of others.
Args:
- metrics (list): pairs of metric names and their expressions.
+      metrics (list): (pmu, metric name, expression) triples.
Returns:
- Dict: mapping from a metric name to a shortened expression.
+      Dict: mapping from a (pmu, metric name) pair to a shortened expression.
"""
- updates: Dict[str, Expression] = dict()
- for outer_name, outer_expression in metrics:
+ updates: Dict[Tuple[str, str], Expression] = dict()
+ for outer_pmu, outer_name, outer_expression in metrics:
+ if outer_pmu is None:
+ outer_pmu = 'cpu'
updated = outer_expression
while True:
- for inner_name, inner_expression in metrics:
+ for inner_pmu, inner_name, inner_expression in metrics:
+ if inner_pmu is None:
+ inner_pmu = 'cpu'
+ if inner_pmu.lower() != outer_pmu.lower():
+ continue
if inner_name.lower() == outer_name.lower():
continue
- if inner_name in updates:
- inner_expression = updates[inner_name]
+ if (inner_pmu, inner_name) in updates:
+ inner_expression = updates[(inner_pmu, inner_name)]
updated = updated.Substitute(inner_name, inner_expression)
if updated.Equals(outer_expression):
break
- if outer_name in updates and updated.Equals(updates[outer_name]):
+ if (outer_pmu, outer_name) in updates and updated.Equals(updates[(outer_pmu, outer_name)]):
break
- updates[outer_name] = updated
+ updates[(outer_pmu, outer_name)] = updated
return updates
diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py
index 40a3c7d8b2bc..ee22ff43ddd7 100755
--- a/tools/perf/pmu-events/metric_test.py
+++ b/tools/perf/pmu-events/metric_test.py
@@ -158,9 +158,9 @@ class TestMetricExpressions(unittest.TestCase):
def test_RewriteMetricsInTermsOfOthers(self):
Expression.__eq__ = lambda e1, e2: e1.Equals(e2)
- before = [('m1', ParsePerfJson('a + b + c + d')),
- ('m2', ParsePerfJson('a + b + c'))]
- after = {'m1': ParsePerfJson('m2 + d')}
+ before = [('cpu', 'm1', ParsePerfJson('a + b + c + d')),
+ ('cpu', 'm2', ParsePerfJson('a + b + c'))]
+ after = {('cpu', 'm1'): ParsePerfJson('m2 + d')}
self.assertEqual(RewriteMetricsInTermsOfOthers(before), after)
Expression.__eq__ = None
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 80349685cf4d..caf59f23cd64 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -51,6 +51,7 @@ struct pmu_event {
};
struct pmu_metric {
+ const char *pmu;
const char *metric_name;
const char *metric_group;
const char *metric_expr;
@@ -60,6 +61,7 @@ struct pmu_metric {
const char *desc;
const char *long_desc;
const char *metricgroup_no_group;
+ const char *default_metricgroup_name;
enum aggr_mode_class aggr_mode;
enum metric_event_groups event_grouping;
};
@@ -92,4 +94,6 @@ const struct pmu_metrics_table *find_sys_metrics_table(const char *name);
int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data);
int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data);
+const char *describe_metricgroup(const char *group);
+
#endif
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 53b1587db403..3954bd1587ce 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -100,8 +100,8 @@ static PyObject *perf_sample_insn(PyObject *obj, PyObject *args)
if (!c)
return NULL;
- if (c->sample->ip && !c->sample->insn_len && c->al->thread->maps) {
- struct machine *machine = maps__machine(c->al->thread->maps);
+ if (c->sample->ip && !c->sample->insn_len && thread__maps(c->al->thread)) {
+ struct machine *machine = maps__machine(thread__maps(c->al->thread));
script_fetch_insn(c->sample, c->al->thread, machine);
}
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index 4339692a8d0b..d59ff53f1d94 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -9,7 +9,6 @@
from __future__ import print_function
import os
from os import path
-import sys
import re
from subprocess import *
from optparse import OptionParser, make_option
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 56fba08a3037..61186d0d1cfa 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -34,7 +34,7 @@
#include "event.h"
#include "util.h"
#include "tests.h"
-#include "pmu.h"
+#include "pmus.h"
#define ENV "PERF_TEST_ATTR"
@@ -185,8 +185,15 @@ static int test__attr(struct test_suite *test __maybe_unused, int subtest __mayb
char path_dir[PATH_MAX];
char *exec_path;
- if (perf_pmu__has_hybrid())
+ if (perf_pmus__num_core_pmus() > 1) {
+ /*
+		 * TODO: Attribute tests hard code the PMU type. If there is more
+		 * than one core PMU then each PMU will have a different type, which
+		 * requires additional support.
+ */
+ pr_debug("Skip test on hybrid systems");
return TEST_SKIP;
+ }
/* First try development tree tests. */
if (!lstat("./tests", &st))
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b89d69afcef0..1f6557ce3b0a 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -88,15 +88,13 @@ static struct test_suite *generic_tests[] = {
&suite__bpf,
&suite__thread_map_synthesize,
&suite__thread_map_remove,
- &suite__cpu_map_synthesize,
+ &suite__cpu_map,
&suite__synthesize_stat_config,
&suite__synthesize_stat,
&suite__synthesize_stat_round,
&suite__event_update,
&suite__event_times,
&suite__backward_ring_buffer,
- &suite__cpu_map_print,
- &suite__cpu_map_merge,
&suite__sdt_event,
&suite__is_printable_array,
&suite__bitmap_print,
@@ -544,7 +542,6 @@ int cmd_test(int argc, const char **argv)
return run_workload(workload, argc, argv);
symbol_conf.priv_size = sizeof(int);
- symbol_conf.sort_by_name = true;
symbol_conf.try_vmlinux_path = true;
if (symbol__init(NULL) < 0)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index efe026a35010..ed3815163d1b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -241,6 +241,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
+ addr_location__init(&al);
if (!thread__find_map(thread, cpumode, addr, &al) || !map__dso(al.map)) {
if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
pr_debug("Hypervisor address can not be resolved - skipping\n");
@@ -269,7 +270,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
len = map__end(al.map) - addr;
/* Read the object code using perf */
- ret_len = dso__data_read_offset(dso, maps__machine(thread->maps),
+ ret_len = dso__data_read_offset(dso, maps__machine(thread__maps(thread)),
al.addr, buf1, len);
if (ret_len != len) {
pr_debug("dso__data_read_offset failed\n");
@@ -366,7 +367,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
}
pr_debug("Bytes read match those read by objdump\n");
out:
- map__put(al.map);
+ addr_location__exit(&al);
return err;
}
@@ -720,7 +721,6 @@ out_err:
evlist__delete(evlist);
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
- machine__delete_threads(machine);
machine__delete(machine);
return err;
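The addr_location__init()/addr_location__exit() conversion in this and the following test files is an acquire/release pairing: every path out of the lookup, including the early error returns, now drops the map and thread references. For intuition, it plays the role a context manager does in Python (sketch only, not the perf API):

    # Intuition sketch: init/exit bracket the lookup so error paths
    # still release whatever references the address lookup took.
    from contextlib import contextmanager

    @contextmanager
    def addr_location():
        al = {}            # stands in for 'struct addr_location'
        try:
            yield al       # body may return early or raise
        finally:
            al.clear()     # stands in for addr_location__exit(&al)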
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index b1a924314e09..7730fc2ab40b 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -171,6 +171,92 @@ static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subte
return 0;
}
-DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize);
-DEFINE_SUITE("Print cpu map", cpu_map_print);
-DEFINE_SUITE("Merge cpu map", cpu_map_merge);
+static int __test__cpu_map_intersect(const char *lhs, const char *rhs, int nr, const char *expected)
+{
+ struct perf_cpu_map *a = perf_cpu_map__new(lhs);
+ struct perf_cpu_map *b = perf_cpu_map__new(rhs);
+ struct perf_cpu_map *c = perf_cpu_map__intersect(a, b);
+ char buf[100];
+
+ TEST_ASSERT_EQUAL("failed to intersect map: bad nr", perf_cpu_map__nr(c), nr);
+ cpu_map__snprint(c, buf, sizeof(buf));
+ TEST_ASSERT_VAL("failed to intersect map: bad result", !strcmp(buf, expected));
+ perf_cpu_map__put(a);
+ perf_cpu_map__put(b);
+ perf_cpu_map__put(c);
+ return 0;
+}
+
+static int test__cpu_map_intersect(struct test_suite *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = __test__cpu_map_intersect("4,2,1", "4,5,7", 1, "4");
+ if (ret)
+ return ret;
+ ret = __test__cpu_map_intersect("1-8", "6-9", 3, "6-8");
+ if (ret)
+ return ret;
+ ret = __test__cpu_map_intersect("1-8,12-20", "6-9,15", 4, "6-8,15");
+ if (ret)
+ return ret;
+ ret = __test__cpu_map_intersect("4,2,1", "1", 1, "1");
+ if (ret)
+ return ret;
+ ret = __test__cpu_map_intersect("1", "4,2,1", 1, "1");
+ if (ret)
+ return ret;
+ ret = __test__cpu_map_intersect("1", "1", 1, "1");
+ return ret;
+}
+
+static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_cpu_map *any = perf_cpu_map__dummy_new();
+ struct perf_cpu_map *one = perf_cpu_map__new("1");
+ struct perf_cpu_map *two = perf_cpu_map__new("2");
+ struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two);
+ struct perf_cpu_map *pair = perf_cpu_map__new("1-2");
+ struct perf_cpu_map *tmp;
+ struct perf_cpu_map *maps[] = {empty, any, one, two, pair};
+
+ for (size_t i = 0; i < ARRAY_SIZE(maps); i++) {
+		/* Maps equal themselves. */
+ TEST_ASSERT_VAL("equal", perf_cpu_map__equal(maps[i], maps[i]));
+ for (size_t j = 0; j < ARRAY_SIZE(maps); j++) {
+			/* Maps don't equal each other. */
+ if (i == j)
+ continue;
+ TEST_ASSERT_VAL("not equal", !perf_cpu_map__equal(maps[i], maps[j]));
+ }
+ }
+
+	/* Maps equal equivalent, newly constructed maps. */
+ tmp = perf_cpu_map__merge(perf_cpu_map__get(one), two);
+ TEST_ASSERT_VAL("pair", perf_cpu_map__equal(pair, tmp));
+ perf_cpu_map__put(tmp);
+
+ tmp = perf_cpu_map__intersect(pair, one);
+ TEST_ASSERT_VAL("one", perf_cpu_map__equal(one, tmp));
+ perf_cpu_map__put(tmp);
+
+ for (size_t i = 0; i < ARRAY_SIZE(maps); i++)
+ perf_cpu_map__put(maps[i]);
+
+ return TEST_OK;
+}
+
+static struct test_case tests__cpu_map[] = {
+ TEST_CASE("Synthesize cpu map", cpu_map_synthesize),
+ TEST_CASE("Print cpu map", cpu_map_print),
+ TEST_CASE("Merge cpu map", cpu_map_merge),
+ TEST_CASE("Intersect cpu map", cpu_map_intersect),
+ TEST_CASE("Equal cpu map", cpu_map_equal),
+ { .name = NULL, }
+};
+
+struct test_suite suite__cpu_map = {
+ .desc = "CPU map",
+ .test_cases = tests__cpu_map,
+};
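The intersect expectations above follow plain set semantics on CPU list strings, which is easy to sanity-check (illustrative Python, not part of the test suite):

    # Illustrative check of one expected result from the intersect test.
    def cpu_list(s: str) -> set[int]:
        """Expand a cpu list such as '1-8,12-20' into a set of ints."""
        cpus: set[int] = set()
        for part in s.split(','):
            lo, _, hi = part.partition('-')
            cpus.update(range(int(lo), int(hi or lo) + 1))
        return cpus

    assert cpu_list("1-8,12-20") & cpu_list("6-9,15") == cpu_list("6-8,15")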
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
index 086fd2179e41..da3a9b50b1b1 100644
--- a/tools/perf/tests/dlfilter-test.c
+++ b/tools/perf/tests/dlfilter-test.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Test dlfilter C API. A perf.data file is synthesized and then processed
- * by perf script with a dlfilter named dlfilter-test-api-v0.so. Also a C file
+ * by perf script with dlfilters named dlfilter-test-api-v*.so. Also a C file
* is compiled to provide a dso to match the synthesized perf.data file.
*/
@@ -37,6 +37,8 @@
#define MAP_START 0x400000
+#define DLFILTER_TEST_NAME_MAX 128
+
struct test_data {
struct perf_tool tool;
struct machine *machine;
@@ -45,6 +47,8 @@ struct test_data {
u64 bar;
u64 ip;
u64 addr;
+ char name[DLFILTER_TEST_NAME_MAX];
+ char desc[DLFILTER_TEST_NAME_MAX];
char perf[PATH_MAX];
char perf_data_file_name[PATH_MAX];
char c_file_name[PATH_MAX];
@@ -215,7 +219,7 @@ static int write_prog(char *file_name)
return err ? -1 : 0;
}
-static int get_dlfilters_path(char *buf, size_t sz)
+static int get_dlfilters_path(const char *name, char *buf, size_t sz)
{
char perf[PATH_MAX];
char path[PATH_MAX];
@@ -224,12 +228,12 @@ static int get_dlfilters_path(char *buf, size_t sz)
perf_exe(perf, sizeof(perf));
perf_path = dirname(perf);
- snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", perf_path);
+ snprintf(path, sizeof(path), "%s/dlfilters/%s", perf_path, name);
if (access(path, R_OK)) {
exec_path = get_argv_exec_path();
if (!exec_path)
return -1;
- snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", exec_path);
+ snprintf(path, sizeof(path), "%s/dlfilters/%s", exec_path, name);
free(exec_path);
if (access(path, R_OK))
return -1;
@@ -244,9 +248,9 @@ static int check_filter_desc(struct test_data *td)
char *desc = NULL;
int ret;
- if (get_filter_desc(td->dlfilters, "dlfilter-test-api-v0.so", &desc, &long_desc) &&
+ if (get_filter_desc(td->dlfilters, td->name, &desc, &long_desc) &&
long_desc && !strcmp(long_desc, "Filter used by the 'dlfilter C API' perf test") &&
- desc && !strcmp(desc, "dlfilter to test v0 C API"))
+ desc && !strcmp(desc, td->desc))
ret = 0;
else
ret = -1;
@@ -284,7 +288,7 @@ static int get_ip_addr(struct test_data *td)
static int do_run_perf_script(struct test_data *td, int do_early)
{
return system_cmd("%s script -i %s "
- "--dlfilter %s/dlfilter-test-api-v0.so "
+ "--dlfilter %s/%s "
"--dlarg first "
"--dlarg %d "
"--dlarg %" PRIu64 " "
@@ -292,7 +296,7 @@ static int do_run_perf_script(struct test_data *td, int do_early)
"--dlarg %d "
"--dlarg last",
td->perf, td->perf_data_file_name, td->dlfilters,
- verbose, td->ip, td->addr, do_early);
+ td->name, verbose, td->ip, td->addr, do_early);
}
static int run_perf_script(struct test_data *td)
@@ -321,7 +325,7 @@ static int test__dlfilter_test(struct test_data *td)
u64 id = 99;
int err;
- if (get_dlfilters_path(td->dlfilters, PATH_MAX))
+ if (get_dlfilters_path(td->name, td->dlfilters, PATH_MAX))
return test_result("dlfilters not found", TEST_SKIP);
if (check_filter_desc(td))
@@ -399,14 +403,18 @@ static void test_data__free(struct test_data *td)
}
}
-static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+static int test__dlfilter_ver(int ver)
{
struct test_data td = {.fd = -1};
int pid = getpid();
int err;
+ pr_debug("\n-- Testing version %d API --\n", ver);
+
perf_exe(td.perf, sizeof(td.perf));
+ snprintf(td.name, sizeof(td.name), "dlfilter-test-api-v%d.so", ver);
+ snprintf(td.desc, sizeof(td.desc), "dlfilter to test v%d C API", ver);
snprintf(td.perf_data_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-perf-data", pid);
snprintf(td.c_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog.c", pid);
snprintf(td.prog_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog", pid);
@@ -416,4 +424,14 @@ static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __
return err;
}
+static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = test__dlfilter_ver(0);
+
+ if (err)
+ return err;
+ /* No test for version 1 */
+ return test__dlfilter_ver(2);
+}
+
DEFINE_SUITE("dlfilter C API", dlfilter);
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index ee983b677a6a..d01aa931fe81 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -235,7 +235,6 @@ noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused,
thread__put(thread);
out:
- machine__delete_threads(machine);
machine__delete(machine);
return err;
}
diff --git a/tools/perf/tests/event_groups.c b/tools/perf/tests/event_groups.c
index 029442b4e9c6..ccd9d8b2903f 100644
--- a/tools/perf/tests/event_groups.c
+++ b/tools/perf/tests/event_groups.c
@@ -50,13 +50,10 @@ static int event_open(int type, unsigned long config, int group_fd)
static int setup_uncore_event(void)
{
- struct perf_pmu *pmu;
+ struct perf_pmu *pmu = NULL;
int i, fd;
- if (list_empty(&pmus))
- perf_pmu__scan(NULL);
-
- perf_pmus__for_each_pmu(pmu) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
for (i = 0; i < NR_UNCORE_PMUS; i++) {
if (!strcmp(uncore_pmus[i].name, pmu->name)) {
pr_debug("Using %s for uncore pmu event\n", pmu->name);
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index e94fed901992..15ff86f9da0b 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -4,114 +4,93 @@
#include "parse-events.h"
#include "tests.h"
#include "debug.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
-#include <errno.h>
#include <linux/kernel.h>
static int perf_evsel__roundtrip_cache_name_test(void)
{
- char name[128];
- int type, op, err = 0, ret = 0, i, idx;
- struct evsel *evsel;
- struct evlist *evlist = evlist__new();
+ int ret = TEST_OK;
- if (evlist == NULL)
- return -ENOMEM;
-
- for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
- for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
if (!evsel__is_cache_op_valid(type, op))
continue;
- for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
- err = parse_event(evlist, name);
- if (err)
- ret = err;
- }
- }
- }
-
- idx = 0;
- evsel = evlist__first(evlist);
+ for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) {
+ char name[128];
+ struct evlist *evlist = evlist__new();
+ struct evsel *evsel;
+ int err;
- for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
- for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
- /* skip invalid cache type */
- if (!evsel__is_cache_op_valid(type, op))
- continue;
+ if (evlist == NULL) {
+ pr_debug("Failed to alloc evlist");
+ return TEST_FAIL;
+ }
+ __evsel__hw_cache_type_op_res_name(type, op, res,
+ name, sizeof(name));
- for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
- if (evsel->core.idx != idx)
+ err = parse_event(evlist, name);
+ if (err) {
+ pr_debug("Failure to parse cache event '%s' possibly as PMUs don't support it",
+ name);
+ evlist__delete(evlist);
continue;
-
- ++idx;
-
- if (strcmp(evsel__name(evsel), name)) {
- pr_debug("%s != %s\n", evsel__name(evsel), name);
- ret = -1;
}
-
- evsel = evsel__next(evsel);
+ evlist__for_each_entry(evlist, evsel) {
+ if (strcmp(evsel__name(evsel), name)) {
+ pr_debug("%s != %s\n", evsel__name(evsel), name);
+ ret = TEST_FAIL;
+ }
+ }
+ evlist__delete(evlist);
}
}
}
-
- evlist__delete(evlist);
return ret;
}
-static int __perf_evsel__name_array_test(const char *const names[], int nr_names,
- int distance)
+static int perf_evsel__name_array_test(const char *const names[], int nr_names)
{
- int i, err;
- struct evsel *evsel;
- struct evlist *evlist = evlist__new();
+ int ret = TEST_OK;
- if (evlist == NULL)
- return -ENOMEM;
+ for (int i = 0; i < nr_names; ++i) {
+ struct evlist *evlist = evlist__new();
+ struct evsel *evsel;
+ int err;
- for (i = 0; i < nr_names; ++i) {
+ if (evlist == NULL) {
+ pr_debug("Failed to alloc evlist");
+ return TEST_FAIL;
+ }
err = parse_event(evlist, names[i]);
if (err) {
pr_debug("failed to parse event '%s', err %d\n",
names[i], err);
- goto out_delete_evlist;
+ evlist__delete(evlist);
+ ret = TEST_FAIL;
+ continue;
}
- }
-
- err = 0;
- evlist__for_each_entry(evlist, evsel) {
- if (strcmp(evsel__name(evsel), names[evsel->core.idx / distance])) {
- --err;
- pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->core.idx / distance]);
+ evlist__for_each_entry(evlist, evsel) {
+ if (strcmp(evsel__name(evsel), names[i])) {
+ pr_debug("%s != %s\n", evsel__name(evsel), names[i]);
+ ret = TEST_FAIL;
+ }
}
+ evlist__delete(evlist);
}
-
-out_delete_evlist:
- evlist__delete(evlist);
- return err;
+ return ret;
}
-#define perf_evsel__name_array_test(names, distance) \
- __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance)
-
static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
- int err = 0, ret = 0;
-
- if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom"))
- return perf_evsel__name_array_test(evsel__hw_names, 2);
+ int err = 0, ret = TEST_OK;
- err = perf_evsel__name_array_test(evsel__hw_names, 1);
+ err = perf_evsel__name_array_test(evsel__hw_names, PERF_COUNT_HW_MAX);
if (err)
ret = err;
- err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1, 1);
+ err = perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1);
if (err)
ret = err;
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 733ead151c63..c1c3fcbc2753 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -185,6 +185,46 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
NULL, ctx) == 0);
TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
+ /* The expression is a constant 0.0 without needing to evaluate EVENT1. */
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("0 & EVENT1 > 0", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("EVENT1 > 0 & 0", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("1 & EVENT1 > 0", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1", &val_ptr));
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("EVENT1 > 0 & 1", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1", &val_ptr));
+
+ /* The expression is a constant 1.0 without needing to evaluate EVENT1. */
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("1 | EVENT1 > 0", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("EVENT1 > 0 | 1", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("0 | EVENT1 > 0", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1", &val_ptr));
+ expr__ctx_clear(ctx);
+ TEST_ASSERT_VAL("find ids",
+ expr__find_ids("EVENT1 > 0 | 0", NULL, ctx) == 0);
+ TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
+ TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1", &val_ptr));
+
 	/* Test topology constants appear well ordered. */
expr__ctx_clear(ctx);
TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0);
@@ -214,6 +254,10 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1);
TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", &val_ptr));
+ /* has_event returns 1 when an event exists. */
+ expr__add_id_val(ctx, strdup("cycles"), 2);
+ ret = test(ctx, "has_event(cycles)", 1);
+
expr__ctx_free(ctx);
return 0;
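The new cases encode the usual short-circuit folding rules: '0 & x' and '1 | x' are constants, so x's events never need to be programmed, while '1 & x' and '0 | x' reduce to x and keep its events. A compact statement of the rule (a sketch; the function names are hypothetical):

    # Sketch of the folding rule the tests above exercise. None means
    # "not constant: the other operand's events are still required".
    def fold_and(lhs, rhs):
        return 0 if lhs == 0 or rhs == 0 else None

    def fold_or(lhs, rhs):
        return 1 if lhs == 1 or rhs == 1 else None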
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index 745ab18d17db..d08add0f4da6 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -211,7 +211,7 @@ void print_hists_out(struct hists *hists)
struct dso *dso = map__dso(he->ms.map);
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
- i, thread__comm_str(he->thread), he->thread->tid,
+ i, thread__comm_str(he->thread), thread__tid(he->thread),
dso->short_name,
he->ms.sym->name, he->stat.period,
he->stat_acc ? he->stat_acc->period : 0);
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 8c0e3f334747..71dacb0fec4d 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -8,8 +8,8 @@
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
-#include "util/thread.h"
#include "util/parse-events.h"
+#include "util/thread.h"
#include "tests/tests.h"
#include "tests/hists_common.h"
#include <linux/kernel.h>
@@ -84,6 +84,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
struct perf_sample sample = { .period = 1000, };
size_t i;
+ addr_location__init(&al);
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
struct hist_entry_iter iter = {
.evsel = evsel,
@@ -107,20 +108,22 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
NULL) < 0) {
- addr_location__put(&al);
goto out;
}
- fake_samples[i].thread = al.thread;
+ thread__put(fake_samples[i].thread);
+ fake_samples[i].thread = thread__get(al.thread);
map__put(fake_samples[i].map);
- fake_samples[i].map = al.map;
+ fake_samples[i].map = map__get(al.map);
fake_samples[i].sym = al.sym;
}
+ addr_location__exit(&al);
return TEST_OK;
out:
pr_debug("Not enough memory for adding a hist entry\n");
+ addr_location__exit(&al);
return TEST_FAIL;
}
@@ -152,8 +155,10 @@ static void put_fake_samples(void)
{
size_t i;
- for (i = 0; i < ARRAY_SIZE(fake_samples); i++)
- map__put(fake_samples[i].map);
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ map__zput(fake_samples[i].map);
+ thread__zput(fake_samples[i].thread);
+ }
}
typedef int (*test_fn_t)(struct evsel *, struct machine *);
@@ -162,7 +167,6 @@ typedef int (*test_fn_t)(struct evsel *, struct machine *);
#define DSO(he) (map__dso(he->ms.map)->short_name)
#define SYM(he) (he->ms.sym->name)
#define CPU(he) (he->cpu)
-#define PID(he) (he->thread->tid)
#define DEPTH(he) (he->callchain->max_depth)
#define CDSO(cl) (map__dso(cl->ms.map)->short_name)
#define CSYM(cl) (cl->ms.sym->name)
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 98eff5935a1c..4b2e4f2fbe48 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -8,6 +8,7 @@
#include "util/evlist.h"
#include "util/machine.h"
#include "util/parse-events.h"
+#include "util/thread.h"
#include "tests/tests.h"
#include "tests/hists_common.h"
#include <linux/kernel.h>
@@ -53,6 +54,7 @@ static int add_hist_entries(struct evlist *evlist,
struct perf_sample sample = { .period = 100, };
size_t i;
+ addr_location__init(&al);
/*
* each evsel will have 10 samples but the 4th sample
* (perf [perf] main) will be collapsed to an existing entry
@@ -84,21 +86,22 @@ static int add_hist_entries(struct evlist *evlist,
al.socket = fake_samples[i].socket;
if (hist_entry_iter__add(&iter, &al,
sysctl_perf_event_max_stack, NULL) < 0) {
- addr_location__put(&al);
goto out;
}
- fake_samples[i].thread = al.thread;
+ thread__put(fake_samples[i].thread);
+ fake_samples[i].thread = thread__get(al.thread);
map__put(fake_samples[i].map);
- fake_samples[i].map = al.map;
+ fake_samples[i].map = map__get(al.map);
fake_samples[i].sym = al.sym;
}
}
-
+ addr_location__exit(&al);
return 0;
out:
pr_debug("Not enough memory for adding a hist entry\n");
+ addr_location__exit(&al);
return TEST_FAIL;
}
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 141e2972e34f..2d19657ab5e0 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -8,6 +8,7 @@
#include "machine.h"
#include "map.h"
#include "parse-events.h"
+#include "thread.h"
#include "hists_common.h"
#include "util/mmap.h"
#include <errno.h>
@@ -70,6 +71,7 @@ static int add_hist_entries(struct evlist *evlist, struct machine *machine)
struct perf_sample sample = { .period = 1, .weight = 1, };
size_t i = 0, k;
+ addr_location__init(&al);
/*
* each evsel will have 10 samples - 5 common and 5 distinct.
* However the second evsel also has a collapsed entry for
@@ -90,13 +92,13 @@ static int add_hist_entries(struct evlist *evlist, struct machine *machine)
he = hists__add_entry(hists, &al, NULL,
NULL, NULL, NULL, &sample, true);
if (he == NULL) {
- addr_location__put(&al);
goto out;
}
- fake_common_samples[k].thread = al.thread;
+ thread__put(fake_common_samples[k].thread);
+ fake_common_samples[k].thread = thread__get(al.thread);
map__put(fake_common_samples[k].map);
- fake_common_samples[k].map = al.map;
+ fake_common_samples[k].map = map__get(al.map);
fake_common_samples[k].sym = al.sym;
}
@@ -110,20 +112,22 @@ static int add_hist_entries(struct evlist *evlist, struct machine *machine)
he = hists__add_entry(hists, &al, NULL,
NULL, NULL, NULL, &sample, true);
if (he == NULL) {
- addr_location__put(&al);
goto out;
}
- fake_samples[i][k].thread = al.thread;
- fake_samples[i][k].map = al.map;
+ thread__put(fake_samples[i][k].thread);
+ fake_samples[i][k].thread = thread__get(al.thread);
+ map__put(fake_samples[i][k].map);
+ fake_samples[i][k].map = map__get(al.map);
fake_samples[i][k].sym = al.sym;
}
i++;
}
+ addr_location__exit(&al);
return 0;
-
out:
+ addr_location__exit(&al);
pr_debug("Not enough memory for adding a hist entry\n");
return -1;
}
@@ -144,7 +148,7 @@ static int find_sample(struct sample *samples, size_t nr_samples,
struct thread *t, struct map *m, struct symbol *s)
{
while (nr_samples--) {
- if (samples->thread == t &&
+ if (RC_CHK_ACCESS(samples->thread) == RC_CHK_ACCESS(t) &&
RC_CHK_ACCESS(samples->map) == RC_CHK_ACCESS(m) &&
samples->sym == s)
return 1;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index cebd5226bb12..ba1cccf57049 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -54,6 +54,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
struct perf_sample sample = { .period = 100, };
size_t i;
+ addr_location__init(&al);
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
struct hist_entry_iter iter = {
.evsel = evsel,
@@ -73,20 +74,21 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
NULL) < 0) {
- addr_location__put(&al);
goto out;
}
fake_samples[i].thread = al.thread;
map__put(fake_samples[i].map);
- fake_samples[i].map = al.map;
+ fake_samples[i].map = map__get(al.map);
fake_samples[i].sym = al.sym;
}
+ addr_location__exit(&al);
return TEST_OK;
out:
pr_debug("Not enough memory for adding a hist entry\n");
+ addr_location__exit(&al);
return TEST_FAIL;
}
@@ -118,8 +120,10 @@ static void put_fake_samples(void)
{
size_t i;
- for (i = 0; i < ARRAY_SIZE(fake_samples); i++)
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
map__put(fake_samples[i].map);
+ fake_samples[i].map = NULL;
+ }
}
typedef int (*test_fn_t)(struct evsel *, struct machine *);
@@ -128,7 +132,7 @@ typedef int (*test_fn_t)(struct evsel *, struct machine *);
#define DSO(he) (map__dso(he->ms.map)->short_name)
#define SYM(he) (he->ms.sym->name)
#define CPU(he) (he->cpu)
-#define PID(he) (he->thread->tid)
+#define PID(he) (thread__tid(he->thread))
/* default sort keys (no field) */
static int test1(struct evsel *evsel, struct machine *machine)
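
The PID() change above replaces direct field access with an accessor. A sketch of why that matters once reference-count checking wraps the public pointer; the types below are assumptions for illustration, not perf's actual layout:

#include <stdio.h>

struct thread_data { int tid; };

struct thread {			/* wrapper the RC_CHK machinery hands out */
	struct thread_data *data;
};

static int thread__tid(const struct thread *t)
{
	return t->data->tid;	/* accessor hides one level of indirection */
}

int main(void)
{
	struct thread_data d = { .tid = 42 };
	struct thread t = { .data = &d };

	printf("tid=%d\n", thread__tid(&t));	/* works wrapped or not */
	return 0;
}
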
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 8dd3f8090352..58cf96d762d0 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -69,6 +69,9 @@ make_clean_all := clean all
make_python_perf_so := $(python_perf_so)
make_debug := DEBUG=1
make_nondistro := BUILD_NONDISTRO=1
+make_extra_tests := EXTRA_TESTS=1
+make_bpf_skel := BUILD_BPF_SKEL=1
+make_gen_vmlinux_h := BUILD_BPF_SKEL=1 GEN_VMLINUX_H=1
make_no_libperl := NO_LIBPERL=1
make_no_libpython := NO_LIBPYTHON=1
make_no_scripts := NO_LIBPYTHON=1 NO_LIBPERL=1
@@ -136,6 +139,8 @@ endif
run += make_python_perf_so
run += make_debug
run += make_nondistro
run += make_bpf_skel
+run += make_gen_vmlinux_h
run += make_no_libperl
run += make_no_libpython
run += make_no_scripts
diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c
index 8c0eb5cf8bb5..5bb1123a91a7 100644
--- a/tools/perf/tests/maps.c
+++ b/tools/perf/tests/maps.c
@@ -140,7 +140,7 @@ static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest
ret = check_maps(merged3, ARRAY_SIZE(merged3), maps);
TEST_ASSERT_VAL("merge check failed", !ret);
- maps__delete(maps);
+ maps__zput(maps);
return TEST_OK;
}
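
maps__zput() above drops the reference and clears the caller's pointer in one step, so a later put is a harmless no-op. A hedged sketch of the generic "zput" idiom (all names here invented for this example, not perf's exact macro):

#include <stdlib.h>

struct maps { int refcnt; };

static void maps__put(struct maps *m)
{
	if (m && --m->refcnt == 0)
		free(m);
}

#define zput(ptr)			\
	do {				\
		maps__put(ptr);		\
		(ptr) = NULL;		\
	} while (0)

int main(void)
{
	struct maps *maps = calloc(1, sizeof(*maps));

	maps->refcnt = 1;
	zput(maps);		/* frees and clears the pointer */
	zput(maps);		/* now a safe no-op */
	return 0;
}
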
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 898eda55b7a8..ddd1da9a4ba9 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -187,6 +187,7 @@ static int mmap_events(synth_cb synth)
struct addr_location al;
struct thread *thread;
+ addr_location__init(&al);
thread = machine__findnew_thread(machine, getpid(), td->tid);
pr_debug("looking for map %p\n", td->map);
@@ -199,14 +200,14 @@ static int mmap_events(synth_cb synth)
if (!al.map) {
pr_debug("failed, couldn't find map\n");
err = -1;
+ addr_location__exit(&al);
break;
}
pr_debug("map %p, addr %" PRIx64 "\n", al.map, map__start(al.map));
- map__put(al.map);
+ addr_location__exit(&al);
}
- machine__delete_threads(machine);
machine__delete(machine);
return err;
}
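
The hunk above pairs addr_location__init() with an addr_location__exit() on every path out, replacing the ad-hoc map__put(). A runnable sketch of that discipline with stand-in helpers (location__init/exit and resolve() are assumptions for illustration):

#include <assert.h>

struct location { int holds_ref; };

static void location__init(struct location *l) { l->holds_ref = 0; }
static void location__exit(struct location *l) { l->holds_ref = 0; }
static int resolve(struct location *l) { l->holds_ref = 1; return 0; }

static int lookup_loop(int n)
{
	struct location l;
	int i, err = 0;

	location__init(&l);
	for (i = 0; i < n; i++) {
		if (resolve(&l) < 0) {
			err = -1;
			break;		/* the break still reaches the exit */
		}
	}
	location__exit(&l);		/* single release point on all paths */
	return err;
}

int main(void)
{
	assert(lookup_loop(3) == 0);
	return 0;
}
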
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 8068cfd89b84..658fb9599d95 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -6,7 +6,7 @@
#include "tests.h"
#include "debug.h"
#include "pmu.h"
-#include "pmu-hybrid.h"
+#include "pmus.h"
#include <dirent.h>
#include <errno.h>
#include "fncache.h"
@@ -20,6 +20,40 @@
#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
+static int num_core_entries(void)
+{
+ /*
+ * If the kernel supports extended type, expect events to be
+ * opened once for each core PMU type. Otherwise fall back to the legacy
+ * behavior of opening only one event even though there are multiple
+ * PMUs
+ */
+ if (perf_pmus__supports_extended_type())
+ return perf_pmus__num_core_pmus();
+
+ return 1;
+}
+
+static bool test_config(const struct evsel *evsel, __u64 expected_config)
+{
+ __u32 type = evsel->core.attr.type;
+ __u64 config = evsel->core.attr.config;
+
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
+ /*
+ * HARDWARE and HW_CACHE events encode the PMU's extended type
+ * in the top 32-bits. Mask in order to ignore.
+ */
+ config &= PERF_HW_EVENT_MASK;
+ }
+ return config == expected_config;
+}
+
+static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_config)
+{
+ return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
+}
+
#ifdef HAVE_LIBTRACEEVENT
#if defined(__s390x__)
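
test_config() and test_perf_config() above mask with PERF_HW_EVENT_MASK because, with extended types, HARDWARE and HW_CACHE configs carry the PMU's type id in the upper 32 bits. A small sketch of that encoding; the two constants mirror the uapi definitions, the rest is illustrative:

#include <assert.h>
#include <stdint.h>

#define PERF_HW_EVENT_MASK	0xffffffffULL	/* low bits: generic event */
#define PERF_PMU_TYPE_SHIFT	32		/* high bits: PMU type id  */

static uint64_t encode(uint32_t pmu_type, uint64_t hw_event)
{
	return ((uint64_t)pmu_type << PERF_PMU_TYPE_SHIFT) | hw_event;
}

int main(void)
{
	/* 8 stands in for some core PMU's dynamic type id */
	uint64_t cfg = encode(8, 1 /* PERF_COUNT_HW_INSTRUCTIONS */);

	assert((cfg & PERF_HW_EVENT_MASK) == 1);	/* event survives  */
	assert((cfg >> PERF_PMU_TYPE_SHIFT) == 8);	/* PMU id on top   */
	return 0;
}
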
@@ -82,11 +116,39 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
static int test__checkevent_raw(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
+ struct perf_evsel *evsel;
+ bool raw_type_match = false;
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+
+ perf_evlist__for_each_evsel(&evlist->core, evsel) {
+ struct perf_pmu *pmu __maybe_unused = NULL;
+ bool type_matched = false;
+
+ TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 0x1a));
+ TEST_ASSERT_VAL("event not parsed as raw type",
+ evsel->attr.type == PERF_TYPE_RAW);
+#if defined(__aarch64__)
+ /*
+ * Arm doesn't have a real raw type PMU in sysfs, so raw events
+ * would never match any PMU. However, RAW events on Arm will
+ * always successfully open on the first available core PMU
+ * so no need to test for a matching type here.
+ */
+ type_matched = raw_type_match = true;
+#else
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->attr.type) {
+ TEST_ASSERT_VAL("PMU type expected once", !type_matched);
+ type_matched = true;
+ if (pmu->type == PERF_TYPE_RAW)
+ raw_type_match = true;
+ }
+ }
+#endif
+ TEST_ASSERT_VAL("No PMU found for type", type_matched);
+ }
+ TEST_ASSERT_VAL("Raw PMU not matched", raw_type_match);
return TEST_OK;
}
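
The raw-event check above walks PMUs with the NULL-seeded perf_pmus__scan() pattern: pass NULL for the first element, the previous element to advance, and stop on NULL. A self-contained sketch of the same iterator shape over a toy table (pmu_scan() and its data are invented for this example):

#include <stddef.h>
#include <stdio.h>

struct pmu { const char *name; unsigned int type; };

static struct pmu pmus[] = { { "cpu", 4 }, { "uncore", 17 } };

static struct pmu *pmu_scan(struct pmu *prev)
{
	size_t next = prev ? (size_t)(prev - pmus) + 1 : 0;

	return next < sizeof(pmus) / sizeof(pmus[0]) ? &pmus[next] : NULL;
}

int main(void)
{
	struct pmu *pmu = NULL;

	while ((pmu = pmu_scan(pmu)) != NULL)	/* same shape as the test */
		printf("%s: %u\n", pmu->name, pmu->type);
	return 0;
}
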
@@ -96,39 +158,41 @@ static int test__checkevent_numeric(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
return TEST_OK;
}
static int test__checkevent_symbolic_name(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
+ struct perf_evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+
+ perf_evlist__for_each_evsel(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ }
return TEST_OK;
}
static int test__checkevent_symbolic_name_config(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
+ struct perf_evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- /*
- * The period value gets configured within evlist__config,
- * while this test executes only parse events method.
- */
- TEST_ASSERT_VAL("wrong period",
- 0 == evsel->core.attr.sample_period);
- TEST_ASSERT_VAL("wrong config1",
- 0 == evsel->core.attr.config1);
- TEST_ASSERT_VAL("wrong config2",
- 1 == evsel->core.attr.config2);
+ TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+
+ perf_evlist__for_each_evsel(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ /*
+ * The period value gets configured within evlist__config,
+ * while this test executes only the parse events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1);
+ TEST_ASSERT_VAL("wrong config2", 1 == evsel->attr.config2);
+ }
return TEST_OK;
}
@@ -138,18 +202,20 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_PAGE_FAULTS));
return TEST_OK;
}
static int test__checkevent_genhw(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
+ struct perf_evsel *evsel;
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
+
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, 1 << 16));
+ }
return TEST_OK;
}
@@ -159,7 +225,7 @@ static int test__checkevent_breakpoint(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 ==
@@ -173,7 +239,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type",
HW_BREAKPOINT_X == evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->core.attr.bp_len);
@@ -187,7 +253,7 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type",
PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type",
HW_BREAKPOINT_R == evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len",
@@ -202,7 +268,7 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type",
PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type",
HW_BREAKPOINT_W == evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len",
@@ -217,7 +283,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type",
PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type",
(HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len",
@@ -241,17 +307,15 @@ static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
static int
test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
{
- struct evsel *evsel;
+ struct perf_evsel *evsel;
TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries > 1);
- evlist__for_each_entry(evlist, evsel) {
- TEST_ASSERT_VAL("wrong exclude_user",
- !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel",
- evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
}
return test__checkevent_tracepoint_multi(evlist);
@@ -260,57 +324,65 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
static int test__checkevent_raw_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ struct perf_evsel *evsel;
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ }
return test__checkevent_raw(evlist);
}
static int test__checkevent_numeric_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ struct perf_evsel *evsel;
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ }
return test__checkevent_numeric(evlist);
}
static int test__checkevent_symbolic_name_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
+ struct perf_evsel *evsel;
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries());
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ }
return test__checkevent_symbolic_name(evlist);
}
static int test__checkevent_exclude_host_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ struct perf_evsel *evsel;
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ }
return test__checkevent_symbolic_name(evlist);
}
static int test__checkevent_exclude_guest_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ struct perf_evsel *evsel;
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ }
return test__checkevent_symbolic_name(evlist);
}
@@ -328,13 +400,14 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist)
static int test__checkevent_genhw_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ struct perf_evsel *evsel;
+ perf_evlist__for_each_entry(&evlist->core, evsel) {
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ }
return test__checkevent_genhw(evlist);
}
@@ -439,6 +512,93 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
return test__checkevent_breakpoint_rw(evlist);
}
+static int test__checkevent_breakpoint_modifier_name(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(evsel__name(evsel), "breakpoint"));
+
+ return test__checkevent_breakpoint(evlist);
+}
+
+static int test__checkevent_breakpoint_x_modifier_name(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(evsel__name(evsel), "breakpoint"));
+
+ return test__checkevent_breakpoint_x(evlist);
+}
+
+static int test__checkevent_breakpoint_r_modifier_name(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(evsel__name(evsel), "breakpoint"));
+
+ return test__checkevent_breakpoint_r(evlist);
+}
+
+static int test__checkevent_breakpoint_w_modifier_name(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(evsel__name(evsel), "breakpoint"));
+
+ return test__checkevent_breakpoint_w(evlist);
+}
+
+static int test__checkevent_breakpoint_rw_modifier_name(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(evsel__name(evsel), "breakpoint"));
+
+ return test__checkevent_breakpoint_rw(evlist);
+}
+
+static int test__checkevent_breakpoint_2_events(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "breakpoint1"));
+
+ evsel = evsel__next(evsel);
+
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "breakpoint2"));
+
+ return TEST_OK;
+}
+
static int test__checkevent_pmu(struct evlist *evlist)
{
@@ -446,7 +606,7 @@ static int test__checkevent_pmu(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 10 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 10));
TEST_ASSERT_VAL("wrong config1", 1 == evsel->core.attr.config1);
TEST_ASSERT_VAL("wrong config2", 3 == evsel->core.attr.config2);
TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
@@ -464,21 +624,23 @@ static int test__checkevent_list(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong number of entries", 3 <= evlist->core.nr_entries);
/* r1 */
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong config1", 0 == evsel->core.attr.config1);
- TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2);
- TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT != evsel->core.attr.type);
+ while (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
+ TEST_ASSERT_VAL("wrong config1", 0 == evsel->core.attr.config1);
+ TEST_ASSERT_VAL("wrong config2", 0 == evsel->core.attr.config2);
+ TEST_ASSERT_VAL("wrong config3", 0 == evsel->core.attr.config3);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ evsel = evsel__next(evsel);
+ }
/* syscalls:sys_enter_openat:k */
- evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong sample_type",
PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
@@ -491,7 +653,7 @@ static int test__checkevent_list(struct evlist *evlist)
/* 1:1:hp */
evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -508,14 +670,14 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
/* cpu/config=1,name=krava/u */
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava"));
/* cpu/config=2/u */
evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 2));
TEST_ASSERT_VAL("wrong name",
!strcmp(evsel__name(evsel), "cpu/config=2/u"));
@@ -529,7 +691,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
/* cpu/config=1,call-graph=fp,time,period=100000/ */
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
/*
* The period, time and callgraph values get configured within evlist__config,
* while this test executes only the parse events method.
@@ -541,7 +703,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
/* cpu/config=2,call-graph=no,time=0,period=2000/ */
evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 2));
/*
* The period, time and callgraph values get configured within evlist__config,
* while this test executes only the parse events method.
@@ -558,7 +720,8 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
struct evsel *evsel = evlist__first(evlist);
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type ||
+ strcmp(evsel->pmu_name, "cpu"));
TEST_ASSERT_VAL("wrong exclude_user",
!evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel",
@@ -574,23 +737,28 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
static int test__checkevent_pmu_events_mix(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
-
- /* pmu-event:u */
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong exclude_user",
- !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel",
- evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ struct evsel *evsel = NULL;
+ /*
+ * The wild card event will be opened at least once, but it may be
+ * opened on each core PMU.
+ */
+ TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries >= 2);
+ for (int i = 0; i < evlist->core.nr_entries - 1; i++) {
+ evsel = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ /* pmu-event:u */
+ TEST_ASSERT_VAL("wrong exclude_user",
+ !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ }
/* cpu/pmu-event/u */
evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong type", evsel__find_pmu(evsel)->is_core);
TEST_ASSERT_VAL("wrong exclude_user",
!evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel",
@@ -661,11 +829,11 @@ static int test__checkterms_simple(struct list_head *terms)
*/
term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
- term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+ term->type_term == PARSE_EVENTS__TERM_TYPE_RAW);
TEST_ASSERT_VAL("wrong type val",
- term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
- TEST_ASSERT_VAL("wrong val", term->val.num == 1);
- TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read"));
+ term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
+ TEST_ASSERT_VAL("wrong val", !strcmp(term->val.str, "read"));
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "raw"));
/*
* r0xead
@@ -675,11 +843,11 @@ static int test__checkterms_simple(struct list_head *terms)
*/
term = list_entry(term->list.next, struct parse_events_term, list);
TEST_ASSERT_VAL("wrong type term",
- term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+ term->type_term == PARSE_EVENTS__TERM_TYPE_RAW);
TEST_ASSERT_VAL("wrong type val",
- term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
- TEST_ASSERT_VAL("wrong val", term->val.num == 0xead);
- TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config"));
+ term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
+ TEST_ASSERT_VAL("wrong val", !strcmp(term->val.str, "r0xead"));
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "raw"));
return TEST_OK;
}
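
The flipped expectations above follow from the parser now classifying any r<hex> token as a raw event, and "read" happens to lex as 'r' plus the hex digits "ead". A toy matcher illustrating the rule (an illustration only, not the real lexer):

#include <assert.h>
#include <ctype.h>
#include <string.h>

static int looks_like_raw(const char *s)
{
	if (*s++ != 'r')
		return 0;
	if (!strncmp(s, "0x", 2))
		s += 2;			/* optional 0x prefix */
	if (!isxdigit((unsigned char)*s))
		return 0;
	while (*s)
		if (!isxdigit((unsigned char)*s++))
			return 0;
	return 1;			/* 'r' followed only by hex digits */
}

int main(void)
{
	assert(looks_like_raw("read"));		/* 'r' + hex "ead"  */
	assert(looks_like_raw("r0xead"));	/* explicit 0x form */
	assert(!looks_like_raw("cycles"));
	return 0;
}
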
@@ -687,189 +855,207 @@ static int test__group1(struct evlist *evlist)
{
struct evsel *evsel, *leader;
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
-
- /* instructions:k */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* cycles:upp */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- /* use of precise requires exclude_guest */
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (num_core_entries() * 2));
+ TEST_ASSERT_VAL("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries());
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* instructions:k */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* cycles:upp */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ }
return TEST_OK;
}
static int test__group2(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
+ struct evsel *evsel, *leader = NULL;
- TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries() + 1));
+ /*
+ * TODO: Currently the software event won't be grouped with the hardware
+ * event except for 1 PMU.
+ */
TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
- /* faults + :ku modifier */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* cache-references + :u modifier */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* cycles:k */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) {
+ /* faults + :ku modifier */
+ leader = evsel;
+ TEST_ASSERT_VAL("wrong config",
+ test_config(evsel, PERF_COUNT_SW_PAGE_FAULTS));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ continue;
+ }
+ if (evsel->core.attr.type == PERF_TYPE_HARDWARE &&
+ test_config(evsel, PERF_COUNT_HW_CACHE_REFERENCES)) {
+ /* cache-references + :u modifier */
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ if (evsel__has_leader(evsel, leader))
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ continue;
+ }
+ /* cycles:k */
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ }
return TEST_OK;
}
#ifdef HAVE_LIBTRACEEVENT
static int test__group3(struct evlist *evlist __maybe_unused)
{
- struct evsel *evsel, *leader;
+ struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL;
- TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2));
+ /*
+ * Currently the software event won't be grouped with the hardware event
+ * except for 1 PMU. This means there are always just 2 groups
+ * regardless of the number of core PMUs.
+ */
TEST_ASSERT_VAL("wrong number of groups", 2 == evlist__nr_groups(evlist));
- /* group1 syscalls:sys_enter_openat:H */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong sample_type",
- PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
- TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->core.attr.sample_period);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong group name",
- !strcmp(leader->group_name, "group1"));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* group1 cycles:kppp */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- /* use of precise requires exclude_guest */
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* group2 cycles + G modifier */
- evsel = leader = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong group name",
- !strcmp(leader->group_name, "group2"));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* group2 1:3 + G modifier */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* instructions:u */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ /* group1 syscalls:sys_enter_openat:H */
+ group1_leader = evsel;
+ TEST_ASSERT_VAL("wrong sample_type",
+ evsel->core.attr.sample_type == PERF_TP_SAMPLE_TYPE);
+ TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->core.attr.sample_period);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong group name", !strcmp(evsel->group_name, "group1"));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ continue;
+ }
+ if (evsel->core.attr.type == PERF_TYPE_HARDWARE &&
+ test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)) {
+ if (evsel->core.attr.exclude_user) {
+ /* group1 cycles:kppp */
+ TEST_ASSERT_VAL("wrong exclude_user",
+ evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest",
+ evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host",
+ !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip",
+ evsel->core.attr.precise_ip == 3);
+ if (evsel__has_leader(evsel, group1_leader)) {
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong group_idx",
+ evsel__group_idx(evsel) == 1);
+ }
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ } else {
+ /* group2 cycles + G modifier */
+ group2_leader = evsel;
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv",
+ !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest",
+ !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host",
+ evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ if (evsel->core.nr_members == 2) {
+ TEST_ASSERT_VAL("wrong group_idx",
+ evsel__group_idx(evsel) == 0);
+ }
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ }
+ continue;
+ }
+ if (evsel->core.attr.type == 1) {
+ /* group2 1:3 + G modifier */
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 3));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ if (evsel__has_leader(evsel, group2_leader))
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ continue;
+ }
+ /* instructions:u */
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ }
return TEST_OK;
}
#endif
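
The group tests above reduce to count arithmetic: groups of core events are replicated once per core PMU, while software- or tracepoint-led groups are not. A sketch of the expected numbers, assuming a hypothetical two-PMU hybrid (n = 2 models perf_pmus__num_core_pmus(); values are illustrative):

#include <stdio.h>

static void expect(const char *test, int entries, int groups)
{
	printf("%-8s entries=%d groups=%d\n", test, entries, groups);
}

int main(void)
{
	int n = 2;	/* e.g. cpu_core + cpu_atom */

	expect("group1", 2 * n, n);	/* one 2-event group per core PMU */
	expect("group3", 3 * n + 2, 2);	/* non-core leaders don't multiply */
	return 0;
}
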
@@ -878,425 +1064,435 @@ static int test__group4(struct evlist *evlist __maybe_unused)
{
struct evsel *evsel, *leader;
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
-
- /* cycles:u + p */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- /* use of precise requires exclude_guest */
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* instructions:kp + p */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- /* use of precise requires exclude_guest */
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (num_core_entries() * 2));
+ TEST_ASSERT_VAL("wrong number of groups",
+ num_core_entries() == evlist__nr_groups(evlist));
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles:u + p */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions:kp + p */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ }
return TEST_OK;
}
static int test__group5(struct evlist *evlist __maybe_unused)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 2 == evlist__nr_groups(evlist));
-
- /* cycles + G */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* instructions + G */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* cycles:G */
- evsel = leader = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
- TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
-
- /* instructions:G */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
-
- /* cycles */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
-
+ struct evsel *evsel = NULL, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (5 * num_core_entries()));
+ TEST_ASSERT_VAL("wrong number of groups",
+ evlist__nr_groups(evlist) == (2 * num_core_entries()));
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles + G */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions + G */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+ }
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles:G */
+ evsel = leader = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions:G */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ }
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ }
return TEST_OK;
}
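
With multiple core PMUs each legacy hardware event is wildcard-expanded into one evsel per PMU, so the rewritten checks walk num_core_entries() consecutive copies of every group. A minimal sketch of the walk idiom, reusing only helpers the tests already call:

	struct evsel *evsel = NULL, *leader;

	for (int i = 0; i < num_core_entries(); i++) {
		/* Start at the list head once, then keep walking forward. */
		evsel = leader = (i == 0 ? evlist__first(evlist)
					 : evsel__next(evsel));
		/* ...assert on this leader, then step through the rest of
		 * the group with evsel__next()... */
	}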
static int test__group_gh1(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
-
- /* cycles + :H group modifier */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
-
- /* cache-misses:G + :H group modifier */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
-
+ struct evsel *evsel = NULL, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries()));
+ TEST_ASSERT_VAL("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries());
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles + :H group modifier */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:G + :H group modifier */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ }
return TEST_OK;
}
static int test__group_gh2(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
-
- /* cycles + :G group modifier */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
-
- /* cache-misses:H + :G group modifier */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
-
+ struct evsel *evsel = NULL, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries()));
+ TEST_ASSERT_VAL("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries());
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles + :G group modifier */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :G group modifier */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ }
return TEST_OK;
}
static int test__group_gh3(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
+ struct evsel *evsel = NULL, *leader;
- /* cycles:G + :u group modifier */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
-
- /* cache-misses:H + :u group modifier */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries()));
+ TEST_ASSERT_VAL("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries());
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles:G + :u group modifier */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :u group modifier */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ }
return TEST_OK;
}
static int test__group_gh4(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
+ struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
-
- /* cycles:G + :uG group modifier */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
- TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
-
- /* cache-misses:H + :uG group modifier */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries()));
+ TEST_ASSERT_VAL("wrong number of groups",
+ evlist__nr_groups(evlist) == num_core_entries());
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles:G + :uG group modifier */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :uG group modifier */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
+ }
return TEST_OK;
}
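
The guest/host matrix that the four gh* tests encode, read directly off the assertions above:

	/*
	 * :G (guest only) -> exclude_host = 1, exclude_guest = 0
	 * :H (host only)  -> exclude_guest = 1, exclude_host = 0
	 * When an event-level and a group-level modifier disagree, e.g.
	 * cache-misses:G inside {...}:H, neither exclude bit is set and
	 * the event counts in both contexts.
	 */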
static int test__leader_sample1(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
-
- /* cycles - sampling group leader */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
-
- /* cache-misses - not sampling */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
-
- /* branch-misses - not sampling */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
-
+ struct evsel *evsel = NULL, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (3 * num_core_entries()));
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles - sampling group leader */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ /* cache-misses - not sampling */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ /* branch-misses - not sampling */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ }
return TEST_OK;
}
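
For the :S checks: leader sampling makes only the group leader sample while its members are read through it, which is why every evsel in the group must carry sample_read. Assuming the usual perf event syntax, the string validated here is exercised as:

	$ perf record -e '{cycles,cache-misses,branch-misses}:S' -- sleep 1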
static int test__leader_sample2(struct evlist *evlist __maybe_unused)
{
- struct evsel *evsel, *leader;
+ struct evsel *evsel = NULL, *leader;
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
-
- /* instructions - sampling group leader */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
-
- /* branch-misses - not sampling */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
- TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
- TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (2 * num_core_entries()));
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* instructions - sampling group leader */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ /* branch-misses - not sampling */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+ }
return TEST_OK;
}
static int test__checkevent_pinned_modifier(struct evlist *evlist)
{
- struct evsel *evsel = evlist__first(evlist);
+ struct evsel *evsel = NULL;
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == num_core_entries());
+ for (int i = 0; i < num_core_entries(); i++) {
+ evsel = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
+ }
return test__checkevent_symbolic_name(evlist);
}
static int test__pinned_group(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
-
- /* cycles - group leader */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
-
- /* cache-misses - can not be pinned, but will go on with the leader */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
-
- /* branch-misses - ditto */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
-
+ struct evsel *evsel = NULL, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == (3 * num_core_entries()));
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles - group leader */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ /* TODO: The group modifier is not copied to the split group leader. */
+ if (perf_pmus__num_core_pmus() == 1)
+ TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
+
+ /* cache-misses - cannot be pinned, but will go on with the leader */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+
+ /* branch-misses - ditto */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+ TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ }
return TEST_OK;
}
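
The perf_pmus__num_core_pmus() == 1 guard follows from the in-line TODO: when a group is split per core PMU, the pinned bit from the :D modifier is not copied onto the new split leaders, so the positive check only holds on single-PMU systems. A sketch of the single-PMU expectation (illustrative only, using assert() for brevity):

	/* "{cycles,cache-misses,branch-misses}:D" on one core PMU: */
	leader = evlist__first(evlist);
	assert(leader->core.attr.pinned);               /* leader pinned  */
	assert(!evsel__next(leader)->core.attr.pinned); /* members aren't */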
@@ -1315,32 +1511,33 @@ static int test__checkevent_exclusive_modifier(struct evlist *evlist)
static int test__exclusive_group(struct evlist *evlist)
{
- struct evsel *evsel, *leader;
-
- TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
-
- /* cycles - group leader */
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
-
- /* cache-misses - can not be pinned, but will go on with the leader */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
-
- /* branch-misses - ditto */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
-
+ struct evsel *evsel = NULL, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == 3 * num_core_entries());
+
+ for (int i = 0; i < num_core_entries(); i++) {
+ /* cycles - group leader */
+ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+ /* TODO: The group modifier is not copied to the split group leader. */
+ if (perf_pmus__num_core_pmus() == 1)
+ TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+
+ /* cache-misses - cannot be exclusive, but will go on with the leader */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+
+ /* branch-misses - ditto */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+ }
return TEST_OK;
}
static int test__checkevent_breakpoint_len(struct evlist *evlist)
@@ -1349,7 +1546,7 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 ==
@@ -1364,7 +1561,7 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0 == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0));
TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W ==
evsel->core.attr.bp_type);
TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 ==
@@ -1390,10 +1587,10 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong number of entries",
+ evlist->core.nr_entries == 1 + num_core_entries());
TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config",
- PERF_COUNT_SW_TASK_CLOCK == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_SW_TASK_CLOCK));
return TEST_OK;
}
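
The entry count follows from wildcard expansion: the software event stays a single evsel while its hardware companion expands once per core PMU. For example, assuming an Intel hybrid machine with cpu_core and cpu_atom:

	/* nr_entries == 1 (task-clock, software)
	 *             + 2 (one hardware evsel per core PMU) == 3 */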
@@ -1426,12 +1623,27 @@ static int test__checkevent_config_cache(struct evlist *evlist)
struct evsel *evsel = evlist__first(evlist);
TEST_ASSERT_VAL("wrong name setting", evsel__name_is(evsel, "cachepmu"));
- return TEST_OK;
+ return test__checkevent_genhw(evlist);
+}
+
+static bool test__pmu_cpu_valid(void)
+{
+ return !!perf_pmus__find("cpu");
+}
+
+static bool test__pmu_cpu_event_valid(void)
+{
+ struct perf_pmu *pmu = perf_pmus__find("cpu");
+
+ if (!pmu)
+ return false;
+
+ return perf_pmu__has_format(pmu, "event");
}
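
These two helpers gate the cpu/.../-style table entries below on a PMU literally named "cpu", which hybrid x86 (cpu_core/cpu_atom) and most non-x86 systems do not expose. A sketch of how the runner is assumed to consume such a gate (the actual consumer is test_event()):

	if (e->valid && !e->valid()) {
		pr_debug("skipping '%s': prerequisite PMU missing\n", e->name);
		return TEST_SKIP;
	}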
static bool test__intel_pt_valid(void)
{
- return !!perf_pmu__find("intel_pt");
+ return !!perf_pmus__find("intel_pt");
}
static int test__intel_pt(struct evlist *evlist)
@@ -1446,7 +1658,9 @@ static int test__checkevent_complex_name(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong complex name parsing", evsel__name_is(evsel, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks"));
+ TEST_ASSERT_VAL("wrong complex name parsing",
+ evsel__name_is(evsel,
+ "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks"));
return TEST_OK;
}
@@ -1456,7 +1670,7 @@ static int test__checkevent_raw_pmu(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, 0x1a));
return TEST_OK;
}
@@ -1465,7 +1679,7 @@ static int test__sym_event_slash(struct evlist *evlist)
struct evsel *evsel = evlist__first(evlist);
TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
- TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
return TEST_OK;
}
@@ -1475,11 +1689,31 @@ static int test__sym_event_dc(struct evlist *evlist)
struct evsel *evsel = evlist__first(evlist);
TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
- TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
return TEST_OK;
}
+static int test__term_equal_term(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0);
+ return TEST_OK;
+}
+
+static int test__term_equal_legacy(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
+ TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0);
+ return TEST_OK;
+}
+
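
The two name= checks pin down that the value of a name term is taken literally, whether it is an arbitrary string or a token such as "l1d" that also exists as a legacy cache event. As a usage example:

	$ perf stat -e 'cycles/name=l1d/' -- true   # counts cycles, labelled "l1d"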
#ifdef HAVE_LIBTRACEEVENT
static int count_tracepoints(void)
{
@@ -1536,127 +1770,6 @@ static int test__all_tracepoints(struct evlist *evlist)
}
#endif /* HAVE_LIBTRACEEVENT */
-static int test__hybrid_hw_event_with_pmu(struct evlist *evlist)
-{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config);
- return TEST_OK;
-}
-
-static int test__hybrid_hw_group_event(struct evlist *evlist)
-{
- struct evsel *evsel, *leader;
-
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
-
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- return TEST_OK;
-}
-
-static int test__hybrid_sw_hw_group_event(struct evlist *evlist)
-{
- struct evsel *evsel, *leader;
-
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
-
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- return TEST_OK;
-}
-
-static int test__hybrid_hw_sw_group_event(struct evlist *evlist)
-{
- struct evsel *evsel, *leader;
-
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
-
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- return TEST_OK;
-}
-
-static int test__hybrid_group_modifier1(struct evlist *evlist)
-{
- struct evsel *evsel, *leader;
-
- evsel = leader = evlist__first(evlist);
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
-
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
- TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
- TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
- return TEST_OK;
-}
-
-static int test__hybrid_raw1(struct evlist *evlist)
-{
- struct evsel *evsel = evlist__first(evlist);
-
- if (!perf_pmu__hybrid_mounted("cpu_atom")) {
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
- return TEST_OK;
- }
-
- TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
-
- /* The type of the second event is a random value */
- evsel = evsel__next(evsel);
- TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
- return TEST_OK;
-}
-
-static int test__hybrid_raw2(struct evlist *evlist)
-{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
- return TEST_OK;
-}
-
-static int test__hybrid_cache_event(struct evlist *evlist)
-{
- struct evsel *evsel = evlist__first(evlist);
-
- TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
- TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff));
- return TEST_OK;
-}
-
struct evlist_test {
const char *name;
bool (*valid)(void);
@@ -1973,26 +2086,110 @@ static const struct evlist_test test__events[] = {
.check = test__exclusive_group,
/* 7 */
},
+ {
+ .name = "cycles/name=name/",
+ .check = test__term_equal_term,
+ /* 8 */
+ },
+ {
+ .name = "cycles/name=l1d/",
+ .check = test__term_equal_legacy,
+ /* 9 */
+ },
+ {
+ .name = "mem:0/name=breakpoint/",
+ .check = test__checkevent_breakpoint,
+ /* 0 */
+ },
+ {
+ .name = "mem:0:x/name=breakpoint/",
+ .check = test__checkevent_breakpoint_x,
+ /* 1 */
+ },
+ {
+ .name = "mem:0:r/name=breakpoint/",
+ .check = test__checkevent_breakpoint_r,
+ /* 2 */
+ },
+ {
+ .name = "mem:0:w/name=breakpoint/",
+ .check = test__checkevent_breakpoint_w,
+ /* 3 */
+ },
+ {
+ .name = "mem:0/name=breakpoint/u",
+ .check = test__checkevent_breakpoint_modifier_name,
+ /* 4 */
+ },
+ {
+ .name = "mem:0:x/name=breakpoint/k",
+ .check = test__checkevent_breakpoint_x_modifier_name,
+ /* 5 */
+ },
+ {
+ .name = "mem:0:r/name=breakpoint/hp",
+ .check = test__checkevent_breakpoint_r_modifier_name,
+ /* 6 */
+ },
+ {
+ .name = "mem:0:w/name=breakpoint/up",
+ .check = test__checkevent_breakpoint_w_modifier_name,
+ /* 7 */
+ },
+ {
+ .name = "mem:0:rw/name=breakpoint/",
+ .check = test__checkevent_breakpoint_rw,
+ /* 8 */
+ },
+ {
+ .name = "mem:0:rw/name=breakpoint/kp",
+ .check = test__checkevent_breakpoint_rw_modifier_name,
+ /* 9 */
+ },
+ {
+ .name = "mem:0/1/name=breakpoint/",
+ .check = test__checkevent_breakpoint_len,
+ /* 0 */
+ },
+ {
+ .name = "mem:0/2:w/name=breakpoint/",
+ .check = test__checkevent_breakpoint_len_w,
+ /* 1 */
+ },
+ {
+ .name = "mem:0/4:rw/name=breakpoint/u",
+ .check = test__checkevent_breakpoint_len_rw_modifier,
+ /* 2 */
+ },
+ {
+ .name = "mem:0/1/name=breakpoint1/,mem:0/4:rw/name=breakpoint2/",
+ .check = test__checkevent_breakpoint_2_events,
+ /* 3 */
+ },
};
static const struct evlist_test test__events_pmu[] = {
{
.name = "cpu/config=10,config1,config2=3,period=1000/u",
+ .valid = test__pmu_cpu_valid,
.check = test__checkevent_pmu,
/* 0 */
},
{
.name = "cpu/config=1,name=krava/u,cpu/config=2/u",
+ .valid = test__pmu_cpu_valid,
.check = test__checkevent_pmu_name,
/* 1 */
},
{
.name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .valid = test__pmu_cpu_valid,
.check = test__checkevent_pmu_partial_time_callgraph,
/* 2 */
},
{
.name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
+ .valid = test__pmu_cpu_event_valid,
.check = test__checkevent_complex_name,
/* 3 */
},
@@ -2006,66 +2203,174 @@ static const struct evlist_test test__events_pmu[] = {
.check = test__checkevent_raw_pmu,
/* 5 */
},
-};
-
-struct terms_test {
- const char *str;
- int (*check)(struct list_head *terms);
-};
-
-static const struct terms_test test__terms[] = {
- [0] = {
- .str = "config=10,config1,config2=3,config3=4,umask=1,read,r0xead",
- .check = test__checkterms_simple,
+ {
+ .name = "cpu/L1-dcache-load-miss/",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_genhw,
+ /* 6 */
},
-};
-
-static const struct evlist_test test__hybrid_events[] = {
{
- .name = "cpu_core/cpu-cycles/",
- .check = test__hybrid_hw_event_with_pmu,
+ .name = "cpu/L1-dcache-load-miss/kp",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_genhw_modifier,
+ /* 7 */
+ },
+ {
+ .name = "cpu/L1-dcache-misses,name=cachepmu/",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_config_cache,
+ /* 8 */
+ },
+ {
+ .name = "cpu/instructions/",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_symbolic_name,
+ /* 9 */
+ },
+ {
+ .name = "cpu/cycles,period=100000,config2/",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_symbolic_name_config,
/* 0 */
},
{
- .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}",
- .check = test__hybrid_hw_group_event,
+ .name = "cpu/instructions/h",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_symbolic_name_modifier,
/* 1 */
},
{
- .name = "{cpu-clock,cpu_core/cpu-cycles/}",
- .check = test__hybrid_sw_hw_group_event,
+ .name = "cpu/instructions/G",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_exclude_host_modifier,
/* 2 */
},
{
- .name = "{cpu_core/cpu-cycles/,cpu-clock}",
- .check = test__hybrid_hw_sw_group_event,
+ .name = "cpu/instructions/H",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_exclude_guest_modifier,
/* 3 */
},
{
- .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}",
- .check = test__hybrid_group_modifier1,
+ .name = "{cpu/instructions/k,cpu/cycles/upp}",
+ .valid = test__pmu_cpu_valid,
+ .check = test__group1,
/* 4 */
},
{
- .name = "r1a",
- .check = test__hybrid_raw1,
+ .name = "{cpu/cycles/u,cpu/instructions/kp}:p",
+ .valid = test__pmu_cpu_valid,
+ .check = test__group4,
/* 5 */
},
{
- .name = "cpu_core/r1a/",
- .check = test__hybrid_raw2,
+ .name = "{cpu/cycles/,cpu/cache-misses/G}:H",
+ .valid = test__pmu_cpu_valid,
+ .check = test__group_gh1,
/* 6 */
},
{
- .name = "cpu_core/config=10,config1,config2=3,period=1000/u",
- .check = test__checkevent_pmu,
+ .name = "{cpu/cycles/,cpu/cache-misses/H}:G",
+ .valid = test__pmu_cpu_valid,
+ .check = test__group_gh2,
/* 7 */
},
{
- .name = "cpu_core/LLC-loads/",
- .check = test__hybrid_cache_event,
+ .name = "{cpu/cycles/G,cpu/cache-misses/H}:u",
+ .valid = test__pmu_cpu_valid,
+ .check = test__group_gh3,
/* 8 */
},
+ {
+ .name = "{cpu/cycles/G,cpu/cache-misses/H}:uG",
+ .valid = test__pmu_cpu_valid,
+ .check = test__group_gh4,
+ /* 9 */
+ },
+ {
+ .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:S",
+ .valid = test__pmu_cpu_valid,
+ .check = test__leader_sample1,
+ /* 0 */
+ },
+ {
+ .name = "{cpu/instructions/,cpu/branch-misses/}:Su",
+ .valid = test__pmu_cpu_valid,
+ .check = test__leader_sample2,
+ /* 1 */
+ },
+ {
+ .name = "cpu/instructions/uDp",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_pinned_modifier,
+ /* 2 */
+ },
+ {
+ .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:D",
+ .valid = test__pmu_cpu_valid,
+ .check = test__pinned_group,
+ /* 3 */
+ },
+ {
+ .name = "cpu/instructions/I",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_exclude_idle_modifier,
+ /* 4 */
+ },
+ {
+ .name = "cpu/instructions/kIG",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_exclude_idle_modifier_1,
+ /* 5 */
+ },
+ {
+ .name = "cpu/cycles/u",
+ .valid = test__pmu_cpu_valid,
+ .check = test__sym_event_slash,
+ /* 6 */
+ },
+ {
+ .name = "cpu/cycles/k",
+ .valid = test__pmu_cpu_valid,
+ .check = test__sym_event_dc,
+ /* 7 */
+ },
+ {
+ .name = "cpu/instructions/uep",
+ .valid = test__pmu_cpu_valid,
+ .check = test__checkevent_exclusive_modifier,
+ /* 8 */
+ },
+ {
+ .name = "{cpu/cycles/,cpu/cache-misses/,cpu/branch-misses/}:e",
+ .valid = test__pmu_cpu_valid,
+ .check = test__exclusive_group,
+ /* 9 */
+ },
+ {
+ .name = "cpu/cycles,name=name/",
+ .valid = test__pmu_cpu_valid,
+ .check = test__term_equal_term,
+ /* 0 */
+ },
+ {
+ .name = "cpu/cycles,name=l1d/",
+ .valid = test__pmu_cpu_valid,
+ .check = test__term_equal_legacy,
+ /* 1 */
+ },
+};
+
+struct terms_test {
+ const char *str;
+ int (*check)(struct list_head *terms);
+};
+
+static const struct terms_test test__terms[] = {
+ [0] = {
+ .str = "config=10,config1,config2=3,config3=4,umask=1,read,r0xead",
+ .check = test__checkterms_simple,
+ },
};
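
The terms table is driven through parse_events_terms(), as in test_term() below. A minimal sketch of the call shape, assuming parse_events_terms__purge() as the cleanup helper:

	LIST_HEAD(terms);
	int err = parse_events_terms(&terms, "config=10,umask=1,read");

	if (!err)
		/* walk and validate the parsed terms, then free them */
		parse_events_terms__purge(&terms);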
static int test_event(const struct evlist_test *e)
@@ -2091,7 +2396,7 @@ static int test_event(const struct evlist_test *e)
e->name, ret, err.str);
parse_events_error__print(&err, e->name);
ret = TEST_FAIL;
- if (strstr(err.str, "can't access trace events"))
+ if (err.str && strstr(err.str, "can't access trace events"))
ret = TEST_SKIP;
} else {
ret = e->check(evlist);
@@ -2113,8 +2418,8 @@ static int test_event_fake_pmu(const char *str)
return -ENOMEM;
parse_events_error__init(&err);
- perf_pmu__test_parse_init();
- ret = __parse_events(evlist, str, &err, &perf_pmu__fake, /*warn_if_reordered=*/true);
+ ret = __parse_events(evlist, str, /*pmu_filter=*/NULL, &err,
+ &perf_pmu__fake, /*warn_if_reordered=*/true);
if (ret) {
pr_debug("failed to parse event '%s', err %d, str '%s'\n",
str, ret, err.str);
@@ -2167,13 +2472,6 @@ static int test_term(const struct terms_test *t)
INIT_LIST_HEAD(&terms);
- /*
- * The perf_pmu__test_parse_init prepares perf_pmu_events_list
- * which gets freed in parse_events_terms.
- */
- if (perf_pmu__test_parse_init())
- return -1;
-
ret = parse_events_terms(&terms, t->str);
if (ret) {
pr_debug("failed to parse terms '%s', err %d\n",
@@ -2208,99 +2506,88 @@ static int test__terms2(struct test_suite *test __maybe_unused, int subtest __ma
return test_terms(test__terms, ARRAY_SIZE(test__terms));
}
-static int test_pmu(void)
+static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- struct stat st;
- char path[PATH_MAX];
- int ret;
+ struct perf_pmu *pmu = NULL;
+ int ret = TEST_OK;
- snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/format/",
- sysfs__mountpoint());
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ struct stat st;
+ char path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+ int err;
- ret = stat(path, &st);
- if (ret)
- pr_debug("omitting PMU cpu tests\n");
- return !ret;
-}
+ snprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/",
+ sysfs__mountpoint(), pmu->name);
-static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
-{
- struct stat st;
- char path[PATH_MAX];
- struct dirent *ent;
- DIR *dir;
- int ret;
+ err = stat(path, &st);
+ if (err) {
+ pr_debug("skipping PMU %s events tests: %s\n", pmu->name, path);
+ continue;
+ }
- if (!test_pmu())
- return TEST_SKIP;
+ dir = opendir(path);
+ if (!dir) {
+ pr_debug("can't open pmu event dir: %s\n", path);
+ ret = combine_test_results(ret, TEST_SKIP);
+ continue;
+ }
- snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/",
- sysfs__mountpoint());
+ while ((ent = readdir(dir))) {
+ struct evlist_test e = { .name = NULL, };
+ char name[2 * NAME_MAX + 1 + 12 + 3];
+ int test_ret;
- ret = stat(path, &st);
- if (ret) {
- pr_debug("omitting PMU cpu events tests: %s\n", path);
- return TEST_OK;
- }
+ /* Names containing . are special and cannot be used directly */
+ if (strchr(ent->d_name, '.'))
+ continue;
- dir = opendir(path);
- if (!dir) {
- pr_debug("can't open pmu event dir: %s\n", path);
- return TEST_FAIL;
- }
+ snprintf(name, sizeof(name), "%s/event=%s/u", pmu->name, ent->d_name);
- ret = TEST_OK;
- while ((ent = readdir(dir))) {
- struct evlist_test e = { .name = NULL, };
- char name[2 * NAME_MAX + 1 + 12 + 3];
- int test_ret;
+ e.name = name;
+ e.check = test__checkevent_pmu_events;
- /* Names containing . are special and cannot be used directly */
- if (strchr(ent->d_name, '.'))
- continue;
+ test_ret = test_event(&e);
+ if (test_ret != TEST_OK) {
+ pr_debug("Test PMU event failed for '%s'", name);
+ ret = combine_test_results(ret, test_ret);
+ }
- snprintf(name, sizeof(name), "cpu/event=%s/u", ent->d_name);
+ if (!is_pmu_core(pmu->name))
+ continue;
- e.name = name;
- e.check = test__checkevent_pmu_events;
+ /*
+ * Names containing '-' are recognized as prefixes and suffixes
+ * due to '-' being a legacy PMU separator. This fails when the
+ * prefix or suffix collides with an existing legacy token. For
+ * example, branch-brs has a prefix (branch) that collides with
+ * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix
+ * isn't expected after this. Event names within the
+ * config slashes are allowed to contain '-', so the
+ * check above already verified that form works.
+ */
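+ /*
+ * For instance, an event file named "branch-brs" is skipped
+ * just below instead of being used to build the mixed form
+ * "branch-brs:u,cpu/event=branch-brs/u".
+ */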
+ if (strchr(ent->d_name, '-'))
+ continue;
- test_ret = test_event(&e);
- if (test_ret != TEST_OK) {
- pr_debug("Test PMU event failed for '%s'", name);
- ret = combine_test_results(ret, test_ret);
+ snprintf(name, sizeof(name), "%s:u,%s/event=%s/u",
+ ent->d_name, pmu->name, ent->d_name);
+ e.name = name;
+ e.check = test__checkevent_pmu_events_mix;
+ test_ret = test_event(&e);
+ if (test_ret != TEST_OK) {
+ pr_debug("Test PMU event failed for '%s'", name);
+ ret = combine_test_results(ret, test_ret);
+ }
}
- /*
- * Names containing '-' are recognized as prefixes and suffixes
- * due to '-' being a legacy PMU separator. This fails when the
- * prefix or suffix collides with an existing legacy token. For
- * example, branch-brs has a prefix (branch) that collides with
- * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix
- * isn't expected after this. As event names in the config
- * slashes are allowed a '-' in the name we check this works
- * above.
- */
- if (strchr(ent->d_name, '-'))
- continue;
- snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name);
- e.name = name;
- e.check = test__checkevent_pmu_events_mix;
- test_ret = test_event(&e);
- if (test_ret != TEST_OK) {
- pr_debug("Test PMU event failed for '%s'", name);
- ret = combine_test_results(ret, test_ret);
- }
+ closedir(dir);
}
-
- closedir(dir);
return ret;
}
static int test__pmu_events2(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- if (!test_pmu())
- return TEST_SKIP;
-
return test_events(test__events_pmu, ARRAY_SIZE(test__events_pmu));
}
@@ -2362,14 +2649,6 @@ static bool test_alias(char **event, char **alias)
return false;
}
-static int test__hybrid(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
-{
- if (!perf_pmu__has_hybrid())
- return TEST_SKIP;
-
- return test_events(test__hybrid_events, ARRAY_SIZE(test__hybrid_events));
-}
-
static int test__checkevent_pmu_events_alias(struct evlist *evlist)
{
struct evsel *evsel1 = evlist__first(evlist);
@@ -2433,9 +2712,6 @@ static struct test_case tests__parse_events[] = {
TEST_CASE_REASON("Test event parsing",
events2,
"permissions"),
- TEST_CASE_REASON("Test parsing of \"hybrid\" CPU events",
- hybrid,
- "not hybrid"),
TEST_CASE_REASON("Parsing of all PMU events from sysfs",
pmu_events,
"permissions"),
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index c05148ea400c..2c28fb50dc24 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -11,7 +11,7 @@
#include "debug.h"
#include "expr.h"
#include "stat.h"
-#include "pmu.h"
+#include "pmus.h"
struct value {
const char *event;
@@ -302,11 +302,8 @@ static int test__parse_metric(struct test_suite *test __maybe_unused, int subtes
TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);
TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);
TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0);
-
- if (!perf_pmu__has_hybrid()) {
- TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
- TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
- }
+ TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
+ TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
return 0;
}
diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c
index c09a9fae1689..fff58b220c07 100644
--- a/tools/perf/tests/pe-file-parsing.c
+++ b/tools/perf/tests/pe-file-parsing.c
@@ -34,6 +34,7 @@ static int run_dir(const char *d)
struct dso *dso;
struct symbol *sym;
int ret;
+ size_t idx;
scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d);
ret = filename__read_build_id(filename, &bid);
@@ -61,7 +62,7 @@ static int run_dir(const char *d)
TEST_ASSERT_VAL("Failed to load symbols", ret == 0);
dso__sort_by_name(dso);
- sym = dso__find_symbol_by_name(dso, "main");
+ sym = dso__find_symbol_by_name(dso, "main", &idx);
TEST_ASSERT_VAL("Failed to find main", sym);
dso__delete(dso);
diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg
index fae26b1cf08f..b3075c168cb2 100755
--- a/tools/perf/tests/perf-targz-src-pkg
+++ b/tools/perf/tests/perf-targz-src-pkg
@@ -7,16 +7,17 @@
# be in such tarball, which sometimes gets broken when we move files around,
# like when we made some files that were in tools/perf/ available to other tools/
# codebases by moving it to tools/include/, etc.
+set -e
PERF=$1
cd ${PERF}/../..
-make perf-targz-src-pkg > /dev/null
+make perf-targz-src-pkg
TARBALL=$(ls -rt perf-*.tar.gz)
TMP_DEST=$(mktemp -d)
tar xf ${TARBALL} -C $TMP_DEST
rm -f ${TARBALL}
cd - > /dev/null
-make -C $TMP_DEST/perf*/tools/perf > /dev/null
+make -C $TMP_DEST/perf*/tools/perf
RC=$?
rm -rf ${TMP_DEST}
exit $RC
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 1dff863b9711..64383fc34ef1 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -2,6 +2,7 @@
#include "math.h"
#include "parse-events.h"
#include "pmu.h"
+#include "pmus.h"
#include "tests.h"
#include <errno.h>
#include <stdio.h>
@@ -708,12 +709,9 @@ static int test__aliases(struct test_suite *test __maybe_unused,
struct perf_pmu *pmu = NULL;
unsigned long i;
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
int count = 0;
- if (!is_pmu_core(pmu->name))
- continue;
-
if (list_empty(&pmu->format)) {
pr_debug2("skipping testing core PMU %s\n", pmu->name);
continue;
@@ -776,16 +774,8 @@ static int check_parse_id(const char *id, struct parse_events_error *error,
for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@'))
*cur = '/';
- if (fake_pmu) {
- /*
- * Every call to __parse_events will try to initialize the PMU
- * state from sysfs and then clean it up at the end. Reset the
- * PMU events to the test state so that we don't pick up
- * erroneous prefixes and suffixes.
- */
- perf_pmu__test_parse_init();
- }
- ret = __parse_events(evlist, dup, error, fake_pmu, /*warn_if_reordered=*/true);
+ ret = __parse_events(evlist, dup, /*pmu_filter=*/NULL, error, fake_pmu,
+ /*warn_if_reordered=*/true);
free(dup);
evlist__delete(evlist);
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 3cf25f883df7..a4452639a3d4 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -86,17 +86,16 @@ static struct parse_events_term test_terms[] = {
* Prepare format directory data, exported by kernel
* at /sys/bus/event_source/devices/<dev>/format.
*/
-static char *test_format_dir_get(void)
+static char *test_format_dir_get(char *dir, size_t sz)
{
- static char dir[PATH_MAX];
unsigned int i;
- snprintf(dir, PATH_MAX, "/tmp/perf-pmu-test-format-XXXXXX");
+ snprintf(dir, sz, "/tmp/perf-pmu-test-format-XXXXXX");
if (!mkdtemp(dir))
return NULL;
for (i = 0; i < ARRAY_SIZE(test_formats); i++) {
- static char name[PATH_MAX];
+ char name[PATH_MAX];
struct test_format *format = &test_formats[i];
FILE *file;
@@ -118,12 +117,13 @@ static char *test_format_dir_get(void)
/* Cleanup format directory. */
static int test_format_dir_put(char *dir)
{
- char buf[PATH_MAX];
- snprintf(buf, PATH_MAX, "rm -f %s/*\n", dir);
+ char buf[PATH_MAX + 20];
+
+ snprintf(buf, sizeof(buf), "rm -f %s/*\n", dir);
if (system(buf))
return -1;
- snprintf(buf, PATH_MAX, "rmdir %s\n", dir);
+ snprintf(buf, sizeof(buf), "rmdir %s\n", dir);
return system(buf);
}
@@ -140,7 +140,8 @@ static struct list_head *test_terms_list(void)
static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- char *format = test_format_dir_get();
+ char dir[PATH_MAX];
+ char *format = test_format_dir_get(dir, sizeof(dir));
LIST_HEAD(formats);
struct list_head *terms = test_terms_list();
int ret;
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 6b990ee38575..0ebc22ac8d5b 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -14,7 +14,7 @@ static int test__python_use(struct test_suite *test __maybe_unused, int subtest
char *cmd;
int ret;
- if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
+ if (asprintf(&cmd, "echo \"import sys ; sys.path.insert(0, '%s'); import perf\" | %s %s",
PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
return -1;
diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh
index 0ce22ea0a7f1..3383ca3399d4 100755
--- a/tools/perf/tests/shell/buildid.sh
+++ b/tools/perf/tests/shell/buildid.sh
@@ -83,12 +83,12 @@ check()
# in case of pe-file.exe file
echo $1 | grep ".exe"
if [ $? -eq 0 ]; then
- if [ -x $1 -a ! -x $file ]; then
+ if [ -x "$1" ] && [ ! -x "$file" ]; then
echo "failed: file ${file} executable does not exist"
exit 1
fi
- if [ ! -x $file -a ! -e $file ]; then
+ if [ ! -x "$file" ] && [ ! -e "$file" ]; then
echo "failed: file ${file} does not exist"
exit 1
fi
@@ -136,10 +136,10 @@ test_record()
log_err=$(mktemp /tmp/perf.log.err.XXX)
perf="perf --buildid-dir ${build_id_dir}"
- echo "running: perf record $@"
- ${perf} record --buildid-all -o ${data} $@ 1>${log_out} 2>${log_err}
+ echo "running: perf record $*"
+ ${perf} record --buildid-all -o ${data} "$@" 1>${log_out} 2>${log_err}
if [ $? -ne 0 ]; then
- echo "failed: record $@"
+ echo "failed: record $*"
echo "see log: ${log_err}"
exit 1
fi
@@ -172,4 +172,4 @@ if [ ${run_pe} -eq 1 ]; then
rm -r ${wineprefix}
fi
-exit ${err}
+exit 0
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
index 45fc24af5b07..4c598cfc5afa 100755
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -11,11 +11,16 @@ check_line_first()
local lock=$5
local up=$6
- local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'`
- local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'`
- local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'`
- local line_lock=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'`
- local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'`
+ local line_name
+ line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'`
+ local line_base
+ line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'`
+ local line_output
+ line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'`
+ local line_lock
+ line_lock=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'`
+ local line_up
+ line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'`
if [ "${name}" != "${line_name}" ]; then
echo "FAILED: wrong name"
@@ -54,13 +59,20 @@ check_line_other()
local ack=$7
local up=$8
- local line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'`
- local line_run=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'`
- local line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'`
- local line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'`
- local line_control=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'`
- local line_ack=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $7 }'`
- local line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $8 }'`
+ local line_name
+ line_name=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $2 }'`
+ local line_run
+ line_run=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $3 }'`
+ local line_base
+ line_base=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $4 }'`
+ local line_output
+ line_output=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $5 }'`
+ local line_control
+ line_control=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $6 }'`
+ local line_ack
+ line_ack=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $7 }'`
+ local line_up
+ line_up=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $8 }'`
if [ "${name}" != "${line_name}" ]; then
echo "FAILED: wrong name"
@@ -102,8 +114,10 @@ daemon_exit()
{
local config=$1
- local line=`perf daemon --config ${config} -x: | head -1`
- local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+ local line
+ line=`perf daemon --config ${config} -x: | head -1`
+ local pid
+ pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
# Reset trap handler.
trap - SIGINT SIGTERM
@@ -123,7 +137,7 @@ daemon_start()
perf daemon start --config ${config}
# Clean up daemon if interrupted.
- trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM
+ trap 'echo "FAILED: Signal caught"; daemon_exit "${config}"; exit 1' SIGINT SIGTERM
# wait for the session to ping
local state="FAIL"
@@ -144,8 +158,10 @@ test_list()
{
echo "test daemon list"
- local config=$(mktemp /tmp/perf.daemon.config.XXX)
- local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+ local config
+ config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base
+ base=$(mktemp -d /tmp/perf.daemon.base.XXX)
cat <<EOF > ${config}
[daemon]
@@ -165,19 +181,22 @@ EOF
# check first line
# pid:daemon:base:base/output:base/lock
- local line=`perf daemon --config ${config} -x: | head -1`
+ local line
+ line=`perf daemon --config ${config} -x: | head -1`
check_line_first ${line} daemon ${base} ${base}/output ${base}/lock "0"
# check 1st session
# pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0
- local line=`perf daemon --config ${config} -x: | head -2 | tail -1`
+ local line
+ line=`perf daemon --config ${config} -x: | head -2 | tail -1`
check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \
${base}/session-size/output ${base}/session-size/control \
${base}/session-size/ack "0"
# check 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
- local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
+ local line
+ line=`perf daemon --config ${config} -x: | head -3 | tail -1`
check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control \
${base}/session-time/ack "0"
@@ -193,8 +212,10 @@ test_reconfig()
{
echo "test daemon reconfig"
- local config=$(mktemp /tmp/perf.daemon.config.XXX)
- local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+ local config
+ config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base
+ base=$(mktemp -d /tmp/perf.daemon.base.XXX)
# prepare config
cat <<EOF > ${config}
@@ -215,10 +236,12 @@ EOF
# check 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
- local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
+ local line
+ line=`perf daemon --config ${config} -x: | head -3 | tail -1`
check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
- local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+ local pid
+ pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
# prepare new config
local config_new=${config}.new
@@ -249,7 +272,8 @@ EOF
# check reconfigured 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
- local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
+ local line
+ line=`perf daemon --config ${config} -x: | head -3 | tail -1`
check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
@@ -276,7 +300,8 @@ EOF
state=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'`
done
- local one=`perf daemon --config ${config} -x: | wc -l`
+ local one
+ one=`perf daemon --config ${config} -x: | wc -l`
if [ ${one} -ne "1" ]; then
echo "FAILED: wrong list output"
@@ -312,8 +337,10 @@ test_stop()
{
echo "test daemon stop"
- local config=$(mktemp /tmp/perf.daemon.config.XXX)
- local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+ local config
+ config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base
+ base=$(mktemp -d /tmp/perf.daemon.base.XXX)
# prepare config
cat <<EOF > ${config}
@@ -332,8 +359,12 @@ EOF
# start daemon
daemon_start ${config} size
- local pid_size=`perf daemon --config ${config} -x: | head -2 | tail -1 | awk 'BEGIN { FS = ":" } ; { print $1 }'`
- local pid_time=`perf daemon --config ${config} -x: | head -3 | tail -1 | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+ local pid_size
+ pid_size=`perf daemon --config ${config} -x: | head -2 | tail -1 |
+ awk 'BEGIN { FS = ":" } ; { print $1 }'`
+ local pid_time
+ pid_time=`perf daemon --config ${config} -x: | head -3 | tail -1 |
+ awk 'BEGIN { FS = ":" } ; { print $1 }'`
# check that sessions are running
if [ ! -d "/proc/${pid_size}" ]; then
@@ -364,8 +395,10 @@ test_signal()
{
echo "test daemon signal"
- local config=$(mktemp /tmp/perf.daemon.config.XXX)
- local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+ local config
+ config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base
+ base=$(mktemp -d /tmp/perf.daemon.base.XXX)
# prepare config
cat <<EOF > ${config}
@@ -389,7 +422,7 @@ EOF
daemon_exit ${config}
# count is 2 perf.data for signals and 1 for perf record finished
- count=`ls ${base}/session-test/ | grep perf.data | wc -l`
+ count=`ls ${base}/session-test/*perf.data* | wc -l`
if [ ${count} -ne 3 ]; then
error=1
echo "FAILED: perf data no generated"
@@ -403,8 +436,10 @@ test_ping()
{
echo "test daemon ping"
- local config=$(mktemp /tmp/perf.daemon.config.XXX)
- local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+ local config
+ config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base
+ base=$(mktemp -d /tmp/perf.daemon.base.XXX)
# prepare config
cat <<EOF > ${config}
@@ -426,7 +461,7 @@ EOF
size=`perf daemon ping --config ${config} --session size | awk '{ print $1 }'`
type=`perf daemon ping --config ${config} --session time | awk '{ print $1 }'`
- if [ ${size} != "OK" -o ${type} != "OK" ]; then
+ if [ ${size} != "OK" ] || [ ${type} != "OK" ]; then
error=1
echo "FAILED: daemon ping failed"
fi
@@ -442,8 +477,10 @@ test_lock()
{
echo "test daemon lock"
- local config=$(mktemp /tmp/perf.daemon.config.XXX)
- local base=$(mktemp -d /tmp/perf.daemon.base.XXX)
+ local config
+ config=$(mktemp /tmp/perf.daemon.config.XXX)
+ local base
+ base=$(mktemp -d /tmp/perf.daemon.base.XXX)
# prepare config
cat <<EOF > ${config}
diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py
index 61f3059ca54b..ea55d5ea1ced 100644
--- a/tools/perf/tests/shell/lib/perf_json_output_lint.py
+++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py
@@ -14,6 +14,7 @@ ap.add_argument('--system-wide', action='store_true')
ap.add_argument('--event', action='store_true')
ap.add_argument('--per-core', action='store_true')
ap.add_argument('--per-thread', action='store_true')
+ap.add_argument('--per-cache', action='store_true')
ap.add_argument('--per-die', action='store_true')
ap.add_argument('--per-node', action='store_true')
ap.add_argument('--per-socket', action='store_true')
@@ -47,12 +48,14 @@ def check_json_output(expected_items):
'counter-value': lambda x: is_counter_value(x),
'cgroup': lambda x: True,
'cpu': lambda x: isint(x),
+ 'cache': lambda x: True,
'die': lambda x: True,
'event': lambda x: True,
'event-runtime': lambda x: isfloat(x),
'interval': lambda x: isfloat(x),
'metric-unit': lambda x: True,
'metric-value': lambda x: isfloat(x),
+ 'metricgroup': lambda x: True,
'node': lambda x: True,
'pcnt-running': lambda x: isfloat(x),
'socket': lambda x: True,
@@ -63,10 +66,12 @@ def check_json_output(expected_items):
for item in json.loads(input):
if expected_items != -1:
count = len(item)
- if count != expected_items and count >= 1 and count <= 4 and 'metric-value' in item:
+ if count != expected_items and count >= 1 and count <= 6 and 'metric-value' in item:
# Events that generate >1 metric may have isolated metric
- # values and possibly other prefixes like interval, core and
- # aggregate-number.
+ # values and possibly other prefixes like interval, core,
+ # aggregate-number, or event-runtime/pcnt-running from multiplexing.
+ pass
+ elif count != expected_items and count >= 1 and count <= 5 and 'metricgroup' in item:
pass
elif count != expected_items:
raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}'
@@ -83,7 +88,7 @@ try:
expected_items = 7
elif args.interval or args.per_thread or args.system_wide_no_aggr:
expected_items = 8
- elif args.per_core or args.per_socket or args.per_node or args.per_die:
+ elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cache:
expected_items = 9
else:
# If no option is specified, don't check the number of items.
diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
new file mode 100644
index 000000000000..50a34a9cc040
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
@@ -0,0 +1,574 @@
+# SPDX-License-Identifier: GPL-2.0
+import re
+import csv
+import json
+import argparse
+import sys
+from pathlib import Path
+import subprocess
+
+class Validator:
+ def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
+ self.rulefname = rulefname
+ self.reportfname = reportfname
+ self.rules = None
+ self.collectlist:str = metrics
+ self.metrics = self.__set_metrics(metrics)
+ self.skiplist = set()
+ self.tolerance = t
+
+ self.workloads = [x for x in workload.split(",") if x]
+ self.wlidx = 0 # index of the current workload
+ self.allresults = dict() # metric results of all workloads
+ self.allignoremetrics = dict() # metrics with no results or negative results
+ self.allfailtests = dict()
+ self.alltotalcnt = dict()
+ self.allpassedcnt = dict()
+ self.allerrlist = dict()
+
+ self.results = dict() # metric results of current workload
+ # vars for test pass/failure statistics
+ self.ignoremetrics = set() # metrics with no results or negative results, neg result counts as a failed test
+ self.failtests = dict()
+ self.totalcnt = 0
+ self.passedcnt = 0
+ # vars for errors
+ self.errlist = list()
+
+ # vars for Rule Generator
+ self.pctgmetrics = set() # Percentage rule
+
+ # vars for debug
+ self.datafname = datafname
+ self.debug = debug
+ self.fullrulefname = fullrulefname
+
+ def __set_metrics(self, metrics=''):
+ if metrics != '':
+ return set(metrics.split(","))
+ else:
+ return set()
+
+ def read_json(self, filename: str) -> dict:
+ try:
+ with open(Path(filename).resolve(), "r") as f:
+ data = json.loads(f.read())
+ except OSError as e:
+ print(f"Error when reading file {e}")
+ sys.exit(1)
+
+ return data
+
+ def json_dump(self, data, output_file):
+ parent = Path(output_file).parent
+ if not parent.exists():
+ parent.mkdir(parents=True)
+
+ with open(output_file, "w+") as output_file:
+ json.dump(data,
+ output_file,
+ ensure_ascii=True,
+ indent=4)
+
+ def get_results(self, idx:int = 0):
+ return self.results[idx]
+
+ def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0):
+ """
+ Get bounds and tolerance from lb, ub, and error.
+ If lb or ub is missing, float('inf') is used; if error is missing, self.tolerance is used.
+
+ @param lb: str/float, lower bound
+ @param ub: str/float, upper bound
+ @param error: float/str, error tolerance
+ @returns: lower bound; inf if the lower bound is a metric value that was not collected
+ upper bound; -1 if the upper bound is a metric value that was not collected
+ tolerance, denormalized based on the upper bound value
+ """
+ # init ubv and lbv to invalid values
+ def get_bound_value (bound, initval, ridx):
+ val = initval
+ if isinstance(bound, int) or isinstance(bound, float):
+ val = bound
+ elif isinstance(bound, str):
+ if bound == '':
+ val = float("inf")
+ elif bound in alias:
+ vall = self.get_value(alias[bound], ridx)
+ if vall:
+ val = vall[0]
+ elif bound.replace('.', '1').isdigit():
+ val = float(bound)
+ else:
+ print("Wrong bound: {0}".format(bound))
+ else:
+ print("Wrong bound: {0}".format(bound))
+ return val
+
+ ubv = get_bound_value(ub, -1, ridx)
+ lbv = get_bound_value(lb, float('inf'), ridx)
+ t = get_bound_value(error, self.tolerance, ridx)
+
+ # denormalize error threshold
+ denormerr = t * ubv / 100 if ubv != 100 and ubv > 0 else t
+
+ return lbv, ubv, denormerr
+
+ def get_value(self, name:str, ridx:int = 0) -> list:
+ """
+ Get value of the metric from self.results.
+ If result of this metric is not provided, the metric name will be added into self.ignoremetrics and self.errlist.
+ All future test(s) on this metric will fail.
+
+ @param name: name of the metric
+ @returns: list with value found in self.results; list is empty when value is not found.
+ """
+ results = []
+ data = self.results[ridx] if ridx in self.results else self.results[0]
+ if name not in self.ignoremetrics:
+ if name in data:
+ results.append(data[name])
+ elif name.replace('.', '1').isdigit():
+ results.append(float(name))
+ else:
+ self.ignoremetrics.add(name)
+ return results
+
+ def check_bound(self, val, lb, ub, err):
+ return lb - err <= val <= ub + err
+
+ # Positive Value Sanity check
+ def pos_val_test(self):
+ """
+ Check if metrics value are non-negative.
+ One metric is counted as one test.
+ Failure: when metric value is negative or not provided.
+ Metrics with negative value will be added into the self.failtests['PositiveValueTest'] and self.ignoremetrics.
+ """
+ negmetric = dict()
+ pcnt = 0
+ tcnt = 0
+ rerun = list()
+ for name, val in self.get_results().items():
+ if val < 0:
+ negmetric[name] = val
+ rerun.append(name)
+ else:
+ pcnt += 1
+ tcnt += 1
+ if len(rerun) > 0 and len(rerun) < 20:
+ second_results = dict()
+ self.second_test(rerun, second_results)
+ for name, val in second_results.items():
+ if name not in negmetric: continue
+ if val >= 0:
+ del negmetric[name]
+ pcnt += 1
+
+ self.failtests['PositiveValueTest']['Total Tests'] = tcnt
+ self.failtests['PositiveValueTest']['Passed Tests'] = pcnt
+ if len(negmetric.keys()):
+ self.ignoremetrics.update(negmetric.keys())
+ negmessage = ["{0}(={1:.4f})".format(name, val) for name, val in negmetric.items()]
+ self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue': negmessage})
+
+ return
+
+ def evaluate_formula(self, formula:str, alias:dict, ridx:int = 0):
+ """
+ Evaluate the value of formula.
+
+ @param formula: the formula to be evaluated
+ @param alias: the dict has alias to metric name mapping
+ @returns: value of the formula if successful; -1 if one or more metric values were not provided
+ """
+ stack = []
+ b = 0
+ errs = []
+ sign = "+"
+ f = str()
+
+ #TODO: support parenthesis?
+ for i in range(len(formula)):
+ if i+1 == len(formula) or formula[i] in ('+', '-', '*', '/'):
+ s = alias[formula[b:i]] if i+1 < len(formula) else alias[formula[b:]]
+ v = self.get_value(s, ridx)
+ if not v:
+ errs.append(s)
+ else:
+ f = f + "{0}(={1:.4f})".format(s, v[0])
+ if sign == "*":
+ stack[-1] = stack[-1] * v
+ elif sign == "/":
+ stack[-1] = stack[-1] / v
+ elif sign == '-':
+ stack.append(-v[0])
+ else:
+ stack.append(v[0])
+ if i + 1 < len(formula):
+ sign = formula[i]
+ f += sign
+ b = i + 1
+
+ if len(errs) > 0:
+ return -1, "Metric value missing: "+','.join(errs)
+
+ val = sum(stack)
+ return val, f
+
+ # Relationships Tests
+ def relationship_test(self, rule: dict):
+ """
+ Validate if the metrics follow the required relationship in the rule.
+ e.g. lower_bound <= eval(formula) <= upper_bound
+ One rule is counted as one test.
+ Failure: when one or more metric results are not provided, or when the formula evaluates to a value outside the upper/lower bounds.
+
+ @param rule: dict with metric name(+alias), formula, and required upper and lower bounds.
+ """
+ alias = dict()
+ for m in rule['Metrics']:
+ alias[m['Alias']] = m['Name']
+ lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
+ val, f = self.evaluate_formula(rule['Formula'], alias, ridx=rule['RuleIndex'])
+ if val == -1:
+ self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Description':f})
+ elif not self.check_bound(val, lbv, ubv, t):
+ lb = rule['RangeLower']
+ ub = rule['RangeUpper']
+ if isinstance(lb, str):
+ if lb in alias:
+ lb = alias[lb]
+ if isinstance(ub, str):
+ if ub in alias:
+ ub = alias[ub]
+ self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Formula':f,
+ 'RangeLower': lb, 'LowerBoundValue': self.get_value(lb),
+ 'RangeUpper': ub, 'UpperBoundValue':self.get_value(ub),
+ 'ErrorThreshold': t, 'CollectedValue': val})
+ else:
+ self.passedcnt += 1
+ self.failtests['RelationshipTest']['Passed Tests'] += 1
+ self.totalcnt += 1
+ self.failtests['RelationshipTest']['Total Tests'] += 1
+
+ return
+
+
+ # Single Metric Test
+ def single_test(self, rule:dict):
+ """
+ Validate if the metrics are in the required value range.
+ e.g. lower_bound <= metric_value <= upper_bound
+ One metric is counted as one test in this type of test.
+ One rule may include one or more metrics.
+ Failure: when a metric value is not provided or the value is outside the bounds.
+ This test updates self.totalcnt and records failed tests in self.failtests['SingleMetricTest'].
+
+ @param rule: dict with metrics to validate and the value range requirement
+ """
+ lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
+ metrics = rule['Metrics']
+ passcnt = 0
+ totalcnt = 0
+ faillist = list()
+ failures = dict()
+ rerun = list()
+ for m in metrics:
+ totalcnt += 1
+ result = self.get_value(m['Name'])
+ if (len(result) > 0 and self.check_bound(result[0], lbv, ubv, t)) or m['Name'] in self.skiplist:
+ passcnt += 1
+ else:
+ failures[m['Name']] = result
+ rerun.append(m['Name'])
+
+ if len(rerun) > 0 and len(rerun) < 20:
+ second_results = dict()
+ self.second_test(rerun, second_results)
+ for name, val in second_results.items():
+ if name not in failures: continue
+ if self.check_bound(val, lbv, ubv, t):
+ passcnt += 1
+ del failures[name]
+ else:
+ failures[name] = val
+ self.results[0][name] = val
+
+ self.totalcnt += totalcnt
+ self.passedcnt += passcnt
+ self.failtests['SingleMetricTest']['Total Tests'] += totalcnt
+ self.failtests['SingleMetricTest']['Passed Tests'] += passcnt
+ if len(failures.keys()) != 0:
+ faillist = [{'MetricName':name, 'CollectedValue':val} for name, val in failures.items()]
+ self.failtests['SingleMetricTest']['Failed Tests'].append({'RuleIndex':rule['RuleIndex'],
+ 'RangeLower': rule['RangeLower'],
+ 'RangeUpper': rule['RangeUpper'],
+ 'ErrorThreshold':rule['ErrorThreshold'],
+ 'Failure':faillist})
+
+ return
+
+ def create_report(self):
+ """
+ Create final report and write into a JSON file.
+ """
+ alldata = list()
+ for i in range(0, len(self.workloads)):
+ reportstas = {"Total Rule Count": self.alltotalcnt[i], "Passed Rule Count": self.allpassedcnt[i]}
+ data = {"Metric Validation Statistics": reportstas, "Tests in Category": self.allfailtests[i],
+ "Errors":self.allerrlist[i]}
+ alldata.append({"Workload": self.workloads[i], "Report": data})
+
+ json_str = json.dumps(alldata, indent=4)
+ print("Test validation finished. Final report: ")
+ print(json_str)
+
+ if self.debug:
+ allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]} for i in range(0, len(self.workloads))]
+ self.json_dump(allres, self.datafname)
+
+ def check_rule(self, testtype, metric_list):
+ """
+ Check if the rule uses metric(s) that do not exist on the current platform.
+
+ @param metric_list: list of metrics from the rule.
+ @return: False when any metric in the rule is not found in the metric file (the rule should be skipped).
+ True when all metrics used in the rule are found in the metric file.
+ """
+ if testtype == "RelationshipTest":
+ for m in metric_list:
+ if m['Name'] not in self.metrics:
+ return False
+ return True
+
+ # Start of Collector and Converter
+ def convert(self, data: list, metricvalues:dict):
+ """
+ Convert collected metric data from the -j output to dict of {metric_name:value}.
+ """
+ for json_string in data:
+ try:
+ result = json.loads(json_string)
+ if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
+ name = result["metric-unit"].split(" ")[1] if len(result["metric-unit"].split(" ")) > 1 \
+ else result["metric-unit"]
+ metricvalues[name.lower()] = float(result["metric-value"])
+ except ValueError:
+ continue
+ return
+
+ def _run_perf(self, metric, workload: str):
+ tool = 'perf'
+ command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
+ wl = workload.split()
+ command.extend(wl)
+ print(" ".join(command))
+ cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
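+ # perf stat -j writes one JSON object per line to stderr; split on
+ # '}\n' and restore the '}' consumed by the split.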
+ data = [x+'}' for x in cmd.stderr.split('}\n') if x]
+ return data
+
+
+ def collect_perf(self, workload: str):
+ """
+ Collect metric data with "perf stat -M" on given workload with -a and -j.
+ """
+ self.results = dict()
+ print(f"Starting perf collection")
+ print(f"Long workload: {workload}")
+ collectlist = dict()
+ if self.collectlist != "":
+ collectlist[0] = {x for x in self.collectlist.split(",")}
+ else:
+ collectlist[0] = set(list(self.metrics))
+ # Create metric set for relationship rules
+ for rule in self.rules:
+ if rule["TestType"] == "RelationshipTest":
+ metrics = [m["Name"] for m in rule["Metrics"]]
+ if not any(m not in collectlist[0] for m in metrics):
+ collectlist[rule["RuleIndex"]] = [",".join(list(set(metrics)))]
+
+ for idx, metrics in collectlist.items():
+ if idx == 0: wl = "true"
+ else: wl = workload
+ for metric in metrics:
+ data = self._run_perf(metric, wl)
+ if idx not in self.results: self.results[idx] = dict()
+ self.convert(data, self.results[idx])
+ return
+
+ def second_test(self, collectlist, second_results):
+ workload = self.workloads[self.wlidx]
+ for metric in collectlist:
+ data = self._run_perf(metric, workload)
+ self.convert(data, second_results)
+
+ # End of Collector and Converter
+
+ # Start of Rule Generator
+ def parse_perf_metrics(self):
+ """
+ Read and parse perf metric file:
+ 1) find metrics with '1%' or '100%' as ScaleUnit for Percent check
+ 2) create metric name list
+ """
+ command = ['perf', 'list', '-j', '--details', 'metrics']
+ cmd = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
+ try:
+ data = json.loads(cmd.stdout)
+ for m in data:
+ if 'MetricName' not in m:
+ print("Warning: no metric name")
+ continue
+ name = m['MetricName'].lower()
+ self.metrics.add(name)
+ if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
+ self.pctgmetrics.add(name.lower())
+ except ValueError:
+ print("Error when parsing metric data")
+ sys.exit(1)
+
+ return
+
+ def remove_unsupported_rules(self, rules):
+ new_rules = []
+ for rule in rules:
+ add_rule = True
+ for m in rule["Metrics"]:
+ if m["Name"] in self.skiplist or m["Name"] not in self.metrics:
+ add_rule = False
+ break
+ if add_rule:
+ new_rules.append(rule)
+ return new_rules
+
+ def create_rules(self):
+ """
+ Create full rules, which include:
+ 1) All the rules from the "RelationshipRules" section of the rule file
+ 2) A SingleMetricTest rule covering all the 'percent' metrics
+
+ Reindex all the rules to avoid repeated RuleIndex
+ """
+ data = self.read_json(self.rulefname)
+ rules = data['RelationshipRules']
+ self.skiplist = set([name.lower() for name in data['SkipList']])
+ self.rules = self.remove_unsupported_rules(rules)
+ pctgrule = {'RuleIndex':0,
+ 'TestType':'SingleMetricTest',
+ 'RangeLower':'0',
+ 'RangeUpper': '100',
+ 'ErrorThreshold': self.tolerance,
+ 'Description':'Metrics in percent unit have values within [0, 100]',
+ 'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
+ self.rules.append(pctgrule)
+
+ # Re-index all rules to avoid repeated RuleIndex
+ idx = 1
+ for r in self.rules:
+ r['RuleIndex'] = idx
+ idx += 1
+
+ if self.debug:
+ #TODO: need to test and generate file name correctly
+ data = {'RelationshipRules':self.rules, 'SupportedMetrics': [{"MetricName": name} for name in self.metrics]}
+ self.json_dump(data, self.fullrulefname)
+
+ return
+ # End of Rule Generator
+
+ def _storewldata(self, key):
+ '''
+ Store all the data of one workload into the corresponding data structure for all workloads.
+ @param key: key to the dictionaries (index of self.workloads).
+ '''
+ self.allresults[key] = self.results
+ self.allignoremetrics[key] = self.ignoremetrics
+ self.allfailtests[key] = self.failtests
+ self.alltotalcnt[key] = self.totalcnt
+ self.allpassedcnt[key] = self.passedcnt
+ self.allerrlist[key] = self.errlist
+
+ # Initialize data structures before data validation of each workload
+ def _init_data(self):
+
+ testtypes = ['PositiveValueTest', 'RelationshipTest', 'SingleMetricTest']
+ self.results = dict()
+ self.ignoremetrics = set()
+ self.errlist = list()
+ self.failtests = {k:{'Total Tests':0, 'Passed Tests':0, 'Failed Tests':[]} for k in testtypes}
+ self.totalcnt = 0
+ self.passedcnt = 0
+
+ def test(self):
+ '''
+ The real entry point of the test framework.
+ This function loads the validation rule JSON file and Standard Metric file to create rules for
+ testing and namemap dictionaries.
+ It also reads in result JSON file for testing.
+
+ In the test process, it passes through each rule and launches the correct test function based on
+ the 'TestType' field of the rule.
+
+ The final report is written into a JSON file.
+ '''
+ if not self.collectlist:
+ self.parse_perf_metrics()
+ self.create_rules()
+ for i in range(0, len(self.workloads)):
+ self.wlidx = i
+ self._init_data()
+ self.collect_perf(self.workloads[i])
+ # Run positive value test
+ self.pos_val_test()
+ for r in self.rules:
+ # skip rules that use metrics which do not exist on this platform
+ testtype = r['TestType']
+ if not self.check_rule(testtype, r['Metrics']):
+ continue
+ if testtype == 'RelationshipTest':
+ self.relationship_test(r)
+ elif testtype == 'SingleMetricTest':
+ self.single_test(r)
+ else:
+ print("Unsupported Test Type: ", testtype)
+ self.errlist.append("Unsupported Test Type from rule: " + r['RuleIndex'])
+ self._storewldata(i)
+ print("Workload: ", self.workloads[i])
+ print("Total metrics collected: ", self.failtests['PositiveValueTest']['Total Tests'])
+ print("Non-negative metric count: ", self.failtests['PositiveValueTest']['Passed Tests'])
+ print("Total Test Count: ", self.totalcnt)
+ print("Passed Test Count: ", self.passedcnt)
+
+ self.create_report()
+ return sum(self.alltotalcnt.values()) != sum(self.allpassedcnt.values())
+# End of Class Validator
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description="Launch metric value validation")
+
+ parser.add_argument("-rule", help="Base validation rule file", required=True)
+ parser.add_argument("-output_dir", help="Path for validator output file, report file", required=True)
+ parser.add_argument("-debug", help="Debug run, save intermediate data to files", action="store_true", default=False)
+ parser.add_argument("-wl", help="Workload to run while data collection", default="true")
+ parser.add_argument("-m", help="Metric list to validate", default="")
+ args = parser.parse_args()
+ outpath = Path(args.output_dir)
+ reportf = Path.joinpath(outpath, 'perf_report.json')
+ fullrule = Path.joinpath(outpath, 'full_rule.json')
+ datafile = Path.joinpath(outpath, 'perf_data.json')
+
+ validator = Validator(args.rule, reportf, debug=args.debug,
+ datafname=datafile, fullrulefname=fullrule, workload=args.wl,
+ metrics=args.m)
+ ret = validator.test()
+
+ return ret
+
+
+if __name__ == "__main__":
+ import sys
+ sys.exit(main())
diff --git a/tools/perf/tests/shell/lib/perf_metric_validation_rules.json b/tools/perf/tests/shell/lib/perf_metric_validation_rules.json
new file mode 100644
index 000000000000..eb6f59e018b7
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_metric_validation_rules.json
@@ -0,0 +1,398 @@
+{
+ "SkipList": [
+ "tsx_aborted_cycles",
+ "tsx_transactional_cycles",
+ "C2_Pkg_Residency",
+ "C6_Pkg_Residency",
+ "C1_Core_Residency",
+ "C6_Core_Residency",
+ "tma_false_sharing",
+ "tma_remote_cache",
+ "tma_contested_accesses"
+ ],
+ "RelationshipRules": [
+ {
+ "RuleIndex": 1,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "c",
+ "RangeUpper": "c",
+ "ErrorThreshold": 5.0,
+ "Description": "Intel(R) Optane(TM) Persistent Memory(PMEM) bandwidth total includes Intel(R) Optane(TM) Persistent Memory(PMEM) read bandwidth and Intel(R) Optane(TM) Persistent Memory(PMEM) write bandwidth",
+ "Metrics": [
+ {
+ "Name": "pmem_memory_bandwidth_read",
+ "Alias": "a"
+ },
+ {
+ "Name": "pmem_memory_bandwidth_write",
+ "Alias": "b"
+ },
+ {
+ "Name": "pmem_memory_bandwidth_total",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 2,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "c",
+ "RangeUpper": "c",
+ "ErrorThreshold": 5.0,
+ "Description": "DDR memory bandwidth total includes DDR memory read bandwidth and DDR memory write bandwidth",
+ "Metrics": [
+ {
+ "Name": "memory_bandwidth_read",
+ "Alias": "a"
+ },
+ {
+ "Name": "memory_bandwidth_write",
+ "Alias": "b"
+ },
+ {
+ "Name": "memory_bandwidth_total",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 3,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "100",
+ "RangeUpper": "100",
+ "ErrorThreshold": 5.0,
+ "Description": "Total memory read accesses includes memory reads from last level cache (LLC) addressed to local DRAM and memory reads from the last level cache (LLC) addressed to remote DRAM.",
+ "Metrics": [
+ {
+ "Name": "numa_reads_addressed_to_local_dram",
+ "Alias": "a"
+ },
+ {
+ "Name": "numa_reads_addressed_to_remote_dram",
+ "Alias": "b"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 4,
+ "Formula": "a",
+ "TestType": "SingleMetricTest",
+ "RangeLower": "0.125",
+ "RangeUpper": "",
+ "ErrorThreshold": "",
+ "Description": "",
+ "Metrics": [
+ {
+ "Name": "cpi",
+ "Alias": "a"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 5,
+ "Formula": "",
+ "TestType": "SingleMetricTest",
+ "RangeLower": "0",
+ "RangeUpper": "1",
+ "ErrorThreshold": 5.0,
+ "Description": "Ratio values should be within value range [0,1)",
+ "Metrics": [
+ {
+ "Name": "loads_per_instr",
+ "Alias": ""
+ },
+ {
+ "Name": "stores_per_instr",
+ "Alias": ""
+ },
+ {
+ "Name": "l1d_mpi",
+ "Alias": ""
+ },
+ {
+ "Name": "l1d_demand_data_read_hits_per_instr",
+ "Alias": ""
+ },
+ {
+ "Name": "l1_i_code_read_misses_with_prefetches_per_instr",
+ "Alias": ""
+ },
+ {
+ "Name": "l2_demand_data_read_hits_per_instr",
+ "Alias": ""
+ },
+ {
+ "Name": "l2_mpi",
+ "Alias": ""
+ },
+ {
+ "Name": "l2_demand_data_read_mpi",
+ "Alias": ""
+ },
+ {
+ "Name": "l2_demand_code_mpi",
+ "Alias": ""
+ }
+ ]
+ },
+ {
+ "RuleIndex": 6,
+ "Formula": "a+b+c+d",
+ "TestType": "RelationshipTest",
+ "RangeLower": "100",
+ "RangeUpper": "100",
+ "ErrorThreshold": 5.0,
+ "Description": "Sum of TMA level 1 metrics should be 100%",
+ "Metrics": [
+ {
+ "Name": "tma_frontend_bound",
+ "Alias": "a"
+ },
+ {
+ "Name": "tma_bad_speculation",
+ "Alias": "b"
+ },
+ {
+ "Name": "tma_backend_bound",
+ "Alias": "c"
+ },
+ {
+ "Name": "tma_retiring",
+ "Alias": "d"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 7,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "c",
+ "RangeUpper": "c",
+ "ErrorThreshold": 5.0,
+ "Description": "Sum of the level 2 children should equal level 1 parent",
+ "Metrics": [
+ {
+ "Name": "tma_fetch_latency",
+ "Alias": "a"
+ },
+ {
+ "Name": "tma_fetch_bandwidth",
+ "Alias": "b"
+ },
+ {
+ "Name": "tma_frontend_bound",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 8,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "c",
+ "RangeUpper": "c",
+ "ErrorThreshold": 5.0,
+ "Description": "Sum of the level 2 children should equal level 1 parent",
+ "Metrics": [
+ {
+ "Name": "tma_branch_mispredicts",
+ "Alias": "a"
+ },
+ {
+ "Name": "tma_machine_clears",
+ "Alias": "b"
+ },
+ {
+ "Name": "tma_bad_speculation",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 9,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "c",
+ "RangeUpper": "c",
+ "ErrorThreshold": 5.0,
+ "Description": "Sum of the level 2 children should equal level 1 parent",
+ "Metrics": [
+ {
+ "Name": "tma_memory_bound",
+ "Alias": "a"
+ },
+ {
+ "Name": "tma_core_bound",
+ "Alias": "b"
+ },
+ {
+ "Name": "tma_backend_bound",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 10,
+ "Formula": "a+b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "c",
+ "RangeUpper": "c",
+ "ErrorThreshold": 5.0,
+ "Description": "Sum of the level 2 children should equal level 1 parent",
+ "Metrics": [
+ {
+ "Name": "tma_light_operations",
+ "Alias": "a"
+ },
+ {
+ "Name": "tma_heavy_operations",
+ "Alias": "b"
+ },
+ {
+ "Name": "tma_retiring",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 11,
+ "Formula": "a+b+c",
+ "TestType": "RelationshipTest",
+ "RangeLower": "100",
+ "RangeUpper": "100",
+ "ErrorThreshold": 5.0,
+ "Description": "The all_requests includes the memory_page_empty, memory_page_misses, and memory_page_hits equals.",
+ "Metrics": [
+ {
+ "Name": "memory_page_empty_vs_all_requests",
+ "Alias": "a"
+ },
+ {
+ "Name": "memory_page_misses_vs_all_requests",
+ "Alias": "b"
+ },
+ {
+ "Name": "memory_page_hits_vs_all_requests",
+ "Alias": "c"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 12,
+ "Formula": "a-b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "0",
+ "RangeUpper": "",
+ "ErrorThreshold": 5.0,
+ "Description": "CPU utilization in kernel mode should always be <= cpu utilization",
+ "Metrics": [
+ {
+ "Name": "cpu_utilization",
+ "Alias": "a"
+ },
+ {
+ "Name": "cpu_utilization_in_kernel_mode",
+ "Alias": "b"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 13,
+ "Formula": "a-b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "0",
+ "RangeUpper": "",
+ "ErrorThreshold": 5.0,
+ "Description": "Total L2 misses per instruction should be >= L2 demand data read misses per instruction",
+ "Metrics": [
+ {
+ "Name": "l2_mpi",
+ "Alias": "a"
+ },
+ {
+ "Name": "l2_demand_data_read_mpi",
+ "Alias": "b"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 14,
+ "Formula": "a-b",
+ "TestType": "RelationshipTest",
+ "RangeLower": "0",
+ "RangeUpper": "",
+ "ErrorThreshold": 5.0,
+ "Description": "Total L2 misses per instruction should be >= L2 demand code misses per instruction",
+ "Metrics": [
+ {
+ "Name": "l2_mpi",
+ "Alias": "a"
+ },
+ {
+ "Name": "l2_demand_code_mpi",
+ "Alias": "b"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 15,
+ "Formula": "b+c+d",
+ "TestType": "RelationshipTest",
+ "RangeLower": "a",
+ "RangeUpper": "a",
+ "ErrorThreshold": 5.0,
+ "Description": "L3 data read, rfo, code misses per instruction equals total L3 misses per instruction.",
+ "Metrics": [
+ {
+ "Name": "llc_mpi",
+ "Alias": "a"
+ },
+ {
+ "Name": "llc_data_read_mpi_demand_plus_prefetch",
+ "Alias": "b"
+ },
+ {
+ "Name": "llc_rfo_read_mpi_demand_plus_prefetch",
+ "Alias": "c"
+ },
+ {
+ "Name": "llc_code_read_mpi_demand_plus_prefetch",
+ "Alias": "d"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 16,
+ "Formula": "a",
+ "TestType": "SingleMetricTest",
+ "RangeLower": "0",
+ "RangeUpper": "8",
+ "ErrorThreshold": 0.0,
+ "Description": "Setting generous range for allowable frequencies",
+ "Metrics": [
+ {
+ "Name": "uncore_freq",
+ "Alias": "a"
+ }
+ ]
+ },
+ {
+ "RuleIndex": 17,
+ "Formula": "a",
+ "TestType": "SingleMetricTest",
+ "RangeLower": "0",
+ "RangeUpper": "8",
+ "ErrorThreshold": 0.0,
+ "Description": "Setting generous range for allowable frequencies",
+ "Metrics": [
+ {
+ "Name": "cpu_operating_frequency",
+ "Alias": "a"
+ }
+ ]
+ }
+ ]
+} \ No newline at end of file
diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh
new file mode 100644
index 000000000000..698343f0ecf9
--- /dev/null
+++ b/tools/perf/tests/shell/lib/stat_output.sh
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Return true if perf_event_paranoid is > $1 and not running as root.
+function ParanoidAndNotRoot()
+{
+ [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ]
+}
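+# Typical use, as in the checks below:
+#   if ParanoidAndNotRoot 0; then echo "[Skip] paranoid and not root"; return; fi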
+
+# $1 name $2 extra_opt
+check_no_args()
+{
+ echo -n "Checking $1 output: no args "
+ perf stat $2 true
+ commachecker --no-args
+ echo "[Success]"
+}
+
+check_system_wide()
+{
+ echo -n "Checking $1 output: system wide "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -a $2 true
+ commachecker --system-wide
+ echo "[Success]"
+}
+
+check_system_wide_no_aggr()
+{
+ echo -n "Checking $1 output: system wide no aggregation "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -A -a --no-merge $2 true
+ commachecker --system-wide-no-aggr
+ echo "[Success]"
+}
+
+check_interval()
+{
+ echo -n "Checking $1 output: interval "
+ perf stat -I 1000 $2 true
+ commachecker --interval
+ echo "[Success]"
+}
+
+check_event()
+{
+ echo -n "Checking $1 output: event "
+ perf stat -e cpu-clock $2 true
+ commachecker --event
+ echo "[Success]"
+}
+
+check_per_core()
+{
+ echo -n "Checking $1 output: per core "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-core -a $2 true
+ commachecker --per-core
+ echo "[Success]"
+}
+
+check_per_thread()
+{
+ echo -n "Checking $1 output: per thread "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-thread -a $2 true
+ commachecker --per-thread
+ echo "[Success]"
+}
+
+check_per_cache_instance()
+{
+ echo -n "Checking $1 output: per cache instance "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-cache -a $2 true
+ commachecker --per-cache
+ echo "[Success]"
+}
+
+check_per_die()
+{
+ echo -n "Checking $1 output: per die "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-die -a $2 true
+ commachecker --per-die
+ echo "[Success]"
+}
+
+check_per_node()
+{
+ echo -n "Checking $1 output: per node "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-node -a $2 true
+ commachecker --per-node
+ echo "[Success]"
+}
+
+check_per_socket()
+{
+ echo -n "Checking $1 output: per socket "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat --per-socket -a $2 true
+ commachecker --per-socket
+ echo "[Success]"
+}
+
+# The perf stat options for per-socket, per-core, per-die
+# and -A (no_aggr mode) use the info fetched from this
+# directory: "/sys/devices/system/cpu/cpu*/topology". For
+# example, the socket value is fetched from the
+# "physical_package_id" file in the topology directory.
+# Reference: cpu__get_topology_int in util/cpumap.c
+# If the platform doesn't expose topology information, values
+# will be set to -1. For example, in the case of the pSeries
+# platform on powerpc, the value for "physical_package_id" is
+# restricted and set to -1. The check here validates the
+# socket id read from the topology file before proceeding further.
+
+FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
+FILE_NAME="physical_package_id"
+
+function check_for_topology()
+{
+ if ! ParanoidAndNotRoot 0
+ then
+ socket_file=`ls $FILE_LOC/$FILE_NAME | head -n 1`
+ [ -z "$socket_file" ] && {
+ echo 0
+ return
+ }
+ socket_id=`cat $socket_file`
+ [ "$socket_id" = "-1" ] && {
+ echo 1
+ return
+ }
+ fi
+ echo 0
+}
diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
index be5fcafb26aa..4a194420416e 100755
--- a/tools/perf/tests/shell/lock_contention.sh
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -11,14 +11,14 @@ result=$(mktemp /tmp/__perf_test.result.XXXXX)
cleanup() {
rm -f ${perfdata}
rm -f ${result}
- trap - exit term int
+ trap - EXIT TERM INT
}
trap_cleanup() {
cleanup
exit ${err}
}
-trap trap_cleanup exit term int
+trap trap_cleanup EXIT TERM INT
check() {
if [ `id -u` != 0 ]; then
@@ -40,8 +40,8 @@ test_record()
perf lock record -o ${perfdata} -- perf bench sched messaging > /dev/null 2>&1
# the output goes to the stderr and we expect only 1 output (-E 1)
perf lock contention -i ${perfdata} -E 1 -q 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -58,8 +58,8 @@ test_bpf()
# the perf lock contention output goes to the stderr
perf lock con -a -b -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -70,8 +70,8 @@ test_record_concurrent()
echo "Testing perf lock record and perf lock contention at the same time"
perf lock record -o- -- perf bench sched messaging 2> /dev/null | \
perf lock contention -i- -E 1 -q 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -81,8 +81,8 @@ test_aggr_task()
{
echo "Testing perf lock contention --threads"
perf lock contention -i ${perfdata} -t -E 1 -q 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -93,8 +93,8 @@ test_aggr_task()
# the perf lock contention output goes to the stderr
perf lock con -a -b -t -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -104,8 +104,8 @@ test_aggr_addr()
{
echo "Testing perf lock contention --lock-addr"
perf lock contention -i ${perfdata} -l -E 1 -q 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] Recorded result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -116,8 +116,8 @@ test_aggr_addr()
# the perf lock contention output goes to the stderr
perf lock con -a -b -l -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l)
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)"
err=1
exit
fi
@@ -127,8 +127,8 @@ test_type_filter()
{
echo "Testing perf lock contention --type-filter (w/ spinlock)"
perf lock contention -i ${perfdata} -Y spinlock -q 2> ${result}
- if [ $(grep -c -v spinlock "${result}") != "0" ]; then
- echo "[Fail] Recorded result should not have non-spinlocks:" $(cat "${result}")
+ if [ "$(grep -c -v spinlock "${result}")" != "0" ]; then
+ echo "[Fail] Recorded result should not have non-spinlocks:" "$(cat "${result}")"
err=1
exit
fi
@@ -138,8 +138,8 @@ test_type_filter()
fi
perf lock con -a -b -Y spinlock -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(grep -c -v spinlock "${result}") != "0" ]; then
- echo "[Fail] BPF result should not have non-spinlocks:" $(cat "${result}")
+ if [ "$(grep -c -v spinlock "${result}")" != "0" ]; then
+ echo "[Fail] BPF result should not have non-spinlocks:" "$(cat "${result}")"
err=1
exit
fi
@@ -149,7 +149,7 @@ test_lock_filter()
{
echo "Testing perf lock contention --lock-filter (w/ tasklist_lock)"
perf lock contention -i ${perfdata} -l -q 2> ${result}
- if [ $(grep -c tasklist_lock "${result}") != "1" ]; then
+ if [ "$(grep -c tasklist_lock "${result}")" != "1" ]; then
echo "[Skip] Could not find 'tasklist_lock'"
return
fi
@@ -159,8 +159,8 @@ test_lock_filter()
# find out the type of tasklist_lock
local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//')
- if [ $(grep -c -v "${type}" "${result}") != "0" ]; then
- echo "[Fail] Recorded result should not have non-${type} locks:" $(cat "${result}")
+ if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then
+ echo "[Fail] Recorded result should not have non-${type} locks:" "$(cat "${result}")"
err=1
exit
fi
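
For reference, the type extraction above takes the first contention line, picks its 8th whitespace-separated field and strips everything from the first ':' on (rwlocks are reported with a :R/:W suffix). The column position is an assumption about the default 'perf lock contention' layout; on a stand-in line the pipeline behaves like this:

    line='f1 f2 f3 f4 f5 f6 f7 rwlock:W tasklist_lock'   # hypothetical layout
    type=$(echo "$line" | awk '{ print $8 }' | sed -e 's/:.*//')
    echo "$type"    # -> rwlock
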
@@ -170,8 +170,8 @@ test_lock_filter()
fi
perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(grep -c -v "${type}" "${result}") != "0" ]; then
- echo "[Fail] BPF result should not have non-${type} locks:" $(cat "${result}")
+ if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then
+ echo "[Fail] BPF result should not have non-${type} locks:" "$(cat "${result}")"
err=1
exit
fi
@@ -181,14 +181,14 @@ test_stack_filter()
{
echo "Testing perf lock contention --callstack-filter (w/ unix_stream)"
perf lock contention -i ${perfdata} -v -q 2> ${result}
- if [ $(grep -c unix_stream "${result}") == "0" ]; then
+ if [ "$(grep -c unix_stream "${result}")" = "0" ]; then
echo "[Skip] Could not find 'unix_stream'"
return
fi
perf lock contention -i ${perfdata} -E 1 -S unix_stream -q 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] Recorded result should have a lock from unix_stream:" $(cat "${result}")
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] Recorded result should have a lock from unix_stream:" "$(cat "${result}")"
err=1
exit
fi
@@ -198,8 +198,8 @@ test_stack_filter()
fi
perf lock con -a -b -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] BPF result should have a lock from unix_stream:" $(cat "${result}")
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] BPF result should have a lock from unix_stream:" "$(cat "${result}")"
err=1
exit
fi
@@ -209,14 +209,14 @@ test_aggr_task_stack_filter()
{
echo "Testing perf lock contention --callstack-filter with task aggregation"
perf lock contention -i ${perfdata} -v -q 2> ${result}
- if [ $(grep -c unix_stream "${result}") == "0" ]; then
+ if [ "$(grep -c unix_stream "${result}")" = "0" ]; then
echo "[Skip] Could not find 'unix_stream'"
return
fi
perf lock contention -i ${perfdata} -t -E 1 -S unix_stream -q 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] Recorded result should have a task from unix_stream:" $(cat "${result}")
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] Recorded result should have a task from unix_stream:" "$(cat "${result}")"
err=1
exit
fi
@@ -226,8 +226,43 @@ test_aggr_task_stack_filter()
fi
perf lock con -a -b -t -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
- if [ $(cat "${result}" | wc -l) != "1" ]; then
- echo "[Fail] BPF result should have a task from unix_stream:" $(cat "${result}")
+ if [ "$(cat "${result}" | wc -l)" != "1" ]; then
+ echo "[Fail] BPF result should have a task from unix_stream:" "$(cat "${result}")"
+ err=1
+ exit
+ fi
+}
+
+test_csv_output()
+{
+ echo "Testing perf lock contention CSV output"
+ perf lock contention -i ${perfdata} -E 1 -x , --output ${result}
+ # count the number of commas in the header
+ # it should have 5 commas (6 fields): contended, total-wait, max-wait, avg-wait, type, caller
+ header=$(grep "# output:" ${result} | tr -d -c , | wc -c)
+ if [ "${header}" != "5" ]; then
+ echo "[Fail] Recorded result does not have enough output columns: ${header} != 5"
+ err=1
+ exit
+ fi
+ # count the number of commas in the output
+ output=$(grep -v "^#" ${result} | tr -d -c , | wc -c)
+ if [ "${header}" != "${output}" ]; then
+ echo "[Fail] Recorded result does not match the number of commas: ${header} != ${output}"
+ err=1
+ exit
+ fi
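
The comma-counting trick deletes every character except ',' and counts what is left, a cheap way to validate the number of CSV columns without a parser. Sketch, assuming POSIX tr and wc:

    header='# output: contended, total wait, max wait, avg wait, type, caller'
    printf '%s' "$header" | tr -d -c , | wc -c    # -> 5 commas, i.e. 6 fields
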
+
+ if ! perf lock con -b true > /dev/null 2>&1 ; then
+ echo "[Skip] No BPF support"
+ return
+ fi
+
+ # the perf lock contention output goes to stderr
+ perf lock con -a -b -E 1 -x , --output ${result} -- perf bench sched messaging > /dev/null 2>&1
+ output=$(grep -v "^#" ${result} | tr -d -c , | wc -c)
+ if [ "${header}" != "${output}" ]; then
+ echo "[Fail] BPF result does not match the number of commas: ${header} != ${output}"
err=1
exit
fi
@@ -244,5 +279,6 @@ test_type_filter
test_lock_filter
test_stack_filter
test_aggr_task_stack_filter
+test_csv_output
exit ${err}
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index bbb5b3d185fa..89214a6d9951 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -10,11 +10,11 @@
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
-. $(dirname $0)/lib/probe.sh
-. $(dirname $0)/lib/probe_vfs_getname.sh
+. "$(dirname "$0")/lib/probe.sh"
+. "$(dirname "$0")/lib/probe_vfs_getname.sh"
libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
-nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
+nm -Dg $libc 2>/dev/null | grep -F -q inet_pton || exit 254
event_pattern='probe_libc:inet_pton(\_[[:digit:]]+)?'
@@ -23,7 +23,7 @@ add_libc_inet_pton_event() {
event_name=$(perf probe -f -x $libc -a inet_pton 2>&1 | tail -n +2 | head -n -5 | \
grep -P -o "$event_pattern(?=[[:space:]]\(on inet_pton in $libc\))")
- if [ $? -ne 0 -o -z "$event_name" ] ; then
+ if [ $? -ne 0 ] || [ -z "$event_name" ] ; then
printf "FAIL: could not add event\n"
return 1
fi
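
Splitting the test is more than style: '-a' and '-o' inside a single [ ] are marked obsolescent by POSIX and can parse ambiguously when an operand looks like an operator, whereas two [ ] commands joined by || or && are always well defined. Sketch:

    x='!'                          # operand that resembles an operator
    [ -n "$x" -a -n "$x" ]         # may be parsed ambiguously by some shells
    [ -n "$x" ] && [ -n "$x" ]     # unambiguous: two separate test invocations
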
@@ -94,7 +94,7 @@ delete_libc_inet_pton_event() {
}
# Check for IPv6 interface existence
-ip a sh lo | fgrep -q inet6 || exit 2
+ip a sh lo | grep -F -q inet6 || exit 2
skip_if_no_perf_probe && \
add_libc_inet_pton_event && \
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 1341437e1bd9..7f664f1889d9 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -9,11 +9,11 @@
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
-. $(dirname $0)/lib/probe.sh
+. "$(dirname "$0")/lib/probe.sh"
skip_if_no_perf_probe || exit 2
-. $(dirname $0)/lib/probe_vfs_getname.sh
+. "$(dirname "$0")/lib/probe_vfs_getname.sh"
record_open_file() {
echo "Recording open file:"
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index fb78b6251a4e..34a0701fee05 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -6,7 +6,8 @@
set -e
-skip_test=0
+. $(dirname $0)/lib/stat_output.sh
+
csv_sep=@
stat_output=$(mktemp /tmp/__perf_test.stat_output.csv.XXXXX)
@@ -35,11 +36,12 @@ function commachecker()
;; "--interval") exp=7
;; "--per-thread") exp=7
;; "--system-wide-no-aggr") exp=7
- [ $(uname -m) = "s390x" ] && exp='^[6-7]$'
+ [ "$(uname -m)" = "s390x" ] && exp='^[6-7]$'
;; "--per-core") exp=8
;; "--per-socket") exp=8
;; "--per-node") exp=8
;; "--per-die") exp=8
+ ;; "--per-cache") exp=8
esac
while read line
@@ -62,168 +64,22 @@ function commachecker()
return 0
}
-# Return true if perf_event_paranoid is > $1 and not running as root.
-function ParanoidAndNotRoot()
-{
- [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ]
-}
-
-check_no_args()
-{
- echo -n "Checking CSV output: no args "
- perf stat -x$csv_sep -o "${stat_output}" true
- commachecker --no-args
- echo "[Success]"
-}
-
-check_system_wide()
-{
- echo -n "Checking CSV output: system wide "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep -a -o "${stat_output}" true
- commachecker --system-wide
- echo "[Success]"
-}
-
-check_system_wide_no_aggr()
-{
- echo -n "Checking CSV output: system wide no aggregation "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep -A -a --no-merge -o "${stat_output}" true
- commachecker --system-wide-no-aggr
- echo "[Success]"
-}
-
-check_interval()
-{
- echo -n "Checking CSV output: interval "
- perf stat -x$csv_sep -I 1000 -o "${stat_output}" true
- commachecker --interval
- echo "[Success]"
-}
-
-
-check_event()
-{
- echo -n "Checking CSV output: event "
- perf stat -x$csv_sep -e cpu-clock -o "${stat_output}" true
- commachecker --event
- echo "[Success]"
-}
-
-check_per_core()
-{
- echo -n "Checking CSV output: per core "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep --per-core -a -o "${stat_output}" true
- commachecker --per-core
- echo "[Success]"
-}
-
-check_per_thread()
-{
- echo -n "Checking CSV output: per thread "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep --per-thread -a -o "${stat_output}" true
- commachecker --per-thread
- echo "[Success]"
-}
-
-check_per_die()
-{
- echo -n "Checking CSV output: per die "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep --per-die -a -o "${stat_output}" true
- commachecker --per-die
- echo "[Success]"
-}
-
-check_per_node()
-{
- echo -n "Checking CSV output: per node "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep --per-node -a -o "${stat_output}" true
- commachecker --per-node
- echo "[Success]"
-}
-
-check_per_socket()
-{
- echo -n "Checking CSV output: per socket "
- if ParanoidAndNotRoot 0
- then
- echo "[Skip] paranoid and not root"
- return
- fi
- perf stat -x$csv_sep --per-socket -a -o "${stat_output}" true
- commachecker --per-socket
- echo "[Success]"
-}
-
-# The perf stat options for per-socket, per-core, per-die
-# and -A ( no_aggr mode ) uses the info fetched from this
-# directory: "/sys/devices/system/cpu/cpu*/topology". For
-# example, socket value is fetched from "physical_package_id"
-# file in topology directory.
-# Reference: cpu__get_topology_int in util/cpumap.c
-# If the platform doesn't expose topology information, values
-# will be set to -1. For example, incase of pSeries platform
-# of powerpc, value for "physical_package_id" is restricted
-# and set to -1. Check here validates the socket-id read from
-# topology file before proceeding further
-
-FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
-FILE_NAME="physical_package_id"
-
-check_for_topology()
-{
- if ! ParanoidAndNotRoot 0
- then
- socket_file=`ls $FILE_LOC/$FILE_NAME | head -n 1`
- [ -z $socket_file ] && return 0
- socket_id=`cat $socket_file`
- [ $socket_id == -1 ] && skip_test=1
- return 0
- fi
-}
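
These helpers are not dropped but moved into the shared lib/stat_output.sh sourced at the top of the script. The topology validation they perform reduces to reading one socket id and treating -1 as "no usable topology"; a condensed sketch of that check, assuming the sysfs layout described in the comment above:

    f=$(ls /sys/devices/system/cpu/cpu*/topology/physical_package_id 2>/dev/null | head -n 1)
    [ -n "$f" ] && [ "$(cat "$f")" = "-1" ] && echo "skip per-socket/core/die tests"
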
+perf_cmd="-x$csv_sep -o ${stat_output}"
-check_for_topology
-check_no_args
-check_system_wide
-check_interval
-check_event
-check_per_thread
-check_per_node
+skip_test=$(check_for_topology)
+check_no_args "CSV" "$perf_cmd"
+check_system_wide "CSV" "$perf_cmd"
+check_interval "CSV" "$perf_cmd"
+check_event "CSV" "$perf_cmd"
+check_per_thread "CSV" "$perf_cmd"
+check_per_node "CSV" "$perf_cmd"
if [ $skip_test -ne 1 ]
then
- check_system_wide_no_aggr
- check_per_core
- check_per_die
- check_per_socket
+ check_system_wide_no_aggr "CSV" "$perf_cmd"
+ check_per_core "CSV" "$perf_cmd"
+ check_per_cache_instance "CSV" "$perf_cmd"
+ check_per_die "CSV" "$perf_cmd"
+ check_per_socket "CSV" "$perf_cmd"
else
echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
fi
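
After the refactor every checker takes a label ("CSV" here) and the perf stat options, so one helper serves the CSV, JSON and STD linters alike. The helpers themselves live in lib/stat_output.sh, which is not part of this diff; the sketch below is a hypothetical shape, not the actual library code:

    # hypothetical lib/stat_output.sh helper
    check_no_args()
    {
        echo -n "Checking $1 output: no args "
        perf stat $2 true    # $2 intentionally unquoted: it carries several options
        commachecker --no-args
        echo "[Success]"
    }
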
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index f3e4967cc72e..196e22672c50 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -40,7 +40,7 @@ trap trap_cleanup EXIT TERM INT
# Return true if perf_event_paranoid is > $1 and not running as root.
function ParanoidAndNotRoot()
{
- [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ]
+ [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ]
}
check_no_args()
@@ -120,6 +120,18 @@ check_per_thread()
echo "[Success]"
}
+check_per_cache_instance()
+{
+ echo -n "Checking json output: per cache_instance "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoia and not root"
+ return
+ fi
+ perf stat -j --per-cache -a true 2>&1 | $PYTHON $pythonchecker --per-cache
+ echo "[Success]"
+}
+
check_per_die()
{
echo -n "Checking json output: per die "
@@ -197,6 +209,7 @@ if [ $skip_test -ne 1 ]
then
check_system_wide_no_aggr
check_per_core
+ check_per_cache_instance
check_per_die
check_per_socket
else
diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh
index e6e35fc6c882..0e9cba84e757 100755
--- a/tools/perf/tests/shell/stat+shadow_stat.sh
+++ b/tools/perf/tests/shell/stat+shadow_stat.sh
@@ -33,7 +33,7 @@ test_global_aggr()
fi
# use printf for rounding and a leading zero
- res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+ res=`printf "%.2f" "$(echo "scale=6; $num / $cyc" | bc -q)"`
if [ "$ipc" != "$res" ]; then
echo "IPC is different: $res != $ipc ($num / $cyc)"
exit 1
@@ -67,7 +67,7 @@ test_no_aggr()
fi
# use printf for rounding and a leading zero
- res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+ res=`printf "%.2f" "$(echo "scale=6; $num / $cyc" | bc -q)"`
if [ "$ipc" != "$res" ]; then
echo "IPC is different for $cpu: $res != $ipc ($num / $cyc)"
exit 1
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
new file mode 100755
index 000000000000..f972b31fa0c2
--- /dev/null
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# perf stat STD output linter
+# SPDX-License-Identifier: GPL-2.0
+# Tests various perf stat STD output commands for
+# default event and metricgroup
+
+set -e
+
+. $(dirname $0)/lib/stat_output.sh
+
+stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
+
+event_name=(cpu-clock task-clock context-switches cpu-migrations page-faults stalled-cycles-frontend stalled-cycles-backend cycles instructions branches branch-misses)
+event_metric=("CPUs utilized" "CPUs utilized" "/sec" "/sec" "/sec" "frontend cycles idle" "backend cycles idle" "GHz" "insn per cycle" "/sec" "of all branches")
+skip_metric=("stalled cycles per insn" "tma_")
+
+cleanup() {
+ rm -f "${stat_output}"
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+function commachecker()
+{
+ local -i cnt=0
+ local prefix=1
+
+ case "$1"
+ in "--interval") prefix=2
+ ;; "--per-thread") prefix=2
+ ;; "--system-wide-no-aggr") prefix=2
+ ;; "--per-core") prefix=3
+ ;; "--per-socket") prefix=3
+ ;; "--per-node") prefix=3
+ ;; "--per-die") prefix=3
+ ;; "--per-cache") prefix=3
+ esac
+
+ while read line
+ do
+ # Ignore initial "started on" comment.
+ x=${line:0:1}
+ [ "$x" = "#" ] && continue
+ # Ignore initial blank line.
+ [ "$line" = "" ] && continue
+ # Ignore "Performance counter stats"
+ x=${line:0:25}
+ [ "$x" = "Performance counter stats" ] && continue
+ # Ignore "seconds time elapsed" and break
+ [[ "$line" == *"time elapsed"* ]] && break
+
+ main_body=$(echo $line | cut -d' ' -f$prefix-)
+ x=${main_body%#*}
+ [ "$x" = "" ] && continue
+
+ # Skip metrics without event name
+ y=${main_body#*#}
+ for i in "${!skip_metric[@]}"; do
+ [[ "$y" == *"${skip_metric[$i]}"* ]] && break
+ done
+ [[ "$y" == *"${skip_metric[$i]}"* ]] && continue
+
+ # Check default event
+ for i in "${!event_name[@]}"; do
+ [[ "$x" == *"${event_name[$i]}"* ]] && break
+ done
+
+ [[ ! "$x" == *"${event_name[$i]}"* ]] && {
+ echo "Unknown event name in $line" 1>&2
+ exit 1;
+ }
+
+ # Check event metric if it exists
+ [[ ! "$main_body" == *"#"* ]] && continue
+ [[ ! "$main_body" == *"${event_metric[$i]}"* ]] && {
+ echo "wrong event metric. expected ${event_metric[$i]} in $line" 1>&2
+ exit 1;
+ }
+ done < "${stat_output}"
+ return 0
+}
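
The prefix computed above is the first column that belongs to the counter data: aggregate modes prepend extra columns (a timestamp for --interval, a unit id plus CPU count for --per-core and friends), and cut drops them before the event name is checked. Illustration with a made-up --per-core style line:

    line='S0-D0-C0  2  1234567  cycles  #  1.00 GHz'
    echo $line | cut -d' ' -f3-    # $line unquoted so runs of spaces collapse
    # -> "1234567 cycles # 1.00 GHz"
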
+
+perf_cmd="-o ${stat_output}"
+
+skip_test=$(check_for_topology)
+check_no_args "STD" "$perf_cmd"
+check_system_wide "STD" "$perf_cmd"
+check_interval "STD" "$perf_cmd"
+check_per_thread "STD" "$perf_cmd"
+check_per_node "STD" "$perf_cmd"
+if [ $skip_test -ne 1 ]
+then
+ check_system_wide_no_aggr "STD" "$perf_cmd"
+ check_per_core "STD" "$perf_cmd"
+ check_per_cache_instance "STD" "$perf_cmd"
+ check_per_die "STD" "$perf_cmd"
+ check_per_socket "STD" "$perf_cmd"
+else
+ echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
+fi
+cleanup
+exit 0
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index b154fbb15d54..3f1e67795490 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -103,10 +103,54 @@ test_topdown_weak_groups() {
echo "Topdown weak groups test [Success]"
}
+test_cputype() {
+ # Test --cputype argument.
+ echo "cputype test"
+
+ # Bogus PMU should fail.
+ if perf stat --cputype="123" -e instructions true > /dev/null 2>&1
+ then
+ echo "cputype test [Bogus PMU didn't fail]"
+ err=1
+ return
+ fi
+
+ # Find a known PMU for cputype.
+ pmu=""
+ for i in cpu cpu_atom armv8_pmuv3_0
+ do
+ if test -d "/sys/devices/$i"
+ then
+ pmu="$i"
+ break
+ fi
+ if perf stat -e "$i/instructions/" true > /dev/null 2>&1
+ then
+ pmu="$i"
+ break
+ fi
+ done
+ if test "x$pmu" = "x"
+ then
+ echo "cputype test [Skipped known PMU not found]"
+ return
+ fi
+
+ # Test running with cputype produces output.
+ if ! perf stat --cputype="$pmu" -e instructions true 2>&1 | grep -E -q "instructions"
+ then
+ echo "cputype test [Failed count missed with given filter]"
+ err=1
+ return
+ fi
+ echo "cputype test [Success]"
+}
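
The loop probes for a usable PMU two ways: a sysfs directory test and a trial event parse. The names cpu, cpu_atom and armv8_pmuv3_0 are merely common cases; to see what a given machine actually exposes, something like this works:

    ls /sys/bus/event_source/devices/
    perf stat -e "cpu/instructions/" true >/dev/null 2>&1 && echo "cpu PMU parses"
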
+
test_default_stat
test_stat_record_report
test_stat_record_script
test_stat_repeat_weak_groups
test_topdown_groups
test_topdown_weak_groups
+test_cputype
exit $err
diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh
index 22e9cb294b40..54774525e18a 100755
--- a/tools/perf/tests/shell/stat_all_metrics.sh
+++ b/tools/perf/tests/shell/stat_all_metrics.sh
@@ -6,20 +6,20 @@ err=0
for m in $(perf list --raw-dump metrics); do
echo "Testing $m"
result=$(perf stat -M "$m" true 2>&1)
- if [[ "$result" =~ "${m:0:50}" ]] || [[ "$result" =~ "<not supported>" ]]
+ if [[ "$result" =~ ${m:0:50} ]] || [[ "$result" =~ "<not supported>" ]]
then
continue
fi
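
Dropping the quotes on the right-hand side of =~ changes the matching mode: in bash a quoted pattern is compared as a literal substring, while an unquoted one is interpreted as an extended regular expression. The sketch below shows the difference; the change above opts into regex interpretation of the metric name:

    s='tma_backend_bound'
    [[ "$s" =~ "tma_.*" ]] && echo literal   # no match: pattern taken literally
    [[ "$s" =~ tma_.* ]]   && echo regex     # matches: unquoted => ERE
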
# Failed so try system wide.
result=$(perf stat -M "$m" -a sleep 0.01 2>&1)
- if [[ "$result" =~ "${m:0:50}" ]]
+ if [[ "$result" =~ ${m:0:50} ]]
then
continue
fi
# Failed again, possibly the workload was too small so retry with something
# longer.
result=$(perf stat -M "$m" perf bench internals synthesize 2>&1)
- if [[ "$result" =~ "${m:0:50}" ]]
+ if [[ "$result" =~ ${m:0:50} ]]
then
continue
fi
diff --git a/tools/perf/tests/shell/stat_all_pfm.sh b/tools/perf/tests/shell/stat_all_pfm.sh
new file mode 100755
index 000000000000..4d004f777a6e
--- /dev/null
+++ b/tools/perf/tests/shell/stat_all_pfm.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+# perf all libpfm4 events test
+# SPDX-License-Identifier: GPL-2.0
+
+if perf version --build-options | grep HAVE_LIBPFM | grep -q OFF
+then
+ echo "Skipping, no libpfm4 support"
+ exit 2
+fi
+
+err=0
+for p in $(perf list --raw-dump pfm)
+do
+ if echo "$p" | grep -q unc_
+ then
+ echo "Skipping uncore event '$p' that may require additional options."
+ continue
+ fi
+ echo "Testing $p"
+ result=$(perf stat --pfm-events "$p" true 2>&1)
+ x=$?
+ if echo "$result" | grep -q "failed to parse event $p : invalid or missing unit mask"
+ then
+ continue
+ fi
+ if test "$x" -ne "0"
+ then
+ echo "Unexpected exit code '$x'"
+ err=1
+ fi
+ if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>"
+ then
+ # We failed to see the event and it is supported. Possibly the workload was
+ # too small so retry with something longer.
+ result=$(perf stat --pfm-events "$p" perf bench internals synthesize 2>&1)
+ x=$?
+ if test "$x" -ne "0"
+ then
+ echo "Unexpected exit code '$x'"
+ err=1
+ fi
+ if ! echo "$result" | grep -q "$p"
+ then
+ echo "Event '$p' not printed in:"
+ echo "$result"
+ err=1
+ fi
+ fi
+done
+
+exit "$err"
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 13473aeba489..6bf24b85294c 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -22,10 +22,10 @@ compare_number()
}
# skip if --bpf-counters is not supported
-if ! perf stat --bpf-counters true > /dev/null 2>&1; then
+if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
if [ "$1" = "-v" ]; then
echo "Skipping: --bpf-counters not supported"
- perf --no-pager stat --bpf-counters true || true
+ perf --no-pager stat -e cycles --bpf-counters true || true
fi
exit 2
fi
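
Pinning the probe to '-e cycles' narrows the check to the BPF counter path: without an explicit event, perf stat opens its whole default event list, and any unrelated unsupported event would make the script wrongly conclude that --bpf-counters itself is unsupported. The equivalent manual check:

    perf stat -e cycles --bpf-counters true >/dev/null 2>&1 || echo "skip: no --bpf-counters"
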
diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
index d724855d097c..e75d0780dc78 100755
--- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
@@ -25,22 +25,22 @@ check_bpf_counter()
find_cgroups()
{
# try usual systemd slices first
- if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
+ if [ -d /sys/fs/cgroup/system.slice ] && [ -d /sys/fs/cgroup/user.slice ]; then
test_cgroups="system.slice,user.slice"
return
fi
# try root and self cgroups
- local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
- if [ -z ${self_cgrp} ]; then
+ find_cgroups_self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
+ if [ -z "${find_cgroups_self_cgrp}" ]; then
# cgroup v2 doesn't specify perf_event
- self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
+ find_cgroups_self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
fi
- if [ -z ${self_cgrp} ]; then
+ if [ -z "${find_cgroups_self_cgrp}" ]; then
test_cgroups="/"
else
- test_cgroups="/,${self_cgrp}"
+ test_cgroups="/,${find_cgroups_self_cgrp}"
fi
}
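
The rename replaces 'local', which is a widely supported extension but not part of POSIX sh, with the portable convention of prefixing function-scoped variables with the function name so they cannot collide with the caller's. The pattern in isolation:

    f() {
        # POSIX sh has no 'local'; prefix function-scoped names instead
        f_tmp=$(date)
        echo "$f_tmp"
    }
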
@@ -48,13 +48,11 @@ find_cgroups()
# Just check if it runs without failure and has non-zero results.
check_system_wide_counted()
{
- local output
-
- output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1 2>&1)
- if echo ${output} | grep -q -F "<not "; then
+ check_system_wide_counted_output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1 2>&1)
+ if echo ${check_system_wide_counted_output} | grep -q -F "<not "; then
echo "Some system-wide events are not counted"
if [ "${verbose}" = "1" ]; then
- echo ${output}
+ echo ${check_system_wide_counted_output}
fi
exit 1
fi
@@ -62,13 +60,11 @@ check_system_wide_counted()
check_cpu_list_counted()
{
- local output
-
- output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1 2>&1)
- if echo ${output} | grep -q -F "<not "; then
+ check_cpu_list_counted_output=$(perf stat -C 0,1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1 2>&1)
+ if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then
echo "Some CPU events are not counted"
if [ "${verbose}" = "1" ]; then
- echo ${output}
+ echo ${check_cpu_list_counted_output}
fi
exit 1
fi
diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
new file mode 100755
index 000000000000..ad94c936de7e
--- /dev/null
+++ b/tools/perf/tests/shell/stat_metrics_values.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# perf metrics value validation
+# SPDX-License-Identifier: GPL-2.0
+if [ "x$PYTHON" == "x" ]
+then
+ if which python3 > /dev/null
+ then
+ PYTHON=python3
+ else
+ echo "Skipping test, python3 not detected; please set the PYTHON environment variable."
+ exit 2
+ fi
+fi
+
+grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; }
+
+pythonvalidator=$(dirname $0)/lib/perf_metric_validation.py
+rulefile=$(dirname $0)/lib/perf_metric_validation_rules.json
+tmpdir=$(mktemp -d /tmp/__perf_test.program.XXXXX)
+workload="perf bench futex hash -r 2 -s"
+
+# Add -debug, save data file and full rule file
+echo "Launch python validation script $pythonvalidator"
+echo "Output will be stored in: $tmpdir"
+$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
+ret=$?
+rm -rf $tmpdir
+
+exit $ret
+
diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index e61d8deaa0c4..66dfdfdad553 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -9,13 +9,14 @@ TEST_PROGRAM="perf test -w leafloop"
cleanup_files()
{
- rm -f $PERF_DATA
+ rm -f "$PERF_DATA"
}
-trap cleanup_files exit term int
+trap cleanup_files EXIT TERM INT
# Add a 1 second delay to skip samples that are not in the leaf() function
-perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
+# shellcheck disable=SC2086
+perf record -o "$PERF_DATA" --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
PID=$!
echo " + Recording (PID=$PID)..."
@@ -33,8 +34,8 @@ wait $PID
# 76c leafloop
# ...
-perf script -i $PERF_DATA -F comm,ip,sym | head -n4
-perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \
+perf script -i "$PERF_DATA" -F comm,ip,sym | head -n4
+perf script -i "$PERF_DATA" -F comm,ip,sym | head -n4 | \
awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" ||
sym[1] != "parent" ||
sym[2] != "leafloop") exit 1 }'
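
The awk program collects the symbol column from the first few samples and fails unless the innermost frames read leaf -> parent -> leafloop, i.e. the frame-pointer unwinder reconstructed the expected call chain. Fed by hand (assuming 'perf script -F comm,ip,sym' prints a comm line followed by "ip sym" lines) it behaves like this:

    printf 'leafloop\n 748 leaf\n 74c parent\n 76c leafloop\n' | \
        awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" ||
               sym[1] != "parent" ||
               sym[2] != "leafloop") exit 1 }' && echo "callchain OK"
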
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 482009e17bda..f1bf5621160f 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -28,11 +28,11 @@ cleanup_files()
rm -f ${perfdata}
rm -f ${file}
rm -f "${perfdata}.old"
- trap - exit term int
+ trap - EXIT TERM INT
exit $glb_err
}
-trap cleanup_files exit term int
+trap cleanup_files EXIT TERM INT
record_touch_file() {
echo "Recording trace (only user mode) with path: CPU$2 => $1"
@@ -89,7 +89,7 @@ is_device_sink() {
# cannot support perf PMU.
echo "$1" | grep -E -q -v "tpiu"
- if [ $? -eq 0 -a -e "$1/enable_sink" ]; then
+ if [ $? -eq 0 ] && [ -e "$1/enable_sink" ]; then
pmu_dev="/sys/bus/event_source/devices/cs_etm/sinks/$2"
diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh
index aa094d71f5b4..03d5c7d12ee5 100755
--- a/tools/perf/tests/shell/test_arm_spe.sh
+++ b/tools/perf/tests/shell/test_arm_spe.sh
@@ -27,7 +27,7 @@ cleanup_files()
exit $glb_err
}
-trap cleanup_files exit term int
+trap cleanup_files EXIT TERM INT
arm_spe_report() {
if [ $2 = 0 ]; then
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index 1c49d8293003..09908d71c994 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -18,7 +18,7 @@ cleanup() {
rm -rf $TMPDIR
}
-trap cleanup exit term int
+trap cleanup EXIT TERM INT
test_user_branches() {
echo "Testing user branch stack sampling"
@@ -47,17 +47,17 @@ test_user_branches() {
# first argument <arg0> is the argument passed to "--branch-stack <arg0>,save_type,u"
# second argument are the expected branch types for the given filter
test_filter() {
- local filter=$1
- local expect=$2
+ test_filter_filter=$1
+ test_filter_expect=$2
- echo "Testing branch stack filtering permutation ($filter,$expect)"
+ echo "Testing branch stack filtering permutation ($test_filter_filter,$test_filter_expect)"
- perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1
+ perf record -o $TMPDIR/perf.data --branch-filter $test_filter_filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1
perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script
# fail if we find any branch type that doesn't match any of the expected ones
# also consider UNKNOWN branch types (-)
- if grep -E -vm1 "^[^ ]*/($expect|-|( *))/.*$" $TMPDIR/perf.script; then
+ if grep -E -vm1 "^[^ ]*/($test_filter_expect|-|( *))/.*$" $TMPDIR/perf.script; then
return 1
fi
}
diff --git a/tools/perf/tests/shell/test_perf_data_converter_json.sh b/tools/perf/tests/shell/test_perf_data_converter_json.sh
new file mode 100755
index 000000000000..72ac6c83231c
--- /dev/null
+++ b/tools/perf/tests/shell/test_perf_data_converter_json.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# 'perf data convert --to-json' command test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+
+if [ "$PYTHON" = "" ] ; then
+ if which python3 > /dev/null ; then
+ PYTHON=python3
+ elif which python > /dev/null ; then
+ PYTHON=python
+ else
+ echo "Skipping test, python not detected; please set the PYTHON environment variable."
+ exit 2
+ fi
+fi
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+result=$(mktemp /tmp/__perf_test.output.json.XXXXX)
+
+cleanup()
+{
+ rm -f "${perfdata}"
+ rm -f "${result}"
+ trap - EXIT TERM INT
+}
+
+trap_cleanup()
+{
+ cleanup
+ exit ${err}
+}
+trap trap_cleanup EXIT TERM INT
+
+test_json_converter_command()
+{
+ echo "Testing Perf Data Convertion Command to JSON"
+ perf record -o "$perfdata" -F 99 -g -- perf test -w noploop > /dev/null 2>&1
+ perf data convert --to-json "$result" --force -i "$perfdata" >/dev/null 2>&1
+ if [ $(cat "${result}" | wc -l) -gt "0" ] ; then
+ echo "Perf Data Converter Command to JSON [SUCCESS]"
+ else
+ echo "Perf Data Converter Command to JSON [FAILED]"
+ err=1
+ exit
+ fi
+}
+
+validate_json_format()
+{
+ echo "Validating Perf Data Converted JSON file"
+ if [ -f "$result" ] ; then
+ if $PYTHON -c "import json; json.load(open('$result'))" >/dev/null 2>&1 ; then
+ echo "The file contains valid JSON format [SUCCESS]"
+ else
+ echo "The file does not contain valid JSON format [FAILED]"
+ err=1
+ exit
+ fi
+ else
+ echo "File not found [FAILED]"
+ err=2
+ exit
+ fi
+}
+
+test_json_converter_command
+validate_json_format
+
+exit ${err}
diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
index a98e4ab66040..0095abbe20ca 100755
--- a/tools/perf/tests/shell/test_task_analyzer.sh
+++ b/tools/perf/tests/shell/test_task_analyzer.sh
@@ -5,12 +5,18 @@
tmpdir=$(mktemp -d /tmp/perf-script-task-analyzer-XXXXX)
err=0
+# set PERF_EXEC_PATH to find scripts in the source directory
+perfdir=$(dirname "$0")/../..
+if [ -e "$perfdir/scripts/python/Perf-Trace-Util" ]; then
+ export PERF_EXEC_PATH=$perfdir
+fi
+
cleanup() {
rm -f perf.data
rm -f perf.data.old
rm -f csv
rm -f csvsummary
- rm -rf $tmpdir
+ rm -rf "$tmpdir"
trap - exit term int
}
@@ -21,7 +27,7 @@ trap_cleanup() {
trap trap_cleanup exit term int
report() {
- if [ $1 = 0 ]; then
+ if [ "$1" = 0 ]; then
echo "PASS: \"$2\""
else
echo "FAIL: \"$2\" Error message: \"$3\""
@@ -31,109 +37,127 @@ report() {
check_exec_0() {
if [ $? != 0 ]; then
- report 1 "invokation of ${$1} command failed"
+ report 1 "invocation of $1 command failed"
fi
}
find_str_or_fail() {
- grep -q "$1" $2
- if [ $? != 0 ]; then
- report 1 $3 "Failed to find required string:'${1}'."
+ grep -q "$1" "$2"
+ if [ "$?" != 0 ]; then
+ report 1 "$3" "Failed to find required string:'${1}'."
else
- report 0 $3
+ report 0 "$3"
fi
}
+# check if perf is compiled with libtraceevent support
+skip_no_probe_record_support() {
+ perf record -e "sched:sched_switch" -a -- sleep 1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2
+ return 0
+}
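
The helper spots a perf binary built without libtraceevent by attempting a tracepoint record and grepping for the telltale error, then returns the conventional "skip" code 2 so callers can bail out cleanly, as the harness below does:

    skip_no_probe_record_support
    ret=$?
    [ $ret -ne 0 ] && { echo "skip: no libtraceevent support"; exit $ret; }
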
+
prepare_perf_data() {
# 1s should be sufficient to catch at least some switches
perf record -e sched:sched_switch -a -- sleep 1 > /dev/null 2>&1
+ # check if the perf data file got created in the above step.
+ if [ ! -e "perf.data" ]; then
+ printf "FAIL: perf record failed to create \"perf.data\" \n"
+ return 1
+ fi
}
# check standard invocation with no arguments
test_basic() {
out="$tmpdir/perf.out"
- perf script report task-analyzer > $out
- check_exec_0 "perf"
- find_str_or_fail "Comm" $out ${FUNCNAME[0]}
+ perf script report task-analyzer > "$out"
+ check_exec_0 "perf script report task-analyzer"
+ find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}"
}
test_ns_rename(){
out="$tmpdir/perf.out"
- perf script report task-analyzer --ns --rename-comms-by-tids 0:random > $out
- check_exec_0 "perf"
- find_str_or_fail "Comm" $out ${FUNCNAME[0]}
+ perf script report task-analyzer --ns --rename-comms-by-tids 0:random > "$out"
+ check_exec_0 "perf script report task-analyzer --ns --rename-comms-by-tids 0:random"
+ find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}"
}
test_ms_filtertasks_highlight(){
out="$tmpdir/perf.out"
perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf \
- > $out
- check_exec_0 "perf"
- find_str_or_fail "Comm" $out ${FUNCNAME[0]}
+ > "$out"
+ check_exec_0 "perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf"
+ find_str_or_fail "Comm" "$out" "${FUNCNAME[0]}"
}
test_extended_times_timelimit_limittasks() {
out="$tmpdir/perf.out"
perf script report task-analyzer --extended-times --time-limit :99999 \
- --limit-to-tasks perf > $out
- check_exec_0 "perf"
- find_str_or_fail "Out-Out" $out ${FUNCNAME[0]}
+ --limit-to-tasks perf > "$out"
+ check_exec_0 "perf script report task-analyzer --extended-times --time-limit :99999 --limit-to-tasks perf"
+ find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}"
}
test_summary() {
out="$tmpdir/perf.out"
- perf script report task-analyzer --summary > $out
- check_exec_0 "perf"
- find_str_or_fail "Summary" $out ${FUNCNAME[0]}
+ perf script report task-analyzer --summary > "$out"
+ check_exec_0 "perf script report task-analyzer --summary"
+ find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}"
}
test_summaryextended() {
out="$tmpdir/perf.out"
- perf script report task-analyzer --summary-extended > $out
- check_exec_0 "perf"
- find_str_or_fail "Inter Task Times" $out ${FUNCNAME[0]}
+ perf script report task-analyzer --summary-extended > "$out"
+ check_exec_0 "perf script report task-analyzer --summary-extended"
+ find_str_or_fail "Inter Task Times" "$out" "${FUNCNAME[0]}"
}
test_summaryonly() {
out="$tmpdir/perf.out"
- perf script report task-analyzer --summary-only > $out
- check_exec_0 "perf"
- find_str_or_fail "Summary" $out ${FUNCNAME[0]}
+ perf script report task-analyzer --summary-only > "$out"
+ check_exec_0 "perf script report task-analyzer --summary-only"
+ find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}"
}
test_extended_times_summary_ns() {
out="$tmpdir/perf.out"
- perf script report task-analyzer --extended-times --summary --ns > $out
- check_exec_0 "perf"
- find_str_or_fail "Out-Out" $out ${FUNCNAME[0]}
- find_str_or_fail "Summary" $out ${FUNCNAME[0]}
+ perf script report task-analyzer --extended-times --summary --ns > "$out"
+ check_exec_0 "perf script report task-analyzer --extended-times --summary --ns"
+ find_str_or_fail "Out-Out" "$out" "${FUNCNAME[0]}"
+ find_str_or_fail "Summary" "$out" "${FUNCNAME[0]}"
}
test_csv() {
perf script report task-analyzer --csv csv > /dev/null
- check_exec_0 "perf"
- find_str_or_fail "Comm;" csv ${FUNCNAME[0]}
+ check_exec_0 "perf script report task-analyzer --csv csv"
+ find_str_or_fail "Comm;" csv "${FUNCNAME[0]}"
}
test_csv_extended_times() {
perf script report task-analyzer --csv csv --extended-times > /dev/null
- check_exec_0 "perf"
- find_str_or_fail "Out-Out;" csv ${FUNCNAME[0]}
+ check_exec_0 "perf script report task-analyzer --csv csv --extended-times"
+ find_str_or_fail "Out-Out;" csv "${FUNCNAME[0]}"
}
test_csvsummary() {
perf script report task-analyzer --csv-summary csvsummary > /dev/null
- check_exec_0 "perf"
- find_str_or_fail "Comm;" csvsummary ${FUNCNAME[0]}
+ check_exec_0 "perf script report task-analyzer --csv-summary csvsummary"
+ find_str_or_fail "Comm;" csvsummary "${FUNCNAME[0]}"
}
test_csvsummary_extended() {
perf script report task-analyzer --csv-summary csvsummary --summary-extended \
>/dev/null
- check_exec_0 "perf"
- find_str_or_fail "Out-Out;" csvsummary ${FUNCNAME[0]}
+ check_exec_0 "perf script report task-analyzer --csv-summary csvsummary --summary-extended"
+ find_str_or_fail "Out-Out;" csvsummary "${FUNCNAME[0]}"
}
+skip_no_probe_record_support
+err=$?
+if [ $err -ne 0 ]; then
+ echo "WARN: Skipping tests. No libtraceevent support"
+ cleanup
+ exit $err
+fi
prepare_perf_data
test_basic
test_ns_rename
diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
new file mode 100755
index 000000000000..319f36ebb9a4
--- /dev/null
+++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# test perf probe of function from different CU
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# skip if there's no gcc
+if ! [ -x "$(command -v gcc)" ]; then
+ echo "failed: no gcc compiler"
+ exit 2
+fi
+
+temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
+
+cleanup()
+{
+ trap - EXIT TERM INT
+ if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
+ echo "--- Cleaning up ---"
+ perf probe -x ${temp_dir}/testfile -d foo || true
+ rm -f "${temp_dir}/"*
+ rmdir "${temp_dir}"
+ fi
+}
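
cleanup() only deletes when ${temp_dir} still matches the mktemp template, a guard that keeps a corrupted or empty variable from pointing 'rm -rf' at the wrong path. The same defensive pattern in miniature:

    tmp=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
    if [[ "$tmp" =~ ^/tmp/perf-uprobe-different-cu-sh ]]; then
        rm -rf "$tmp"    # refuse to delete anything outside the template
    fi
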
+
+trap_cleanup()
+{
+ cleanup
+ exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+cat > ${temp_dir}/testfile-foo.h << EOF
+struct t
+{
+ int *p;
+ int c;
+};
+
+extern int foo (int i, struct t *t);
+EOF
+
+cat > ${temp_dir}/testfile-foo.c << EOF
+#include "testfile-foo.h"
+
+int
+foo (int i, struct t *t)
+{
+ int j, res = 0;
+ for (j = 0; j < i && j < t->c; j++)
+ res += t->p[j];
+
+ return res;
+}
+EOF
+
+cat > ${temp_dir}/testfile-main.c << EOF
+#include "testfile-foo.h"
+
+static struct t g;
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ int j[argc];
+ g.c = argc;
+ g.p = j;
+ for (i = 0; i < argc; i++)
+ j[i] = (int) argv[i][0];
+ return foo (3, &g);
+}
+EOF
+
+gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o
+gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o
+gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o
+
+perf probe -x ${temp_dir}/testfile --funcs foo
+perf probe -x ${temp_dir}/testfile foo
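
Building testfile-foo.c with -flto is the point of the test: link-time optimization may emit foo's DWARF under a different compile unit than its source file name suggests, which historically confused perf probe. One way to inspect where the debug info for foo actually landed, assuming binutils' readelf is available:

    readelf --debug-dump=info ${temp_dir}/testfile | grep -B2 -A2 'DW_AT_name.*foo'
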
+
+cleanup
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index b3bd14b025a8..e52b031bedc5 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -20,7 +20,7 @@
#include "tests.h"
#include "util/mmap.h"
#include "util/sample.h"
-#include "pmu.h"
+#include "pmus.h"
static int spin_sleep(void)
{
@@ -375,17 +375,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
cpu_clocks_evsel = evlist__last(evlist);
/* Second event */
- if (perf_pmu__has_hybrid()) {
- cycles = "cpu_core/cycles/u";
- err = parse_event(evlist, cycles);
- if (err) {
- cycles = "cpu_atom/cycles/u";
- pr_debug("Trying %s\n", cycles);
- err = parse_event(evlist, cycles);
- }
- } else {
- err = parse_event(evlist, cycles);
- }
+ err = parse_event(evlist, cycles);
if (err) {
pr_debug("Failed to parse event %s\n", cycles);
goto out_err;
diff --git a/tools/perf/tests/symbols.c b/tools/perf/tests/symbols.c
index 2d1aa42d36a9..16e1c5502b09 100644
--- a/tools/perf/tests/symbols.c
+++ b/tools/perf/tests/symbols.c
@@ -38,7 +38,6 @@ static int init_test_info(struct test_info *ti)
static void exit_test_info(struct test_info *ti)
{
thread__put(ti->thread);
- machine__delete_threads(ti->machine);
machine__delete(ti->machine);
}
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 25f075fa9125..968dddde6dda 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -58,9 +58,9 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _
signal(SIGCHLD, sig_handler);
- evlist = evlist__new_default();
+ evlist = evlist__new_dummy();
if (evlist == NULL) {
- pr_debug("evlist__new_default\n");
+ pr_debug("evlist__new_dummy\n");
return -1;
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 9a0f3904e53d..f424c0b7f43f 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -118,15 +118,13 @@ DECLARE_SUITE(bpf);
DECLARE_SUITE(session_topology);
DECLARE_SUITE(thread_map_synthesize);
DECLARE_SUITE(thread_map_remove);
-DECLARE_SUITE(cpu_map_synthesize);
+DECLARE_SUITE(cpu_map);
DECLARE_SUITE(synthesize_stat_config);
DECLARE_SUITE(synthesize_stat);
DECLARE_SUITE(synthesize_stat_round);
DECLARE_SUITE(event_update);
DECLARE_SUITE(event_times);
DECLARE_SUITE(backward_ring_buffer);
-DECLARE_SUITE(cpu_map_print);
-DECLARE_SUITE(cpu_map_merge);
DECLARE_SUITE(sdt_event);
DECLARE_SUITE(is_printable_array);
DECLARE_SUITE(bitmap_print);
diff --git a/tools/perf/tests/thread-maps-share.c b/tools/perf/tests/thread-maps-share.c
index 858e725318a9..faf980b26252 100644
--- a/tools/perf/tests/thread-maps-share.c
+++ b/tools/perf/tests/thread-maps-share.c
@@ -42,13 +42,13 @@ static int test__thread_maps_share(struct test_suite *test __maybe_unused, int s
TEST_ASSERT_VAL("failed to create threads",
leader && t1 && t2 && t3 && other);
- maps = leader->maps;
+ maps = thread__maps(leader);
TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(maps__refcnt(maps)), 4);
/* test the maps pointer is shared */
- TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(maps) == RC_CHK_ACCESS(t1->maps));
- TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(maps) == RC_CHK_ACCESS(t2->maps));
- TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(maps) == RC_CHK_ACCESS(t3->maps));
+ TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(maps) == RC_CHK_ACCESS(thread__maps(t1)));
+ TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(maps) == RC_CHK_ACCESS(thread__maps(t2)));
+ TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(maps) == RC_CHK_ACCESS(thread__maps(t3)));
/*
* Verify the other leader was created by previous call.
@@ -70,10 +70,11 @@ static int test__thread_maps_share(struct test_suite *test __maybe_unused, int s
machine__remove_thread(machine, other);
machine__remove_thread(machine, other_leader);
- other_maps = other->maps;
+ other_maps = thread__maps(other);
TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(maps__refcnt(other_maps)), 2);
- TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(other_maps) == RC_CHK_ACCESS(other_leader->maps));
+ TEST_ASSERT_VAL("maps don't match", RC_CHK_ACCESS(other_maps) ==
+ RC_CHK_ACCESS(thread__maps(other_leader)));
/* release thread group */
thread__put(t3);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index c4630cfc80ea..9dee63734e66 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -8,7 +8,7 @@
#include "session.h"
#include "evlist.h"
#include "debug.h"
-#include "pmu.h"
+#include "pmus.h"
#include <linux/err.h>
#define TEMPL "/tmp/perf-test-XXXXXX"
@@ -41,18 +41,8 @@ static int session_write_header(char *path)
session = perf_session__new(&data, NULL);
TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
- if (!perf_pmu__has_hybrid()) {
- session->evlist = evlist__new_default();
- TEST_ASSERT_VAL("can't get evlist", session->evlist);
- } else {
- struct parse_events_error err;
-
- session->evlist = evlist__new();
- TEST_ASSERT_VAL("can't get evlist", session->evlist);
- parse_events_error__init(&err);
- parse_events(session->evlist, "cpu_core/cycles/", &err);
- parse_events_error__exit(&err);
- }
+ session->evlist = evlist__new_default();
+ TEST_ASSERT_VAL("can't get evlist", session->evlist);
perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
perf_header__set_feat(&session->header, HEADER_NRCPUS);
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 4c59edddd6a8..3d12bf0f6d07 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -11,7 +11,7 @@ struct strarray {
u64 offset;
int nr_entries;
const char *prefix;
- const char **entries;
+ const char * const *entries;
};
#define DEFINE_STRARRAY(array, _prefix) struct strarray strarray__##array = { \
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 13c3a237b9c9..39b74d83c7c4 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
#define SCM_SECURITY 0x03 /* rw: security label */
+#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
struct ucred {
__u32 pid;
@@ -318,7 +319,6 @@ struct ucred {
#define MSG_MORE 0x8000 /* Sender will send more */
#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */
#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
-#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
#define MSG_EOF MSG_FIN
#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
@@ -327,6 +327,7 @@ struct ucred {
*/
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
+#define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
descriptor received through
@@ -337,6 +338,9 @@ struct ucred {
#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */
#endif
+/* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */
+#define MSG_INTERNAL_SENDMSG_FLAGS \
+ (MSG_SPLICE_PAGES | MSG_SENDPAGE_NOPOLICY | MSG_SENDPAGE_DECRYPTED)
/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
#define SOL_IP 0
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
index 32e552faf37a..ce5e632d1448 100755
--- a/tools/perf/trace/beauty/move_mount_flags.sh
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -10,7 +10,7 @@ fi
linux_mount=${linux_header_dir}/mount.h
printf "static const char *move_mount_flags[] = {\n"
-regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
grep -E $regex ${linux_mount} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
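
End to end, the pipeline greps the MOVE_MOUNT_* defines, rewrites each match to "value name", and lets xargs printf emit one designated initializer per flag, indexed by bit position via ilog2(). For a header line like the first comment below, the generated entry looks like the second:

    # input:   #define MOVE_MOUNT_F_SYMLINKS 0x00000001
    # output:  [ilog2(0x00000001) + 1] = "F_SYMLINKS",
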
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index ea68db08b8e7..ed3ff969b546 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -8,8 +8,14 @@
#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE 0x10000
#endif
-#ifndef MSG_SENDPAGE_NOTLAST
-#define MSG_SENDPAGE_NOTLAST 0x20000
+#ifndef MSG_BATCH
+#define MSG_BATCH 0x40000
+#endif
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+#ifndef MSG_SPLICE_PAGES
+#define MSG_SPLICE_PAGES 0x8000000
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
@@ -50,7 +56,9 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
P_MSG_FLAG(NOSIGNAL);
P_MSG_FLAG(MORE);
P_MSG_FLAG(WAITFORONE);
- P_MSG_FLAG(SENDPAGE_NOTLAST);
+ P_MSG_FLAG(BATCH);
+ P_MSG_FLAG(ZEROCOPY);
+ P_MSG_FLAG(SPLICE_PAGES);
P_MSG_FLAG(FASTOPEN);
P_MSG_FLAG(CMSG_CLOEXEC);
#undef P_MSG_FLAG
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
index 1a6acc46807b..8f9c9950f8ba 100644
--- a/tools/perf/trace/beauty/pid.c
+++ b/tools/perf/trace/beauty/pid.c
@@ -8,10 +8,10 @@ size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg
struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
if (thread != NULL) {
- if (!thread->comm_set)
+ if (!thread__comm_set(thread))
thread__set_comm_from_proc(thread);
- if (thread->comm_set)
+ if (thread__comm_set(thread))
printed += scnprintf(bf + printed, size - printed,
" (%s)", thread__comm_str(thread));
thread__put(thread);
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
index 0078689963e0..fa3c4418e856 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -13,7 +13,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h
# Just the ones starting with 0x00000 so as to have a simple
# array.
-printf "static const char *x86_MSRs[] = {\n"
+printf "static const char * const x86_MSRs[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*'
grep -E $regex ${x86_msr_index} | grep -E -v 'MSR_(ATOM|P[46]|IA32_(TSC_DEADLINE|UCODE_REV)|IDT_FCR4)' | \
sed -r "s/$regex/\2 \1/g" | sort -n | \
@@ -24,7 +24,7 @@ printf "};\n\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0000[[:xdigit:]]+)[[:space:]]*.*'
printf "#define x86_64_specific_MSRs_offset "
grep -E $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
-printf "static const char *x86_64_specific_MSRs[] = {\n"
+printf "static const char * const x86_64_specific_MSRs[] = {\n"
grep -E $regex ${x86_msr_index} | \
sed -r "s/$regex/\2 \1/g" | grep -E -vw 'K6_WHCR' | sort -n | \
xargs printf "\t[%s - x86_64_specific_MSRs_offset] = \"%s\",\n"
@@ -33,7 +33,7 @@ printf "};\n\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0010[[:xdigit:]]+)[[:space:]]*.*'
printf "#define x86_AMD_V_KVM_MSRs_offset "
grep -E $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
-printf "static const char *x86_AMD_V_KVM_MSRs[] = {\n"
+printf "static const char * const x86_AMD_V_KVM_MSRs[] = {\n"
grep -E $regex ${x86_msr_index} | \
sed -r "s/$regex/\2 \1/g" | sort -n | \
xargs printf "\t[%s - x86_AMD_V_KVM_MSRs_offset] = \"%s\",\n"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 12c3ce530e42..ccdb2cd11fbf 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -314,7 +314,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
browser->entries = RB_ROOT;
- mutex_lock(&notes->lock);
+ annotation__lock(notes);
symbol__calc_percent(sym, evsel);
@@ -343,7 +343,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
}
disasm_rb_tree__insert(browser, &pos->al);
}
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
browser->curr_hot = rb_last(&browser->entries);
}
@@ -470,10 +470,10 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
}
notes = symbol__annotation(dl->ops.target.sym);
- mutex_lock(&notes->lock);
+ annotation__lock(notes);
if (!symbol__hists(dl->ops.target.sym, evsel->evlist->core.nr_entries)) {
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
ui__warning("Not enough memory for annotating '%s' symbol!\n",
dl->ops.target.sym->name);
return true;
@@ -482,7 +482,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
target_ms.maps = ms->maps;
target_ms.map = ms->map;
target_ms.sym = dl->ops.target.sym;
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts);
sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type);
ui_browser__show_title(&browser->b, title);
@@ -781,9 +781,9 @@ static int annotate_browser__run(struct annotate_browser *browser,
ui_browser__help_window(&browser->b,
"UP/DOWN/PGUP\n"
"PGDN/SPACE Navigate\n"
+ "</> Move to prev/next symbol\n"
"q/ESC/CTRL+C Exit\n\n"
"ENTER Go to target\n"
- "ESC Exit\n"
"H Go to hottest instruction\n"
"TAB/shift+TAB Cycle thru hottest instructions\n"
"j Toggle showing jump to target arrows\n"
@@ -913,6 +913,8 @@ show_sup_ins:
annotation__toggle_full_addr(notes, ms);
continue;
case K_LEFT:
+ case '<':
+ case '>':
case K_ESC:
case 'q':
case CTRL('c'):
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 69c81759a64f..70db5a717905 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -407,11 +407,6 @@ static bool hist_browser__selection_has_children(struct hist_browser *browser)
return container_of(ms, struct callchain_list, ms)->has_children;
}
-static bool hist_browser__he_selection_unfolded(struct hist_browser *browser)
-{
- return browser->he_selection ? browser->he_selection->unfolded : false;
-}
-
static bool hist_browser__selection_unfolded(struct hist_browser *browser)
{
struct hist_entry *he = browser->he_selection;
@@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
return n;
}
-static void __hist_entry__set_folding(struct hist_entry *he,
- struct hist_browser *hb, bool unfold)
+static void hist_entry__set_folding(struct hist_entry *he,
+ struct hist_browser *hb, bool unfold)
{
hist_entry__init_have_children(he);
he->unfolded = unfold ? he->has_children : false;
@@ -603,34 +598,12 @@ static void __hist_entry__set_folding(struct hist_entry *he,
he->nr_rows = 0;
}
-static void hist_entry__set_folding(struct hist_entry *he,
- struct hist_browser *browser, bool unfold)
-{
- double percent;
-
- percent = hist_entry__get_percent_limit(he);
- if (he->filtered || percent < browser->min_pcnt)
- return;
-
- __hist_entry__set_folding(he, browser, unfold);
-
- if (!he->depth || unfold)
- browser->nr_hierarchy_entries++;
- if (he->leaf)
- browser->nr_callchain_rows += he->nr_rows;
- else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
- browser->nr_hierarchy_entries++;
- he->has_no_entry = true;
- he->nr_rows = 1;
- } else
- he->has_no_entry = false;
-}
-
static void
__hist_browser__set_folding(struct hist_browser *browser, bool unfold)
{
struct rb_node *nd;
struct hist_entry *he;
+ double percent;
nd = rb_first_cached(&browser->hists->entries);
while (nd) {
@@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
hist_entry__set_folding(he, browser, unfold);
+
+ percent = hist_entry__get_percent_limit(he);
+ if (he->filtered || percent < browser->min_pcnt)
+ continue;
+
+ if (!he->depth || unfold)
+ browser->nr_hierarchy_entries++;
+ if (he->leaf)
+ browser->nr_callchain_rows += he->nr_rows;
+ else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+ browser->nr_hierarchy_entries++;
+ he->has_no_entry = true;
+ he->nr_rows = 1;
+ } else
+ he->has_no_entry = false;
}
}
@@ -659,8 +647,10 @@ static void hist_browser__set_folding_selected(struct hist_browser *browser, boo
if (!browser->he_selection)
return;
- hist_entry__set_folding(browser->he_selection, browser, unfold);
- browser->b.nr_entries = hist_browser__nr_entries(browser);
+ if (unfold == browser->he_selection->unfolded)
+ return;
+
+ hist_browser__toggle_fold(browser);
}
static void ui_browser__warn_lost_events(struct ui_browser *browser)
@@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l
hist_browser__set_folding(browser, true);
break;
case 'e':
- /* Expand the selected entry. */
- hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser));
+ /* Toggle expand/collapse the selected entry. */
+ hist_browser__toggle_fold(browser);
break;
case 'H':
browser->show_headers = !browser->show_headers;
@@ -1779,7 +1769,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser)
hists_browser__scnprintf_hierarchy_headers(browser, headers,
sizeof(headers));
- ui_browser__gotorc(&browser->b, 0, 0);
+ ui_browser__gotorc_title(&browser->b, 0, 0);
ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
}
@@ -2533,13 +2523,15 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
thread__zput(browser->hists->thread_filter);
ui_helpline__pop();
} else {
+ const char *comm_set_str =
+ thread__comm_set(thread) ? thread__comm_str(thread) : "";
+
if (hists__has(browser->hists, thread)) {
ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
- thread->comm_set ? thread__comm_str(thread) : "",
- thread->tid);
+ comm_set_str, thread__tid(thread));
} else {
ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
- thread->comm_set ? thread__comm_str(thread) : "");
+ comm_set_str);
}
browser->hists->thread_filter = thread__get(thread);
@@ -2557,20 +2549,19 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, struct thread *thread)
{
int ret;
+ const char *comm_set_str, *in_out;
if ((!hists__has(browser->hists, thread) &&
!hists__has(browser->hists, comm)) || thread == NULL)
return 0;
+ in_out = browser->hists->thread_filter ? "out of" : "into";
+ comm_set_str = thread__comm_set(thread) ? thread__comm_str(thread) : "";
if (hists__has(browser->hists, thread)) {
ret = asprintf(optstr, "Zoom %s %s(%d) thread",
- browser->hists->thread_filter ? "out of" : "into",
- thread->comm_set ? thread__comm_str(thread) : "",
- thread->tid);
+ in_out, comm_set_str, thread__tid(thread));
} else {
- ret = asprintf(optstr, "Zoom %s %s thread",
- browser->hists->thread_filter ? "out of" : "into",
- thread->comm_set ? thread__comm_str(thread) : "");
+ ret = asprintf(optstr, "Zoom %s %s thread", in_out, comm_set_str);
}
if (ret < 0)
return 0;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f164bd26fc41..2bf959d08354 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -11,6 +11,7 @@
#include "../util/sort.h"
#include "../util/evsel.h"
#include "../util/evlist.h"
+#include "../util/thread.h"
#include "../util/util.h"
/* hist period print (hpp) functions */
@@ -274,7 +275,9 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
if (ret)
return ret;
- if (a->thread != b->thread || !hist_entry__has_callchains(a) || !symbol_conf.use_callchain)
+ if ((a->thread == NULL ? NULL : RC_CHK_ACCESS(a->thread)) !=
+ (b->thread == NULL ? NULL : RC_CHK_ACCESS(b->thread)) ||
+ !hist_entry__has_callchains(a) || !symbol_conf.use_callchain)
return 0;
ret = b->callchain->max_depth - a->callchain->max_depth;
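The pointer gymnastics above come from perf's reference-count checking infrastructure; a short, hedged sketch of why plain pointer equality no longer works (the real macros live in tools/lib/perf/include/internal/rc_check.h):

/*
 * Unchecked build: RC_CHK_ACCESS(x) is effectively x itself, so the test
 *                  degenerates to the old a->thread != b->thread.
 * Checked build:   each thread__get() returns a fresh one-shot wrapper,
 *                  so two wrappers for the same thread never compare
 *                  equal; equality must be tested on the unwrapped
 *                  underlying object, as done above.
 */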
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index f36270485168..b849caace398 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -885,7 +885,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
}
if (h->ms.map == NULL && verbose > 1) {
- maps__fprintf(h->thread->maps, fp);
+ maps__fprintf(thread__maps(h->thread), fp);
fprintf(fp, "%.10s end\n", graph_dotted_line);
}
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index f9df1df1eec0..96f4ea1d45c5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,4 +1,5 @@
perf-y += arm64-frame-pointer-unwind-support.o
+perf-y += addr_location.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
@@ -11,7 +12,6 @@ perf-y += db-export.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
-perf-y += evlist-hybrid.o
perf-y += sideband_evlist.o
perf-y += evsel.o
perf-y += evsel_fprintf.o
@@ -24,7 +24,6 @@ perf-y += llvm-utils.o
perf-y += mmap.o
perf-y += memswap.o
perf-y += parse-events.o
-perf-y += parse-events-hybrid.o
perf-y += print-events.o
perf-y += tracepoint.o
perf-y += perf_regs.o
@@ -75,7 +74,6 @@ perf-y += pmu.o
perf-y += pmus.o
perf-y += pmu-flex.o
perf-y += pmu-bison.o
-perf-y += pmu-hybrid.o
perf-y += svghelper.o
perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
perf-y += trace-event-scripting.o
@@ -147,6 +145,7 @@ perf-y += mem2node.o
perf-y += clockid.o
perf-y += list_sort.o
perf-y += mutex.o
+perf-y += sharded_mutex.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
new file mode 100644
index 000000000000..51825ef8c0ab
--- /dev/null
+++ b/tools/perf/util/addr_location.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "addr_location.h"
+#include "map.h"
+#include "maps.h"
+#include "thread.h"
+
+void addr_location__init(struct addr_location *al)
+{
+ al->thread = NULL;
+ al->maps = NULL;
+ al->map = NULL;
+ al->sym = NULL;
+ al->srcline = NULL;
+ al->addr = 0;
+ al->level = 0;
+ al->filtered = 0;
+ al->cpumode = 0;
+ al->cpu = 0;
+ al->socket = 0;
+}
+
+/*
+ * The preprocess_sample method returns with reference counts held on the
+ * entries in the addr_location. When the caller is done using them (and
+ * perhaps after taking extra references to keep a pointer to one of those
+ * entries), it must pair the use with addr_location__exit() so that the
+ * reference counts can be decremented.
+ */
+void addr_location__exit(struct addr_location *al)
+{
+ map__zput(al->map);
+ thread__zput(al->thread);
+ maps__zput(al->maps);
+}
+
+void addr_location__copy(struct addr_location *dst, struct addr_location *src)
+{
+ thread__put(dst->thread);
+ maps__put(dst->maps);
+ map__put(dst->map);
+ *dst = *src;
+ dst->thread = thread__get(src->thread);
+ dst->maps = maps__get(src->maps);
+ dst->map = map__get(src->map);
+}
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
new file mode 100644
index 000000000000..d8ac0428dff2
--- /dev/null
+++ b/tools/perf/util/addr_location.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ADDR_LOCATION
+#define __PERF_ADDR_LOCATION 1
+
+#include <linux/types.h>
+
+struct thread;
+struct maps;
+struct map;
+struct symbol;
+
+struct addr_location {
+ struct thread *thread;
+ struct maps *maps;
+ struct map *map;
+ struct symbol *sym;
+ const char *srcline;
+ u64 addr;
+ char level;
+ u8 filtered;
+ u8 cpumode;
+ s32 cpu;
+ s32 socket;
+};
+
+void addr_location__init(struct addr_location *al);
+void addr_location__exit(struct addr_location *al);
+
+void addr_location__copy(struct addr_location *dst, struct addr_location *src);
+
+#endif /* __PERF_ADDR_LOCATION */
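Taken together, the two new files establish the lifetime pattern the rest of this series converts callers to. A minimal, hedged usage sketch (resolve_ip() is hypothetical; thread__find_map() and map__dso() are the helpers already used elsewhere in this patch):

static void resolve_ip(struct thread *thread, u8 cpumode, u64 ip)
{
	struct addr_location al;

	addr_location__init(&al);	/* all fields NULL/zero, no references held */
	if (thread__find_map(thread, cpumode, ip, &al))
		pr_debug("ip %#" PRIx64 " hit %s\n", ip,
			 map__dso(al.map)->long_name);
	addr_location__exit(&al);	/* puts the map/thread/maps references */
}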
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 11992cfe271c..82956adf9963 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -32,6 +32,7 @@
#include "block-range.h"
#include "string2.h"
#include "util/event.h"
+#include "util/sharded_mutex.h"
#include "arch/common.h"
#include "namespaces.h"
#include <regex.h>
@@ -61,6 +62,10 @@ static regex_t file_lineno;
static struct ins_ops *ins__find(struct arch *arch, const char *name);
static void ins__sort(struct arch *arch);
static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
struct arch {
const char *name;
@@ -70,6 +75,7 @@ struct arch {
struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name);
bool sorted_instructions;
bool initialized;
+ const char *insn_suffix;
void *priv;
unsigned int model;
unsigned int family;
@@ -179,6 +185,7 @@ static struct arch architectures[] = {
.init = x86__annotate_init,
.instructions = x86__instructions,
.nr_instructions = ARRAY_SIZE(x86__instructions),
+ .insn_suffix = "bwlq",
.objdump = {
.comment_char = '#',
},
@@ -321,7 +328,7 @@ static struct ins_ops call_ops = {
bool ins__is_call(const struct ins *ins)
{
- return ins->ops == &call_ops || ins->ops == &s390_call_ops;
+ return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops;
}
/*
@@ -462,7 +469,7 @@ static struct ins_ops jump_ops = {
bool ins__is_jump(const struct ins *ins)
{
- return ins->ops == &jump_ops;
+ return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops;
}
static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
@@ -558,13 +565,26 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy
return -1;
*s = '\0';
+
+ /*
+ * x86 SIB addressing looks like 0x8(%rax, %rcx, 1); when that form is
+ * present, the source operand must extend up to the closing parenthesis
+ * rather than stopping at the first comma.
+ */
+ if (strchr(ops->raw, '(')) {
+ *s = ',';
+ s = strchr(ops->raw, ')');
+ if (s == NULL || s[1] != ',')
+ return -1;
+ *++s = '\0';
+ }
+
ops->source.raw = strdup(ops->raw);
*s = ',';
if (ops->source.raw == NULL)
return -1;
- target = ++s;
+ target = skip_spaces(++s);
comment = strchr(s, arch->objdump.comment_char);
if (comment != NULL)
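The SIB case is easier to see on a concrete operand string. A standalone sketch of the same scan (the input string is illustrative and error handling is simplified):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char raw[] = "0x8(%rax,%rcx,1),%rbx";	/* SIB source, register target */
	char *s = strchr(raw, ',');	/* first ',' lands inside the SIB operand */

	if (strchr(raw, '(')) {
		s = strchr(raw, ')');	/* step past the addressing mode... */
		s++;			/* ...to the ',' separating the operands */
	}
	*s = '\0';
	printf("source=%s target=%s\n", raw, s + 1);
	/* prints: source=0x8(%rax,%rcx,1) target=%rbx */
	return 0;
}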
@@ -707,6 +727,26 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name)
}
ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ if (ins)
+ return ins->ops;
+
+ if (arch->insn_suffix) {
+ char tmp[32];
+ char suffix;
+ size_t len = strlen(name);
+
+ if (len == 0 || len >= sizeof(tmp))
+ return NULL;
+
+ suffix = name[len - 1];
+ if (strchr(arch->insn_suffix, suffix) == NULL)
+ return NULL;
+
+ strcpy(tmp, name);
+ tmp[len - 1] = '\0'; /* remove the suffix and check again */
+
+ ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ }
return ins ? ins->ops : NULL;
}
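A standalone sketch of the suffix fallback, with a toy three-entry table standing in for perf's sorted x86 instruction array (the table and function names are illustrative):

#include <stdio.h>
#include <string.h>

static const char *table[] = { "add", "mov", "sub" };

static const char *find_ins(const char *name)
{
	char tmp[32];
	size_t len = strlen(name);

	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (!strcmp(table[i], name))
			return table[i];

	/* "addq" misses, but 'q' is in "bwlq": strip it and retry */
	if (len == 0 || len >= sizeof(tmp) || !strchr("bwlq", name[len - 1]))
		return NULL;

	strcpy(tmp, name);
	tmp[len - 1] = '\0';

	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (!strcmp(table[i], tmp))
			return table[i];
	return NULL;
}

int main(void)
{
	printf("%s\n", find_ins("addq"));	/* prints "add" */
	return 0;
}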
@@ -821,7 +861,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
- mutex_lock(&notes->lock);
+ annotation__lock(notes);
if (notes->src != NULL) {
memset(notes->src->histograms, 0,
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
@@ -829,7 +869,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
memset(notes->src->cycles_hist, 0,
symbol__size(sym) * sizeof(struct cyc_hist));
}
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
}
static int __symbol__account_cycles(struct cyc_hist *ch,
@@ -1086,7 +1126,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
notes->hit_insn = 0;
notes->cover_insn = 0;
- mutex_lock(&notes->lock);
+ annotation__lock(notes);
for (offset = size - 1; offset >= 0; --offset) {
struct cyc_hist *ch;
@@ -1105,7 +1145,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
notes->have_cycles = true;
}
}
- mutex_unlock(&notes->lock);
+ annotation__unlock(notes);
}
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
@@ -1183,7 +1223,7 @@ static void annotation_line__init(struct annotation_line *al,
static void annotation_line__exit(struct annotation_line *al)
{
- free_srcline(al->path);
+ zfree_srcline(&al->path);
zfree(&al->line);
}
@@ -1256,17 +1296,64 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
}
-void annotation__init(struct annotation *notes)
+void annotation__exit(struct annotation *notes)
+{
+ annotated_source__delete(notes->src);
+}
+
+static struct sharded_mutex *sharded_mutex;
+
+static void annotation__init_sharded_mutex(void)
{
- mutex_init(&notes->lock);
+ /* As many mutexes as there are CPUs. */
+ sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu);
}
-void annotation__exit(struct annotation *notes)
+static size_t annotation__hash(const struct annotation *notes)
{
- annotated_source__delete(notes->src);
- mutex_destroy(&notes->lock);
+ return (size_t)notes;
}
+static struct mutex *annotation__get_mutex(const struct annotation *notes)
+{
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&once, annotation__init_sharded_mutex);
+ if (!sharded_mutex)
+ return NULL;
+
+ return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes));
+}
+
+void annotation__lock(struct annotation *notes)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ struct mutex *mutex = annotation__get_mutex(notes);
+
+ if (mutex)
+ mutex_lock(mutex);
+}
+
+void annotation__unlock(struct annotation *notes)
+ NO_THREAD_SAFETY_ANALYSIS
+{
+ struct mutex *mutex = annotation__get_mutex(notes);
+
+ if (mutex)
+ mutex_unlock(mutex);
+}
+
+bool annotation__trylock(struct annotation *notes)
+{
+ struct mutex *mutex = annotation__get_mutex(notes);
+
+ if (!mutex)
+ return false;
+
+ return mutex_trylock(mutex);
+}
+
+
static void annotation_line__add(struct annotation_line *al, struct list_head *head)
{
list_add_tail(&al->node, head);
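annotation__lock() above leans on the new sharded_mutex added elsewhere in this series (tools/perf/util/sharded_mutex.c). A hedged sketch of the scheme it implies; the struct and function names below are assumptions, not the real API:

#include <pthread.h>
#include <stdlib.h>

struct sharded_mutex_sketch {
	size_t cap;			/* number of shards, e.g. max present CPUs */
	pthread_mutex_t locks[];	/* one mutex per shard */
};

static struct sharded_mutex_sketch *sketch_new(size_t num_shards)
{
	struct sharded_mutex_sketch *sm =
		malloc(sizeof(*sm) + num_shards * sizeof(pthread_mutex_t));

	if (!sm)
		return NULL;
	sm->cap = num_shards;
	for (size_t i = 0; i < num_shards; i++)
		pthread_mutex_init(&sm->locks[i], NULL);
	return sm;
}

static pthread_mutex_t *sketch_get(struct sharded_mutex_sketch *sm, size_t hash)
{
	/* same annotation -> same shard; unrelated ones rarely contend */
	return &sm->locks[hash % sm->cap];
}

Since the annotation's own address serves as the hash, the lock no longer needs to live inside 'struct annotation' at all, which is why the mutex field is dropped from the struct in the annotate.h hunk further down.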
@@ -1511,6 +1598,7 @@ static int symbol__parse_objdump_line(struct symbol *sym,
/* /filename:linenr ? Save line number and ignore. */
if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
*line_nr = atoi(parsed_line + match[1].rm_so);
+ free(*fileloc);
*fileloc = strdup(parsed_line);
return 0;
}
@@ -1559,7 +1647,6 @@ static int symbol__parse_objdump_line(struct symbol *sym,
}
annotation_line__add(&dl->al, &notes->src->source);
-
return 0;
}
@@ -1696,7 +1783,10 @@ fallback:
* cache, or is just a kallsyms file, well, lets hope that this
* DSO is the same as when 'perf record' ran.
*/
- __symbol__join_symfs(filename, filename_size, dso->long_name);
+ if (dso->kernel && dso->long_name[0] == '/')
+ snprintf(filename, filename_size, "%s", dso->long_name);
+ else
+ __symbol__join_symfs(filename, filename_size, dso->long_name);
mutex_lock(&dso->lock);
if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
@@ -1756,8 +1846,11 @@ static int symbol__disassemble_bpf(struct symbol *sym,
perf_exe(tpath, sizeof(tpath));
bfdf = bfd_openr(tpath, NULL);
- assert(bfdf);
- assert(bfd_check_format(bfdf, bfd_object));
+ if (bfdf == NULL)
+ abort();
+
+ if (!bfd_check_format(bfdf, bfd_object))
+ abort();
s = open_memstream(&buf, &buf_size);
if (!s) {
@@ -1805,7 +1898,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
#else
disassemble = disassembler(bfdf);
#endif
- assert(disassemble);
+ if (disassemble == NULL)
+ abort();
fflush(s);
do {
@@ -2101,6 +2195,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
nline++;
}
free(line);
+ free(fileloc);
err = finish_command(&objdump_process);
if (err)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 1c6335b8333a..962780559176 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -271,8 +271,7 @@ struct annotated_source {
struct sym_hist *histograms;
};
-struct annotation {
- struct mutex lock;
+struct LOCKABLE annotation {
u64 max_coverage;
u64 start;
u64 hit_cycles;
@@ -298,9 +297,15 @@ struct annotation {
struct annotated_source *src;
};
-void annotation__init(struct annotation *notes);
+static inline void annotation__init(struct annotation *notes __maybe_unused)
+{
+}
void annotation__exit(struct annotation *notes);
+void annotation__lock(struct annotation *notes) EXCLUSIVE_LOCK_FUNCTION(*notes);
+void annotation__unlock(struct annotation *notes) UNLOCK_FUNCTION(*notes);
+bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(true, *notes);
+
static inline int annotation__cycles_width(struct annotation *notes)
{
if (notes->have_cycles && notes->options->show_minmax_cycle)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index f3918f290df5..ba807071d3c1 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -51,7 +51,7 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
* (bits [63:56]) is assigned as top-byte tag; so we only can
* retrieve address value from bits [55:0].
*
- * According to Documentation/arm64/memory.rst, if it detects the
+ * According to Documentation/arch/arm64/memory.rst, if it detects the
* specific pattern in bits [55:52] of a payload which falls in
* the kernel space, it should fix up the top byte, and this allows the
* perf tool to parse DSO symbols for data addresses correctly.
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 7b36ba6b4079..afbd5869f6bf 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -254,9 +254,9 @@ static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
}
if (speq->thread) {
- speq->pid = speq->thread->pid_;
+ speq->pid = thread__pid(speq->thread);
if (queue->cpu == -1)
- speq->cpu = speq->thread->cpu;
+ speq->cpu = thread__cpu(speq->thread);
}
}
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index 16a7b4adcf18..591fc1edd385 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -296,8 +296,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
he->ms.sym);
- if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) &&
- (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) {
+ if (start_line != SRCLINE_UNKNOWN &&
+ end_line != SRCLINE_UNKNOWN) {
scnprintf(buf, sizeof(buf), "[%s -> %s]",
start_line, end_line);
} else {
@@ -305,8 +305,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
bi->start, bi->end);
}
- free_srcline(start_line);
- free_srcline(end_line);
+ zfree_srcline(&start_line);
+ zfree_srcline(&end_line);
return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
}
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index 0b30688d78a7..b51544996046 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -9,8 +9,8 @@
#include "util/evsel.h"
#include "util/bpf-filter.h"
-#include "util/bpf-filter-flex.h"
-#include "util/bpf-filter-bison.h"
+#include <util/bpf-filter-flex.h>
+#include <util/bpf-filter-bison.h>
#include "bpf_skel/sample-filter.h"
#include "bpf_skel/sample_filter.skel.h"
@@ -62,6 +62,16 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
if (evsel->core.attr.sample_type & expr->sample_flags)
return 0;
+ if (expr->op == PBF_OP_GROUP_BEGIN) {
+ struct perf_bpf_filter_expr *group;
+
+ list_for_each_entry(group, &expr->groups, list) {
+ if (check_sample_flags(evsel, group) < 0)
+ return -1;
+ }
+ return 0;
+ }
+
info = get_sample_info(expr->sample_flags);
if (info == NULL) {
pr_err("Error: %s event does not have sample flags %lx\n",
diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore
index 7a1c832825de..cd01455e1b53 100644
--- a/tools/perf/util/bpf_skel/.gitignore
+++ b/tools/perf/util/bpf_skel/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
.tmp
*.skel.h
+vmlinux.h
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 1d48226ae75d..8d3cfbb3cc65 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -416,8 +416,6 @@ int contention_end(u64 *ctx)
return 0;
}
-struct rq {};
-
extern struct rq runqueues __ksym;
struct rq___old {
diff --git a/tools/perf/util/bpf_skel/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
index c7ed51b0c1ef..ab84a6e1da5e 100644
--- a/tools/perf/util/bpf_skel/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -171,4 +171,14 @@ struct bpf_perf_event_data_kern {
struct perf_sample_data *data;
struct perf_event *event;
} __attribute__((preserve_access_index));
+
+/*
+ * If 'struct rq' isn't defined for lock_contention.bpf.c, for the sake of
+ * rq___old and rq___new, then the type for the 'runqueues' variable ends up
+ * being a forward declaration (BTF_KIND_FWD) while the kernel has it defined
+ * (BTF_KIND_STRUCT). The definition appears in vmlinux.h rather than
+ * lock_contention.bpf.c for consistency with a generated vmlinux.h.
+ */
+struct rq {};
+
#endif // __VMLINUX_H
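For readers less familiar with CO-RE: the rq___old/rq___new types mentioned above use libbpf's versioned-struct convention, where everything from the triple underscore onward is ignored when matching local BTF against kernel BTF, and the match only resolves if 'struct rq' is a defined type rather than a forward declaration. A hedged sketch of the pattern, following the shape of lock_contention.bpf.c:

/* one BPF object copes with both runqueue lock layouts */
struct rq___old {
	raw_spinlock_t lock;		/* field name on older kernels */
} __attribute__((preserve_access_index));

struct rq___new {
	raw_spinlock_t __lock;		/* field name on newer kernels */
} __attribute__((preserve_access_index));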
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 06a8cd88cbef..36728222a5b4 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -58,9 +58,11 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
return -1;
}
+ addr_location__init(&al);
if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
map__dso(al.map)->hit = 1;
+ addr_location__exit(&al);
thread__put(thread);
return 0;
}
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 9f2e36ef5072..0b61840d4226 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -26,6 +26,6 @@ static inline int is_absolute_path(const char *path)
return path[0] == '/';
}
-char *mkpath(const char *fmt, ...) __printf(1, 2);
+char *mkpath(char *path_buf, size_t sz, const char *fmt, ...) __printf(3, 4);
#endif /* __PERF_CACHE_H */
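The two extra parameters hand buffer ownership to the caller, presumably so mkpath() no longer returns a pointer into a shared static buffer. The converted call site in config.c below follows this shape:

char path[PATH_MAX];

/* the formatted result points into the caller-supplied path buffer */
char *config = strdup(mkpath(path, sizeof(path), "%s/.perfconfig", home));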
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index b0dafc758173..aee937d14fbb 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -58,7 +58,8 @@ struct callchain_param callchain_param_default = {
CALLCHAIN_PARAM_DEFAULT
};
-__thread struct callchain_cursor callchain_cursor;
+/* Used for thread-local struct callchain_cursor. */
+static pthread_key_t callchain_cursor;
int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
@@ -590,6 +591,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->ip = cursor_node->ip;
call->ms = cursor_node->ms;
call->ms.map = map__get(call->ms.map);
+ call->ms.maps = maps__get(call->ms.maps);
call->srcline = cursor_node->srcline;
if (cursor_node->branch) {
@@ -649,6 +651,7 @@ add_child(struct callchain_node *parent,
list_for_each_entry_safe(call, tmp, &new->val, list) {
list_del_init(&call->list);
map__zput(call->ms.map);
+ maps__zput(call->ms.maps);
free(call);
}
free(new);
@@ -984,6 +987,9 @@ int callchain_append(struct callchain_root *root,
struct callchain_cursor *cursor,
u64 period)
{
+ if (cursor == NULL)
+ return -1;
+
if (!cursor->nr)
return 0;
@@ -1010,10 +1016,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
int err = 0;
list_for_each_entry_safe(list, next_list, &src->val, list) {
- callchain_cursor_append(cursor, list->ip, &list->ms,
- false, NULL, 0, 0, 0, list->srcline);
+ struct map_symbol ms = {
+ .maps = maps__get(list->ms.maps),
+ .map = map__get(list->ms.map),
+ };
+ callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline);
list_del_init(&list->list);
+ map__zput(ms.map);
+ maps__zput(ms.maps);
map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
free(list);
}
@@ -1065,9 +1077,11 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
}
node->ip = ip;
+ maps__zput(node->ms.maps);
map__zput(node->ms.map);
node->ms = *ms;
- node->ms.map = map__get(node->ms.map);
+ node->ms.maps = maps__get(ms->maps);
+ node->ms.map = map__get(ms->map);
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->iter_cycles = iter_cycles;
@@ -1106,7 +1120,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
!symbol_conf.show_branchflag_count)
return 0;
- return callchain_append(he->callchain, &callchain_cursor, sample->period);
+ return callchain_append(he->callchain, get_tls_callchain_cursor(), sample->period);
}
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
@@ -1114,7 +1128,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
{
struct machine *machine = maps__machine(node->ms.maps);
- al->maps = node->ms.maps;
+ maps__put(al->maps);
+ al->maps = maps__get(node->ms.maps);
map__put(al->map);
al->map = map__get(node->ms.map);
al->sym = node->ms.sym;
@@ -1127,7 +1142,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
if (al->map == NULL)
goto out;
}
- if (al->maps == machine__kernel_maps(machine)) {
+ if (RC_CHK_ACCESS(al->maps) == RC_CHK_ACCESS(machine__kernel_maps(machine))) {
if (machine__is_host(machine)) {
al->cpumode = PERF_RECORD_MISC_KERNEL;
al->level = 'k';
@@ -1460,12 +1475,14 @@ static void free_callchain_node(struct callchain_node *node)
list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
list_del_init(&list->list);
map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
free(list);
}
list_for_each_entry_safe(list, tmp, &node->val, list) {
list_del_init(&list->list);
map__zput(list->ms.map);
+ maps__zput(list->ms.maps);
free(list);
}
@@ -1551,11 +1568,49 @@ out:
list_for_each_entry_safe(chain, new, &head, list) {
list_del_init(&chain->list);
map__zput(chain->ms.map);
+ maps__zput(chain->ms.maps);
free(chain);
}
return -ENOMEM;
}
+static void callchain_cursor__delete(void *vcursor)
+{
+ struct callchain_cursor *cursor = vcursor;
+ struct callchain_cursor_node *node, *next;
+
+ callchain_cursor_reset(cursor);
+ for (node = cursor->first; node != NULL; node = next) {
+ next = node->next;
+ free(node);
+ }
+ free(cursor);
+}
+
+static void init_callchain_cursor_key(void)
+{
+ if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) {
+ pr_err("callchain cursor creation failed");
+ abort();
+ }
+}
+
+struct callchain_cursor *get_tls_callchain_cursor(void)
+{
+ static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+ struct callchain_cursor *cursor;
+
+ pthread_once(&once_control, init_callchain_cursor_key);
+ cursor = pthread_getspecific(callchain_cursor);
+ if (!cursor) {
+ cursor = zalloc(sizeof(*cursor));
+ if (!cursor)
+ pr_debug3("%s: not enough memory\n", __func__);
+ pthread_setspecific(callchain_cursor, cursor);
+ }
+ return cursor;
+}
+
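Converted call sites all follow one shape. A hedged usage sketch (note the NULL tolerance, since allocating the per-thread cursor can fail):

struct callchain_cursor *cursor = get_tls_callchain_cursor();

if (cursor != NULL) {
	callchain_cursor_reset(cursor);
	/* ... fill via callchain_cursor_append() or thread__resolve_callchain() ... */
	callchain_cursor_commit(cursor);
}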
int callchain_cursor__copy(struct callchain_cursor *dst,
struct callchain_cursor *src)
{
@@ -1596,8 +1651,10 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
cursor->nr = 0;
cursor->last = &cursor->first;
- for (node = cursor->first; node != NULL; node = node->next)
+ for (node = cursor->first; node != NULL; node = node->next) {
map__zput(node->ms.map);
+ maps__zput(node->ms.maps);
+ }
}
void callchain_param_setup(u64 sample_type, const char *arch)
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d95615daed73..d2618a47deca 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -168,8 +168,6 @@ struct callchain_cursor {
struct callchain_cursor_node *curr;
};
-extern __thread struct callchain_cursor callchain_cursor;
-
static inline void callchain_init(struct callchain_root *root)
{
INIT_LIST_HEAD(&root->node.val);
@@ -211,6 +209,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
/* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
{
+ if (cursor == NULL)
+ return;
cursor->curr = cursor->first;
cursor->pos = 0;
}
@@ -219,7 +219,7 @@ static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
static inline struct callchain_cursor_node *
callchain_cursor_current(struct callchain_cursor *cursor)
{
- if (cursor->pos == cursor->nr)
+ if (cursor == NULL || cursor->pos == cursor->nr)
return NULL;
return cursor->curr;
@@ -231,6 +231,8 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
cursor->pos++;
}
+struct callchain_cursor *get_tls_callchain_cursor(void);
+
int callchain_cursor__copy(struct callchain_cursor *dst,
struct callchain_cursor *src);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 658170b8dcef..46f144c46827 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -19,6 +19,7 @@
#include "util/llvm-utils.h" /* perf_llvm_config */
#include "util/stat.h" /* perf_stat__set_big_num */
#include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */
+#include "util/srcline.h" /* addr2line_timeout_ms */
#include "build-id.h"
#include "debug.h"
#include "config.h"
@@ -434,12 +435,14 @@ static int perf_buildid_config(const char *var, const char *value)
return 0;
}
-static int perf_default_core_config(const char *var __maybe_unused,
- const char *value __maybe_unused)
+static int perf_default_core_config(const char *var, const char *value)
{
if (!strcmp(var, "core.proc-map-timeout"))
proc_map_timeout = strtoul(value, NULL, 10);
+ if (!strcmp(var, "core.addr2line-timeout"))
+ addr2line_timeout_ms = strtoul(value, NULL, 10);
+
/* Add other config variables here. */
return 0;
}
@@ -543,6 +546,7 @@ static char *home_perfconfig(void)
const char *home = NULL;
char *config;
struct stat st;
+ char path[PATH_MAX];
home = getenv("HOME");
@@ -554,7 +558,7 @@ static char *home_perfconfig(void)
if (!home || !*home || !perf_config_global())
return NULL;
- config = strdup(mkpath("%s/.perfconfig", home));
+ config = strdup(mkpath(path, sizeof(path), "%s/.perfconfig", home));
if (config == NULL) {
pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home);
return NULL;
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 75d9c73e0184..0e090e8bc334 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -222,6 +222,10 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
return a->socket - b->socket;
else if (a->die != b->die)
return a->die - b->die;
+ else if (a->cache_lvl != b->cache_lvl)
+ return a->cache_lvl - b->cache_lvl;
+ else if (a->cache != b->cache)
+ return a->cache - b->cache;
else if (a->core != b->core)
return a->core - b->core;
else
@@ -663,9 +667,9 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
return ptr - buf;
}
-const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
+struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
{
- static const struct perf_cpu_map *online = NULL;
+ static struct perf_cpu_map *online;
if (!online)
online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
@@ -679,6 +683,8 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b
a->node == b->node &&
a->socket == b->socket &&
a->die == b->die &&
+ a->cache_lvl == b->cache_lvl &&
+ a->cache == b->cache &&
a->core == b->core &&
a->cpu.cpu == b->cpu.cpu;
}
@@ -689,6 +695,8 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
a->node == -1 &&
a->socket == -1 &&
a->die == -1 &&
+ a->cache_lvl == -1 &&
+ a->cache == -1 &&
a->core == -1 &&
a->cpu.cpu == -1;
}
@@ -700,6 +708,8 @@ struct aggr_cpu_id aggr_cpu_id__empty(void)
.node = -1,
.socket = -1,
.die = -1,
+ .cache_lvl = -1,
+ .cache = -1,
.core = -1,
.cpu = (struct perf_cpu){ .cpu = -1 },
};
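A small sketch of what the widened key lets aggregation distinguish (the values stand in for illustrative sysfs readings):

struct aggr_cpu_id id = aggr_cpu_id__empty();	/* every field starts at -1 */

id.socket    = 0;
id.die       = 0;
id.cache_lvl = 3;	/* cache/indexY/level */
id.cache     = 4;	/* first CPU in indexY/shared_cpu_list */
id.core      = 8;

/*
 * aggr_cpu_id__cmp() now orders by socket, die, cache_lvl, cache, core,
 * then cpu, so two cores behind different L3 instances no longer collapse
 * into a single aggregate.
 */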
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index e3426541e0aa..9df2aeb34d3d 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -20,6 +20,13 @@ struct aggr_cpu_id {
int socket;
/** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
int die;
+ /** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */
+ int cache_lvl;
+ /**
+ * The cache instance ID, which is the first CPU in the
+ * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
+ */
+ int cache;
/** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */
int core;
/** CPU aggregation, note there is one CPU for each SMT thread. */
@@ -48,7 +55,7 @@ struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *da
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
-const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
+struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
int cpu__setup_cpunode_map(void);
@@ -59,7 +66,7 @@ struct perf_cpu cpu__max_present_cpu(void);
/**
* cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
*/
-static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
+static inline bool cpu_map__is_dummy(const struct perf_cpu_map *cpus)
{
return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
}
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index ca1d833a0c26..81cfc85f4668 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -12,7 +12,8 @@
#include "cpumap.h"
#include "debug.h"
#include "env.h"
-#include "pmu-hybrid.h"
+#include "pmu.h"
+#include "pmus.h"
#define PACKAGE_CPUS_FMT \
"%s/devices/system/cpu/cpu%d/topology/package_cpus_list"
@@ -469,12 +470,11 @@ err:
struct hybrid_topology *hybrid_topology__new(void)
{
- struct perf_pmu *pmu;
+ struct perf_pmu *pmu = NULL;
struct hybrid_topology *tp = NULL;
- u32 nr, i = 0;
+ int nr = perf_pmus__num_core_pmus(), i = 0;
- nr = perf_pmu__hybrid_pmu_num();
- if (nr == 0)
+ if (nr <= 1)
return NULL;
tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr);
@@ -482,7 +482,7 @@ struct hybrid_topology *hybrid_topology__new(void)
return NULL;
tp->nr = nr;
- perf_pmu__for_each_hybrid_pmu(pmu) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
if (load_hybrid_node(&tp->nodes[i], pmu)) {
hybrid_topology__delete(tp);
return NULL;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 82a27ab90c8b..e917985bbbe6 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -52,15 +52,15 @@ struct cs_etm_decoder {
static u32
cs_etm_decoder__mem_access(const void *context,
const ocsd_vaddr_t address,
- const ocsd_mem_space_acc_t mem_space __maybe_unused,
+ const ocsd_mem_space_acc_t mem_space,
const u8 trace_chan_id,
const u32 req_size,
u8 *buffer)
{
struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
- return decoder->mem_access(decoder->data, trace_chan_id,
- address, req_size, buffer);
+ return decoder->mem_access(decoder->data, trace_chan_id, address,
+ req_size, buffer, mem_space);
}
int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
@@ -541,44 +541,33 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
const uint8_t trace_chan_id)
{
pid_t tid = -1;
- static u64 pid_fmt;
- int ret;
-
- /*
- * As all the ETMs run at the same exception level, the system should
- * have the same PID format crossing CPUs. So cache the PID format
- * and reuse it for sequential decoding.
- */
- if (!pid_fmt) {
- ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
- if (ret)
- return OCSD_RESP_FATAL_SYS_ERR;
- }
/*
* Process the PE_CONTEXT packets if we have a valid contextID or VMID.
* If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
* as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
*/
- switch (pid_fmt) {
- case BIT(ETM_OPT_CTXTID):
+ switch (cs_etm__get_pid_fmt(etmq)) {
+ case CS_ETM_PIDFMT_CTXTID:
if (elem->context.ctxt_id_valid)
tid = elem->context.context_id;
break;
- case BIT(ETM_OPT_CTXTID2):
+ case CS_ETM_PIDFMT_CTXTID2:
if (elem->context.vmid_valid)
tid = elem->context.vmid;
break;
+ case CS_ETM_PIDFMT_NONE:
default:
break;
}
+ if (cs_etm__etmq_set_tid_el(etmq, tid, trace_chan_id,
+ elem->context.exception_level))
+ return OCSD_RESP_FATAL_SYS_ERR;
+
if (tid == -1)
return OCSD_RESP_CONT;
- if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
- return OCSD_RESP_FATAL_SYS_ERR;
-
/*
* A timestamp is generated after a PE_CONTEXT element so make sure
* to rely on that coming one.
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 21d403f55d96..272c2efe78ee 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -11,6 +11,7 @@
#define INCLUDE__CS_ETM_DECODER_H__
#include <linux/types.h>
+#include <opencsd/ocsd_if_types.h>
#include <stdio.h>
struct cs_etm_decoder;
@@ -19,7 +20,8 @@ struct cs_etm_packet_queue;
struct cs_etm_queue;
-typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *);
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *,
+ const ocsd_mem_space_acc_t);
struct cs_etmv3_trace_params {
u32 reg_ctrl;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 91299cc56bf7..1419b40dfbe8 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -14,7 +14,6 @@
#include <linux/types.h>
#include <linux/zalloc.h>
-#include <opencsd/ocsd_if_types.h>
#include <stdlib.h>
#include "auxtrace.h"
@@ -46,8 +45,6 @@ struct cs_etm_auxtrace {
struct auxtrace_heap heap;
struct itrace_synth_opts synth_opts;
struct perf_session *session;
- struct machine *machine;
- struct thread *unknown_thread;
struct perf_tsc_conversion tc;
/*
@@ -80,15 +77,18 @@ struct cs_etm_auxtrace {
u64 instructions_id;
u64 **metadata;
unsigned int pmu_type;
+ enum cs_etm_pid_fmt pid_fmt;
};
struct cs_etm_traceid_queue {
u8 trace_chan_id;
- pid_t pid, tid;
u64 period_instructions;
size_t last_branch_pos;
union perf_event *event_buf;
struct thread *thread;
+ struct thread *prev_packet_thread;
+ ocsd_ex_level prev_packet_el;
+ ocsd_ex_level el;
struct branch_stack *last_branch;
struct branch_stack *last_branch_rb;
struct cs_etm_packet *prev_packet;
@@ -172,44 +172,46 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
}
/*
- * The returned PID format is presented by two bits:
+ * The returned PID format is presented as an enum:
*
- * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
- * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
+ * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ * CS_ETM_PIDFMT_NONE: No context IDs
*
* It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
* are enabled at the same time when the session runs on an EL2 kernel.
* This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
* recorded in the trace data, the tool will selectively use
* CONTEXTIDR_EL2 as PID.
+ *
+ * The result is cached in etm->pid_fmt so this function only needs to be called
+ * when processing the aux info.
*/
-int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
{
- struct int_node *inode;
- u64 *metadata, val;
-
- inode = intlist__find(traceid_list, trace_chan_id);
- if (!inode)
- return -EINVAL;
-
- metadata = inode->priv;
+ u64 val;
if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
val = metadata[CS_ETM_ETMCR];
/* CONTEXTIDR is traced */
if (val & BIT(ETM_OPT_CTXTID))
- *pid_fmt = BIT(ETM_OPT_CTXTID);
+ return CS_ETM_PIDFMT_CTXTID;
} else {
val = metadata[CS_ETMV4_TRCCONFIGR];
/* CONTEXTIDR_EL2 is traced */
if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
- *pid_fmt = BIT(ETM_OPT_CTXTID2);
+ return CS_ETM_PIDFMT_CTXTID2;
/* CONTEXTIDR_EL1 is traced */
else if (val & BIT(ETM4_CFG_BIT_CTXTID))
- *pid_fmt = BIT(ETM_OPT_CTXTID);
+ return CS_ETM_PIDFMT_CTXTID;
}
- return 0;
+ return CS_ETM_PIDFMT_NONE;
+}
+
+enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
+{
+ return etmq->etm->pid_fmt;
}
static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
@@ -480,9 +482,11 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
cs_etm__clear_packet_queue(&tidq->packet_queue);
queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
- tidq->tid = queue->tid;
- tidq->pid = -1;
tidq->trace_chan_id = trace_chan_id;
+ tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
+ tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
+ queue->tid);
+ tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
tidq->packet = zalloc(sizeof(struct cs_etm_packet));
if (!tidq->packet)
@@ -615,10 +619,21 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
/*
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
+ *
+ * Threads and exception levels are also tracked for both the
+ * previous and current packets. This is because the previous
+ * packet is used for the 'from' IP for branch samples, so the
+ * thread at that time must also be assigned to that sample.
+ * Across discontinuity packets the thread can change, so by
+ * tracking the thread for the previous packet the branch sample
+ * will have the correct info.
*/
tmp = tidq->packet;
tidq->packet = tidq->prev_packet;
tidq->prev_packet = tmp;
+ tidq->prev_packet_el = tidq->el;
+ thread__put(tidq->prev_packet_thread);
+ tidq->prev_packet_thread = thread__get(tidq->thread);
}
}
@@ -794,6 +809,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
/* Free this traceid_queue from the array */
tidq = etmq->traceid_queues[idx];
thread__zput(tidq->thread);
+ thread__zput(tidq->prev_packet_thread);
zfree(&tidq->event_buf);
zfree(&tidq->last_branch);
zfree(&tidq->last_branch_rb);
@@ -863,7 +879,6 @@ static void cs_etm__free(struct perf_session *session)
for (i = 0; i < aux->num_cpu; i++)
zfree(&aux->metadata[i]);
- thread__zput(aux->unknown_thread);
zfree(&aux->metadata);
zfree(&aux);
}
@@ -878,11 +893,43 @@ static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
return evsel->core.attr.type == aux->pmu_type;
}
-static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
+ ocsd_ex_level el)
{
- struct machine *machine;
+ enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
+
+ /*
+ * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
+ * running at EL1, assume everything is the host.
+ */
+ if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
+ return &etmq->etm->session->machines.host;
+
+ /*
+ * Not perfect, but otherwise assume anything in EL1 is the default
+ * guest, and everything else is the host. Distinguishing between guest
+ * and host userspaces isn't currently supported either. Neither is
+ * multiple guest support. All this does is reduce the likelihood of
+ * decode errors where we look into the host kernel maps when it should
+ * have been the guest maps.
+ */
+ switch (el) {
+ case ocsd_EL1:
+ return machines__find_guest(&etmq->etm->session->machines,
+ DEFAULT_GUEST_KERNEL_ID);
+ case ocsd_EL3:
+ case ocsd_EL2:
+ case ocsd_EL0:
+ case ocsd_EL_unknown:
+ default:
+ return &etmq->etm->session->machines.host;
+ }
+}
- machine = etmq->etm->machine;
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
+ ocsd_ex_level el)
+{
+ struct machine *machine = cs_etm__get_machine(etmq, el);
if (address >= machine__kernel_start(machine)) {
if (machine__is_host(machine))
@@ -892,57 +939,74 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
} else {
if (machine__is_host(machine))
return PERF_RECORD_MISC_USER;
- else if (perf_guest)
+ else {
+ /*
+ * Can't really happen at the moment because
+ * cs_etm__get_machine() will always return
+ * machines.host for any non-EL1 trace.
+ */
return PERF_RECORD_MISC_GUEST_USER;
- else
- return PERF_RECORD_MISC_HYPERVISOR;
+ }
}
}
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
- u64 address, size_t size, u8 *buffer)
+ u64 address, size_t size, u8 *buffer,
+ const ocsd_mem_space_acc_t mem_space)
{
u8 cpumode;
u64 offset;
int len;
- struct thread *thread;
- struct machine *machine;
struct addr_location al;
struct dso *dso;
struct cs_etm_traceid_queue *tidq;
+ int ret = 0;
if (!etmq)
return 0;
- machine = etmq->etm->machine;
- cpumode = cs_etm__cpu_mode(etmq, address);
+ addr_location__init(&al);
tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
if (!tidq)
- return 0;
+ goto out;
- thread = tidq->thread;
- if (!thread) {
- if (cpumode != PERF_RECORD_MISC_KERNEL)
- return 0;
- thread = etmq->etm->unknown_thread;
+ /*
+ * We've already tracked the EL alongside the PID in cs_etm__set_thread(),
+ * so double-check that it matches what OpenCSD thinks as well. It
+ * doesn't distinguish between EL0 and EL1 for this mem access callback
+ * so we had to do the extra tracking. Skip validation if it's any of
+ * the 'any' values.
+ */
+ if (!(mem_space == OCSD_MEM_SPACE_ANY ||
+ mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
+ if (mem_space & OCSD_MEM_SPACE_EL1N) {
+ /* Includes both non secure EL1 and EL0 */
+ assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
+ } else if (mem_space & OCSD_MEM_SPACE_EL2)
+ assert(tidq->el == ocsd_EL2);
+ else if (mem_space & OCSD_MEM_SPACE_EL3)
+ assert(tidq->el == ocsd_EL3);
}
- if (!thread__find_map(thread, cpumode, address, &al))
- return 0;
+ cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
+
+ if (!thread__find_map(tidq->thread, cpumode, address, &al))
+ goto out;
dso = map__dso(al.map);
if (!dso)
- return 0;
+ goto out;
if (dso->data.status == DSO_DATA_STATUS_ERROR &&
dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
- return 0;
+ goto out;
offset = map__map_ip(al.map, address);
map__load(al.map);
- len = dso__data_read_offset(dso, machine, offset, buffer, size);
+ len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
+ offset, buffer, size);
if (len <= 0) {
ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
@@ -953,10 +1017,12 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
dso->long_name ? dso->long_name : "Unknown");
dso->auxtrace_warned = true;
}
- return 0;
+ goto out;
}
-
- return len;
+ ret = len;
+out:
+ addr_location__exit(&al);
+ return ret;
}
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
@@ -1172,8 +1238,8 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
{
u8 instrBytes[2];
- cs_etm__mem_access(etmq, trace_chan_id, addr,
- ARRAY_SIZE(instrBytes), instrBytes);
+ cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
+ instrBytes, 0);
/*
* T32 instruction size is indicated by bits[15:11] of the first
* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
@@ -1303,39 +1369,34 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
return etmq->buf_len;
}
-static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
- struct cs_etm_traceid_queue *tidq)
+static void cs_etm__set_thread(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq, pid_t tid,
+ ocsd_ex_level el)
{
- if ((!tidq->thread) && (tidq->tid != -1))
- tidq->thread = machine__find_thread(etm->machine, -1,
- tidq->tid);
+ struct machine *machine = cs_etm__get_machine(etmq, el);
- if (tidq->thread)
- tidq->pid = tidq->thread->pid_;
+ if (tid != -1) {
+ thread__zput(tidq->thread);
+ tidq->thread = machine__find_thread(machine, -1, tid);
+ }
+
+ /* Couldn't find a known thread */
+ if (!tidq->thread)
+ tidq->thread = machine__idle_thread(machine);
+
+ tidq->el = el;
}
-int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
- pid_t tid, u8 trace_chan_id)
+int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
+ u8 trace_chan_id, ocsd_ex_level el)
{
- int cpu, err = -EINVAL;
- struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_traceid_queue *tidq;
tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
if (!tidq)
- return err;
-
- if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
- return err;
-
- err = machine__set_current_tid(etm->machine, cpu, tid, tid);
- if (err)
- return err;
-
- tidq->tid = tid;
- thread__zput(tidq->thread);
+ return -EINVAL;
- cs_etm__set_pid_tid_cpu(etm, tidq);
+ cs_etm__set_thread(etmq, tidq, tid, el);
return 0;
}
@@ -1369,8 +1430,8 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
else
sample->insn_len = 4;
- cs_etm__mem_access(etmq, trace_chan_id, sample->ip,
- sample->insn_len, (void *)sample->insn);
+ cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
+ (void *)sample->insn, 0);
}
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
@@ -1405,15 +1466,15 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
struct perf_sample sample = {.ip = 0,};
event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
event->sample.header.size = sizeof(struct perf_event_header);
/* Set time field based on etm auxtrace config. */
sample.time = cs_etm__resolve_sample_time(etmq, tidq);
sample.ip = addr;
- sample.pid = tidq->pid;
- sample.tid = tidq->tid;
+ sample.pid = thread__pid(tidq->thread);
+ sample.tid = thread__tid(tidq->thread);
sample.id = etmq->etm->instructions_id;
sample.stream_id = etmq->etm->instructions_id;
sample.period = period;
@@ -1464,15 +1525,16 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
ip = cs_etm__last_executed_instr(tidq->prev_packet);
event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
+ tidq->prev_packet_el);
event->sample.header.size = sizeof(struct perf_event_header);
/* Set time field based on etm auxtrace config. */
sample.time = cs_etm__resolve_sample_time(etmq, tidq);
sample.ip = ip;
- sample.pid = tidq->pid;
- sample.tid = tidq->tid;
+ sample.pid = thread__pid(tidq->prev_packet_thread);
+ sample.tid = thread__tid(tidq->prev_packet_thread);
sample.addr = cs_etm__first_executed_instr(tidq->packet);
sample.id = etmq->etm->branches_id;
sample.stream_id = etmq->etm->branches_id;
@@ -1922,8 +1984,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
* so below only read 2 bytes as instruction size for T32.
*/
addr = end_addr - 2;
- cs_etm__mem_access(etmq, trace_chan_id, addr,
- sizeof(instr16), (u8 *)&instr16);
+ cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
+ (u8 *)&instr16, 0);
if ((instr16 & 0xFF00) == 0xDF00)
return true;
@@ -1938,8 +2000,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
* +---------+---------+-------------------------+
*/
addr = end_addr - 4;
- cs_etm__mem_access(etmq, trace_chan_id, addr,
- sizeof(instr32), (u8 *)&instr32);
+ cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
+ (u8 *)&instr32, 0);
if ((instr32 & 0x0F000000) == 0x0F000000 &&
(instr32 & 0xF0000000) != 0xF0000000)
return true;
@@ -1955,8 +2017,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
* +-----------------------+---------+-----------+
*/
addr = end_addr - 4;
- cs_etm__mem_access(etmq, trace_chan_id, addr,
- sizeof(instr32), (u8 *)&instr32);
+ cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
+ (u8 *)&instr32, 0);
if ((instr32 & 0xFFE0001F) == 0xd4000001)
return true;
@@ -2466,11 +2528,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
if (!etmq)
continue;
- /*
- * Per-cpu mode has contextIDs in the trace and the decoder
- * calls cs_etm__set_pid_tid_cpu() automatically so no need
- * to do this here
- */
if (etm->per_thread_decoding) {
tidq = cs_etm__etmq_get_traceid_queue(
etmq, CS_ETM_PER_THREAD_TRACEID);
@@ -2478,10 +2535,8 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
if (!tidq)
continue;
- if ((tid == -1) || (tidq->tid == tid)) {
- cs_etm__set_pid_tid_cpu(etm, tidq);
+ if (tid == -1 || thread__tid(tidq->thread) == tid)
cs_etm__run_per_thread_timeless_decoder(etmq);
- }
} else
cs_etm__run_per_cpu_timeless_decoder(etmq);
}
@@ -2611,10 +2666,12 @@ static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
return 0;
/*
- * Add the tid/pid to the log so that we can get a match when
- * we get a contextID from the decoder.
+ * Add the tid/pid to the log so that we can get a match when we get a
+ * contextID from the decoder. Only track for the host: only kernel
+ * trace is supported for guests, which wouldn't need PIDs, so this
+ * should be fine.
*/
- th = machine__findnew_thread(etm->machine,
+ th = machine__findnew_thread(&etm->session->machines.host,
event->itrace_start.pid,
event->itrace_start.tid);
if (!th)
@@ -2647,10 +2704,12 @@ static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
return 0;
/*
- * Add the tid/pid to the log so that we can get a match when
- * we get a contextID from the decoder.
+ * Add the tid/pid to the log so that we can get a match when we get a
+ * contextID from the decoder. Only track for the host: only kernel
+ * trace is supported for guests which wouldn't need pids so this should
+ * be fine.
*/
- th = machine__findnew_thread(etm->machine,
+ th = machine__findnew_thread(&etm->session->machines.host,
event->context_switch.next_prev_pid,
event->context_switch.next_prev_tid);
if (!th)
@@ -3246,6 +3305,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
goto err_free_metadata;
}
+ /*
+ * As all the ETMs run at the same exception level, the system should
+ * have the same PID format crossing CPUs. So cache the PID format
+ * and reuse it for sequential decoding.
+ */
+ etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
+
err = auxtrace_queues__init(&etm->queues);
if (err)
goto err_free_etm;
@@ -3259,7 +3325,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
}
etm->session = session;
- etm->machine = &session->machines.host;
etm->num_cpu = num_cpu;
etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
@@ -3286,27 +3351,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
return err;
- etm->unknown_thread = thread__new(999999999, 999999999);
- if (!etm->unknown_thread) {
- err = -ENOMEM;
- goto err_free_queues;
- }
-
- /*
- * Initialize list node so that at thread__zput() we can avoid
- * segmentation fault at list_del_init().
- */
- INIT_LIST_HEAD(&etm->unknown_thread->node);
-
- err = thread__set_comm(etm->unknown_thread, "unknown", 0);
- if (err)
- goto err_delete_thread;
-
- if (thread__init_maps(etm->unknown_thread, etm->machine)) {
- err = -ENOMEM;
- goto err_delete_thread;
- }
-
etm->tc.time_shift = tc->time_shift;
etm->tc.time_mult = tc->time_mult;
etm->tc.time_zero = tc->time_zero;
@@ -3318,7 +3362,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
}
err = cs_etm__synth_events(etm, session);
if (err)
- goto err_delete_thread;
+ goto err_free_queues;
/*
* Map Trace ID values to CPU metadata.
@@ -3348,7 +3392,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
session->header.data_size,
cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
if (err)
- goto err_delete_thread;
+ goto err_free_queues;
/* if HW ID found then clear any unused metadata ID values */
if (aux_hw_id_found)
@@ -3358,17 +3402,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
if (err)
- goto err_delete_thread;
+ goto err_free_queues;
err = cs_etm__queue_aux_records(session);
if (err)
- goto err_delete_thread;
+ goto err_free_queues;
etm->data_queued = etm->queues.populated;
return 0;
-err_delete_thread:
- thread__zput(etm->unknown_thread);
err_free_queues:
auxtrace_queues__free(&etm->queues);
session->auxtrace = NULL;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index ecca40787ac9..7cca37887917 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -244,11 +244,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
+enum cs_etm_pid_fmt {
+ CS_ETM_PIDFMT_NONE,
+ CS_ETM_PIDFMT_CTXTID,
+ CS_ETM_PIDFMT_CTXTID2
+};
+
#ifdef HAVE_CSTRACE_SUPPORT
+#include <opencsd/ocsd_if_types.h>
int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
-int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
-int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
- pid_t tid, u8 trace_chan_id);
+enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
+int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
+ u8 trace_chan_id, ocsd_ex_level el);
bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
u8 trace_chan_id);
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 653709ab867a..5bb3c2ba95ca 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -154,12 +154,14 @@ static int process_sample_event(struct perf_tool *tool,
{
struct convert_json *c = container_of(tool, struct convert_json, tool);
FILE *out = c->out;
- struct addr_location al, tal;
+ struct addr_location al;
u64 sample_type = __evlist__combined_sample_type(evsel->evlist);
u8 cpumode = PERF_RECORD_MISC_USER;
+ addr_location__init(&al);
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("Sample resolution failed!\n");
+ addr_location__exit(&al);
return -1;
}
@@ -172,13 +174,13 @@ static int process_sample_event(struct perf_tool *tool,
output_json_format(out, false, 2, "{");
output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
- output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_);
- output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid);
+ output_json_key_format(out, true, 3, "pid", "%i", thread__pid(al.thread));
+ output_json_key_format(out, true, 3, "tid", "%i", thread__tid(al.thread));
if ((sample_type & PERF_SAMPLE_CPU))
output_json_key_format(out, true, 3, "cpu", "%i", sample->cpu);
- else if (al.thread->cpu >= 0)
- output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu);
+ else if (thread__cpu(al.thread) >= 0)
+ output_json_key_format(out, true, 3, "cpu", "%i", thread__cpu(al.thread));
output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
@@ -190,6 +192,7 @@ static int process_sample_event(struct perf_tool *tool,
for (i = 0; i < sample->callchain->nr; ++i) {
u64 ip = sample->callchain->ips[i];
+ struct addr_location tal;
if (ip >= PERF_CONTEXT_MAX) {
switch (ip) {
@@ -215,8 +218,10 @@ static int process_sample_event(struct perf_tool *tool,
else
fputc(',', out);
+ addr_location__init(&tal);
ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
+ addr_location__exit(&tal);
}
} else {
output_sample_callchain_entry(tool, sample->ip, &al);
@@ -245,6 +250,7 @@ static int process_sample_event(struct perf_tool *tool,
}
#endif
output_json_format(out, false, 2, "}");
+ addr_location__exit(&al);
return 0;
}
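
The conversion above follows the wider addr_location reference-counting change in this series: every addr_location must now be bracketed by addr_location__init()/addr_location__exit(), including on error paths. A condensed sketch of the required pattern, assuming the perf-internal headers:

    struct addr_location al;

    addr_location__init(&al);          /* zero the struct; no references yet */
    if (machine__resolve(machine, &al, sample) < 0) {
            addr_location__exit(&al);  /* drop references taken during resolve */
            return -1;
    }
    /* ... use al.thread, al.map, al.sym ... */
    addr_location__exit(&al);          /* always paired with __init() */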
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 84c970c11794..b9fb71ab7a73 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -64,13 +64,13 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
{
u64 main_thread_db_id = 0;
- if (thread->db_id)
+ if (thread__db_id(thread))
return 0;
- thread->db_id = ++dbe->thread_last_db_id;
+ thread__set_db_id(thread, ++dbe->thread_last_db_id);
if (main_thread)
- main_thread_db_id = main_thread->db_id;
+ main_thread_db_id = thread__db_id(main_thread);
if (dbe->export_thread)
return dbe->export_thread(dbe, thread, main_thread_db_id,
@@ -215,6 +215,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
u64 kernel_start = machine__kernel_start(machine);
struct call_path *current = &dbe->cpr->call_path;
enum chain_order saved_order = callchain_param.order;
+ struct callchain_cursor *cursor;
int err;
if (!symbol_conf.use_callchain || !sample->callchain)
@@ -226,32 +227,34 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
* the callchain starting with the root node and ending with the leaf.
*/
callchain_param.order = ORDER_CALLER;
- err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+ cursor = get_tls_callchain_cursor();
+ err = thread__resolve_callchain(thread, cursor, evsel,
sample, NULL, NULL, PERF_MAX_STACK_DEPTH);
if (err) {
callchain_param.order = saved_order;
return NULL;
}
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
while (1) {
struct callchain_cursor_node *node;
struct addr_location al;
u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
- memset(&al, 0, sizeof(al));
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(cursor);
if (!node)
break;
+
/*
* Handle export of symbol and dso for this node by
* constructing an addr_location struct and then passing it to
* db_ids_from_al() to perform the export.
*/
+ addr_location__init(&al);
al.sym = node->ms.sym;
al.map = node->ms.map;
- al.maps = thread->maps;
+ al.maps = thread__maps(thread);
al.addr = node->ip;
if (al.map && !al.sym)
@@ -264,7 +267,8 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
al.sym, node->ip,
kernel_start);
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(cursor);
+ addr_location__exit(&al);
}
/* Reset the callchain order to its prior value. */
@@ -321,7 +325,7 @@ static int db_export__threads(struct db_export *dbe, struct thread *thread,
* For a non-main thread, db_export__comm_thread() must be
* called only if thread has not previously been exported.
*/
- bool export_comm_thread = comm && !thread->db_id;
+ bool export_comm_thread = comm && !thread__db_id(thread);
err = db_export__thread(dbe, thread, machine, main_thread);
if (err)
@@ -529,16 +533,16 @@ static int db_export__pid_tid(struct db_export *dbe, struct machine *machine,
struct thread *main_thread;
int err = 0;
- if (!thread || !thread->comm_set)
+ if (!thread || !thread__comm_set(thread))
goto out_put;
- *is_idle = !thread->pid_ && !thread->tid;
+ *is_idle = !thread__pid(thread) && !thread__tid(thread);
main_thread = thread__main_thread(machine, thread);
err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr);
- *db_id = thread->db_id;
+ *db_id = thread__db_id(thread);
thread__put(main_thread);
out_put:
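
db-export.c also moves off the global callchain_cursor to a thread-local cursor fetched on demand. A sketch of the new calling convention; the NULL check is an assumption based on the cursor being allocated lazily:

    struct callchain_cursor *cursor = get_tls_callchain_cursor();

    if (cursor == NULL)                /* TLS allocation can fail */
            return -ENOMEM;

    err = thread__resolve_callchain(thread, cursor, evsel, sample,
                                    NULL, NULL, PERF_MAX_STACK_DEPTH);
    if (!err)
            callchain_cursor_commit(cursor);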
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index 16238f823a5e..e0f822ebb9b9 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -10,6 +10,8 @@
#include <subcmd/exec-cmd.h>
#include <linux/zalloc.h>
#include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
#include "debug.h"
#include "event.h"
@@ -63,6 +65,7 @@ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
d_al->addr = al->addr;
d_al->comm = NULL;
d_al->filtered = 0;
+ d_al->priv = NULL;
}
static struct addr_location *get_al(struct dlfilter *d)
@@ -151,6 +154,11 @@ static char **dlfilter__args(void *ctx, int *dlargc)
return d->dlargv;
}
+static bool has_priv(struct perf_dlfilter_al *d_al_p)
+{
+ return d_al_p->size >= offsetof(struct perf_dlfilter_al, priv) + sizeof(d_al_p->priv);
+}
+
static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p)
{
struct dlfilter *d = (struct dlfilter *)ctx;
@@ -166,6 +174,7 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf
if (!thread)
return -1;
+ addr_location__init(&al);
thread__find_symbol_fb(thread, d->sample->cpumode, address, &al);
al_to_d_al(&al, &d_al);
@@ -176,9 +185,29 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf
memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al)));
d_al_p->size = sz;
+ if (has_priv(d_al_p))
+ d_al_p->priv = memdup(&al, sizeof(al));
+
return 0;
}
+static void dlfilter__al_cleanup(void *ctx __maybe_unused, struct perf_dlfilter_al *d_al_p)
+{
+ struct addr_location *al;
+
+ /* Ensure backward compatibility */
+ if (!has_priv(d_al_p) || !d_al_p->priv)
+ return;
+
+ al = d_al_p->priv;
+
+ d_al_p->priv = NULL;
+
+ addr_location__exit(al);
+
+ free(al);
+}
+
static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
{
struct dlfilter *d = (struct dlfilter *)ctx;
@@ -197,8 +226,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
if (!al->thread && machine__resolve(d->machine, al, d->sample) < 0)
return NULL;
- if (al->thread->maps) {
- struct machine *machine = maps__machine(al->thread->maps);
+ if (thread__maps(al->thread)) {
+ struct machine *machine = maps__machine(thread__maps(al->thread));
if (machine)
script_fetch_insn(d->sample, al->thread, machine);
@@ -258,6 +287,7 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len)
struct addr_location a;
struct map *map;
u64 offset;
+ __s32 ret;
if (!d->ctx_valid)
return -1;
@@ -272,16 +302,22 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len)
machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip))
goto have_map;
+ addr_location__init(&a);
thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a);
- if (!a.map)
- return -1;
+ if (!a.map) {
+ ret = -1;
+ goto out;
+ }
map = a.map;
have_map:
offset = map__map_ip(map, ip);
if (ip + len >= map__end(map))
len = map__end(map) - ip;
- return dso__data_read_offset(map__dso(map), d->machine, offset, buf, len);
+ ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len);
+out:
+ addr_location__exit(&a);
+ return ret;
}
static const struct perf_dlfilter_fns perf_dlfilter_fns = {
@@ -289,6 +325,7 @@ static const struct perf_dlfilter_fns perf_dlfilter_fns = {
.resolve_addr = dlfilter__resolve_addr,
.args = dlfilter__args,
.resolve_address = dlfilter__resolve_address,
+ .al_cleanup = dlfilter__al_cleanup,
.insn = dlfilter__insn,
.srcline = dlfilter__srcline,
.attr = dlfilter__attr,
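
The new al_cleanup hook pairs with resolve_address(): perf now stashes a counted addr_location behind d_al->priv, and has_priv() keys off the caller-supplied size field so filters built against the smaller struct keep working unchanged. A sketch of how a dlfilter might use the pair; the filter body is illustrative, only the perf_dlfilter_fns members come from the API:

    #include <perf/perf_dlfilter.h>

    struct perf_dlfilter_fns perf_dlfilter_fns; /* populated by perf at load time */

    int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
    {
            struct perf_dlfilter_al al = { .size = sizeof(al) };

            if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &al))
                    return 0;                     /* keep the event on failure */
            /* ... inspect al.sym, al.dso ... */
            if (perf_dlfilter_fns.al_cleanup)     /* absent in older perf builds */
                    perf_dlfilter_fns.al_cleanup(ctx, &al);
            return 0;
    }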
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index a86614599269..bdfead36b83a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -67,6 +67,39 @@ char dso__symtab_origin(const struct dso *dso)
return origin[dso->symtab_type];
}
+bool dso__is_object_file(const struct dso *dso)
+{
+ switch (dso->binary_type) {
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
+ return false;
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return true;
+ }
+}
+
int dso__read_binary_type_filename(const struct dso *dso,
enum dso_binary_type type,
char *root_dir, char *filename, size_t size)
@@ -1287,7 +1320,9 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
dso->id = *id;
dso__set_long_name_id(dso, dso->name, id, false);
dso__set_short_name(dso, dso->name, false);
- dso->symbols = dso->symbol_names = RB_ROOT_CACHED;
+ dso->symbols = RB_ROOT_CACHED;
+ dso->symbol_names = NULL;
+ dso->symbol_names_len = 0;
dso->data.cache = RB_ROOT;
dso->inlined_nodes = RB_ROOT_CACHED;
dso->srclines = RB_ROOT_CACHED;
@@ -1331,7 +1366,8 @@ void dso__delete(struct dso *dso)
inlines__tree_delete(&dso->inlined_nodes);
srcline__tree_delete(&dso->srclines);
symbols__delete(&dso->symbols);
-
+ dso->symbol_names_len = 0;
+ zfree(&dso->symbol_names);
if (dso->short_name_allocated) {
zfree((char **)&dso->short_name);
dso->short_name_allocated = false;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 0b7c7633b9f6..b41c9782c754 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -150,7 +150,8 @@ struct dso {
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root *root; /* root of rbtree that rb_node is in */
struct rb_root_cached symbols;
- struct rb_root_cached symbol_names;
+ struct symbol **symbol_names;
+ size_t symbol_names_len;
struct rb_root_cached inlined_nodes;
struct rb_root_cached srclines;
struct {
@@ -379,23 +380,25 @@ void dso__reset_find_symbol_cache(struct dso *dso);
size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
size_t dso__fprintf(struct dso *dso, FILE *fp);
-static inline bool dso__is_vmlinux(struct dso *dso)
+static inline bool dso__is_vmlinux(const struct dso *dso)
{
return dso->binary_type == DSO_BINARY_TYPE__VMLINUX ||
dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
}
-static inline bool dso__is_kcore(struct dso *dso)
+static inline bool dso__is_kcore(const struct dso *dso)
{
return dso->binary_type == DSO_BINARY_TYPE__KCORE ||
dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
}
-static inline bool dso__is_kallsyms(struct dso *dso)
+static inline bool dso__is_kallsyms(const struct dso *dso)
{
return dso->kernel && dso->long_name[0] != '/';
}
+bool dso__is_object_file(const struct dso *dso);
+
void dso__free_a2l(struct dso *dso);
enum dso_type dso__type(struct dso *dso, struct machine *machine);
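
dso__is_object_file() gives callers a single predicate for "is there a real object file on disk to read code bytes from". A sketch of the intended use in a hypothetical caller:

    /* Skip objdump/annotation paths for DSOs with no backing object file,
     * e.g. kallsyms, BPF programs/images, JIT and out-of-line code. */
    if (!dso__is_object_file(map__dso(map)))
            return 0;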
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index b07414409771..2941d88f2199 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -478,8 +478,10 @@ static const char *die_get_file_name(Dwarf_Die *dw_die, int idx)
{
Dwarf_Die cu_die;
Dwarf_Files *files;
+ Dwarf_Attribute attr_mem;
- if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) ||
+ if (idx < 0 || !dwarf_attr_integrate(dw_die, DW_AT_decl_file, &attr_mem) ||
+ !dwarf_cu_die(attr_mem.cu, &cu_die, NULL, NULL, NULL, NULL, NULL, NULL) ||
dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
return NULL;
@@ -1074,16 +1076,18 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
/* Function pointer */
return strbuf_add(buf, "(function_type)", 15);
} else {
- if (!dwarf_diename(&type))
- return -ENOENT;
+ const char *name = dwarf_diename(&type);
+
if (tag == DW_TAG_union_type)
tmp = "union ";
else if (tag == DW_TAG_structure_type)
tmp = "struct ";
else if (tag == DW_TAG_enumeration_type)
tmp = "enum ";
+ else if (name == NULL)
+ return -ENOENT;
/* Write a base name */
- return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+ return strbuf_addf(buf, "%s%s", tmp, name ?: "");
}
ret = die_get_typename(&type, buf);
return ret ? ret : strbuf_addstr(buf, tmp);
@@ -1103,7 +1107,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
ret = die_get_typename(vr_die, buf);
if (ret < 0) {
pr_debug("Failed to get type, make it unknown.\n");
- ret = strbuf_add(buf, " (unknown_type)", 14);
+ ret = strbuf_add(buf, "(unknown_type)", 14);
}
return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4a4fdad820d6..9eabf3ec56e9 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -10,6 +10,7 @@
#include <sys/utsname.h>
#include <stdlib.h>
#include <string.h>
+#include "pmus.h"
#include "strbuf.h"
struct perf_env perf_env;
@@ -323,7 +324,7 @@ int perf_env__read_pmu_mappings(struct perf_env *env)
u32 pmu_num = 0;
struct strbuf sb;
- while ((pmu = perf_pmu__scan(pmu))) {
+ while ((pmu = perf_pmus__scan(pmu))) {
if (!pmu->name)
continue;
pmu_num++;
@@ -337,7 +338,7 @@ int perf_env__read_pmu_mappings(struct perf_env *env)
if (strbuf_init(&sb, 128 * pmu_num) < 0)
return -ENOMEM;
- while ((pmu = perf_pmu__scan(pmu))) {
+ while ((pmu = perf_pmus__scan(pmu))) {
if (!pmu->name)
continue;
if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8ae742e32e3c..4cbb092e0684 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -135,9 +135,10 @@ void perf_event__read_stat_config(struct perf_stat_config *config,
config->__val = event->data[i].val; \
break;
- CASE(AGGR_MODE, aggr_mode)
- CASE(SCALE, scale)
- CASE(INTERVAL, interval)
+ CASE(AGGR_MODE, aggr_mode)
+ CASE(SCALE, scale)
+ CASE(INTERVAL, interval)
+ CASE(AGGR_LEVEL, aggr_level)
#undef CASE
default:
pr_warning("unknown stat config term %" PRI_lu64 "\n",
@@ -485,6 +486,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma
if (machine) {
struct addr_location al;
+ addr_location__init(&al);
al.map = map__get(maps__find(machine__kernel_maps(machine), tp->addr));
if (al.map && map__load(al.map) >= 0) {
al.addr = map__map_ip(al.map, tp->addr);
@@ -492,7 +494,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma
if (al.sym)
ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
}
- map__put(al.map);
+ addr_location__exit(&al);
}
ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
old = true;
@@ -572,36 +574,36 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al)
{
- struct maps *maps = thread->maps;
+ struct maps *maps = thread__maps(thread);
struct machine *machine = maps__machine(maps);
bool load_map = false;
- al->maps = maps;
- al->thread = thread;
+ maps__zput(al->maps);
+ map__zput(al->map);
+ thread__zput(al->thread);
+ al->thread = thread__get(thread);
+
al->addr = addr;
al->cpumode = cpumode;
al->filtered = 0;
- if (machine == NULL) {
- al->map = NULL;
+ if (machine == NULL)
return NULL;
- }
if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
al->level = 'k';
- al->maps = maps = machine__kernel_maps(machine);
+ maps = machine__kernel_maps(machine);
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
al->level = '.';
} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
al->level = 'g';
- al->maps = maps = machine__kernel_maps(machine);
+ maps = machine__kernel_maps(machine);
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
al->level = 'u';
} else {
al->level = 'H';
- al->map = NULL;
if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
@@ -614,7 +616,7 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
return NULL;
}
-
+ al->maps = maps__get(maps);
al->map = map__get(maps__find(maps, al->addr));
if (al->map != NULL) {
/*
@@ -638,7 +640,7 @@ struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al)
{
struct map *map = thread__find_map(thread, cpumode, addr, al);
- struct machine *machine = maps__machine(thread->maps);
+ struct machine *machine = maps__machine(thread__maps(thread));
u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
if (map || addr_cpumode == cpumode)
@@ -695,7 +697,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
if (thread == NULL)
return -1;
- dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+ dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread__tid(thread));
thread__find_map(thread, sample->cpumode, sample->ip, al);
dso = al->map ? map__dso(al->map) : NULL;
dump_printf(" ...... dso: %s\n",
@@ -706,6 +708,9 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
if (thread__is_filtered(thread))
al->filtered |= (1 << HIST_FILTER__THREAD);
+ thread__put(thread);
+ thread = NULL;
+
al->sym = NULL;
al->cpu = sample->cpu;
al->socket = -1;
@@ -766,18 +771,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
return 0;
}
-/*
- * The preprocess_sample method will return with reference counts for the
- * in it, when done using (and perhaps getting ref counts if needing to
- * keep a pointer to one of those entries) it must be paired with
- * addr_location__put(), so that the refcounts can be decremented.
- */
-void addr_location__put(struct addr_location *al)
-{
- map__zput(al->map);
- thread__zput(al->thread);
-}
-
bool is_bts_event(struct perf_event_attr *attr)
{
return attr->type == PERF_TYPE_HARDWARE &&
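
thread__find_map() now drops whatever references 'al' already held and takes fresh ones on al->thread, al->maps and al->map, and the old addr_location__put() helper is gone in favour of addr_location__exit(). The resulting ownership rules, sketched:

    struct addr_location al;

    addr_location__init(&al);
    (void)thread__find_map(thread, sample->cpumode, sample->ip, &al);
    if (al.map) {
            /* al.thread, al.maps and al.map each hold a counted reference */
    }
    addr_location__exit(&al);          /* releases all three references */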
diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
deleted file mode 100644
index 57f02beef023..000000000000
--- a/tools/perf/util/evlist-hybrid.c
+++ /dev/null
@@ -1,162 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <errno.h>
-#include <inttypes.h>
-#include "cpumap.h"
-#include "evlist.h"
-#include "evsel.h"
-#include "../perf.h"
-#include "util/pmu-hybrid.h"
-#include "util/evlist-hybrid.h"
-#include "debug.h"
-#include <unistd.h>
-#include <stdlib.h>
-#include <linux/err.h>
-#include <linux/string.h>
-#include <perf/evlist.h>
-#include <perf/evsel.h>
-#include <perf/cpumap.h>
-
-int evlist__add_default_hybrid(struct evlist *evlist, bool precise)
-{
- struct evsel *evsel;
- struct perf_pmu *pmu;
- __u64 config;
- struct perf_cpu_map *cpus;
-
- perf_pmu__for_each_hybrid_pmu(pmu) {
- config = PERF_COUNT_HW_CPU_CYCLES |
- ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT);
- evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE,
- config);
- if (!evsel)
- return -ENOMEM;
-
- cpus = perf_cpu_map__get(pmu->cpus);
- evsel->core.cpus = cpus;
- evsel->core.own_cpus = perf_cpu_map__get(cpus);
- evsel->pmu_name = strdup(pmu->name);
- evlist__add(evlist, evsel);
- }
-
- return 0;
-}
-
-static bool group_hybrid_conflict(struct evsel *leader)
-{
- struct evsel *pos, *prev = NULL;
-
- for_each_group_evsel(pos, leader) {
- if (!evsel__is_hybrid(pos))
- continue;
-
- if (prev && strcmp(prev->pmu_name, pos->pmu_name))
- return true;
-
- prev = pos;
- }
-
- return false;
-}
-
-void evlist__warn_hybrid_group(struct evlist *evlist)
-{
- struct evsel *evsel;
-
- evlist__for_each_entry(evlist, evsel) {
- if (evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1 &&
- group_hybrid_conflict(evsel)) {
- pr_warning("WARNING: events in group from "
- "different hybrid PMUs!\n");
- return;
- }
- }
-}
-
-bool evlist__has_hybrid(struct evlist *evlist)
-{
- struct evsel *evsel;
-
- evlist__for_each_entry(evlist, evsel) {
- if (evsel->pmu_name &&
- perf_pmu__is_hybrid(evsel->pmu_name)) {
- return true;
- }
- }
-
- return false;
-}
-
-int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
-{
- struct perf_cpu_map *cpus;
- struct evsel *evsel, *tmp;
- struct perf_pmu *pmu;
- int ret, unmatched_count = 0, events_nr = 0;
-
- if (!perf_pmu__has_hybrid() || !cpu_list)
- return 0;
-
- cpus = perf_cpu_map__new(cpu_list);
- if (!cpus)
- return -1;
-
- /*
- * The evsels are created with hybrid pmu's cpus. But now we
- * need to check and adjust the cpus of evsel by cpu_list because
- * cpu_list may cause conflicts with cpus of evsel. For example,
- * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
- * to adjust the cpus of evsel to cpu6-7. And then propatate maps
- * in evlist__create_maps().
- */
- evlist__for_each_entry_safe(evlist, tmp, evsel) {
- struct perf_cpu_map *matched_cpus, *unmatched_cpus;
- char buf1[128], buf2[128];
-
- pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
- if (!pmu)
- continue;
-
- ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
- &unmatched_cpus);
- if (ret)
- goto out;
-
- events_nr++;
-
- if (perf_cpu_map__nr(matched_cpus) > 0 &&
- (perf_cpu_map__nr(unmatched_cpus) > 0 ||
- perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
- perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
- perf_cpu_map__put(evsel->core.cpus);
- perf_cpu_map__put(evsel->core.own_cpus);
- evsel->core.cpus = perf_cpu_map__get(matched_cpus);
- evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
-
- if (perf_cpu_map__nr(unmatched_cpus) > 0) {
- cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
- pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
- buf1, pmu->name, evsel->name);
- }
- }
-
- if (perf_cpu_map__nr(matched_cpus) == 0) {
- evlist__remove(evlist, evsel);
- evsel__delete(evsel);
-
- cpu_map__snprint(cpus, buf1, sizeof(buf1));
- cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
- pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
- buf1, pmu->name, pmu->name, buf2);
- unmatched_count++;
- }
-
- perf_cpu_map__put(matched_cpus);
- perf_cpu_map__put(unmatched_cpus);
- }
- if (events_nr)
- ret = (unmatched_count == events_nr) ? -1 : 0;
-out:
- perf_cpu_map__put(cpus);
- return ret;
-}
diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
deleted file mode 100644
index aacdb1b0f948..000000000000
--- a/tools/perf/util/evlist-hybrid.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERF_EVLIST_HYBRID_H
-#define __PERF_EVLIST_HYBRID_H
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include "evlist.h"
-#include <unistd.h>
-
-int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
-void evlist__warn_hybrid_group(struct evlist *evlist);
-bool evlist__has_hybrid(struct evlist *evlist);
-int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
-
-#endif /* __PERF_EVLIST_HYBRID_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a0504316b06f..7ef43f72098e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -28,10 +28,10 @@
#include "util/string2.h"
#include "util/perf_api_probe.h"
#include "util/evsel_fprintf.h"
-#include "util/evlist-hybrid.h"
#include "util/pmu.h"
#include "util/sample.h"
#include "util/bpf-filter.h"
+#include "util/stat.h"
#include "util/util.h"
#include <signal.h>
#include <unistd.h>
@@ -93,8 +93,15 @@ struct evlist *evlist__new(void)
struct evlist *evlist__new_default(void)
{
struct evlist *evlist = evlist__new();
+ bool can_profile_kernel;
+ int err;
+
+ if (!evlist)
+ return NULL;
- if (evlist && evlist__add_default(evlist)) {
+ can_profile_kernel = perf_event_paranoid_check(1);
+ err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ if (err) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -165,6 +172,7 @@ void evlist__delete(struct evlist *evlist)
if (evlist == NULL)
return;
+ evlist__free_stats(evlist);
evlist__munmap(evlist);
evlist__close(evlist);
evlist__purge(evlist);
@@ -237,19 +245,6 @@ static void evlist__set_leader(struct evlist *evlist)
perf_evlist__set_leader(&evlist->core);
}
-int __evlist__add_default(struct evlist *evlist, bool precise)
-{
- struct evsel *evsel;
-
- evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE,
- PERF_COUNT_HW_CPU_CYCLES);
- if (evsel == NULL)
- return -ENOMEM;
-
- evlist__add(evlist, evsel);
- return 0;
-}
-
static struct evsel *evlist__dummy_event(struct evlist *evlist)
{
struct perf_event_attr attr = {
@@ -1067,7 +1062,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
if (!cpus)
goto out_delete_threads;
- evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid;
+ evlist->core.has_user_cpus = !!target->cpu_list;
perf_evlist__set_maps(&evlist->core, cpus, threads);
@@ -2465,3 +2460,42 @@ void evlist__check_mem_load_aux(struct evlist *evlist)
}
}
}
+
+/**
+ * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs
+ * and warn if the user CPU list is inapplicable for the event's PMU's
+ * CPUs. Non-core PMUs list a CPU in sysfs, but this may be overridden by a
+ * user-requested CPU, so any online CPU is applicable. Core PMUs handle
+ * events on the CPUs in their list; otherwise the event isn't supported.
+ * @evlist: The list of events being checked.
+ * @cpu_list: The user provided list of CPUs.
+ */
+void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list)
+{
+ struct perf_cpu_map *user_requested_cpus;
+ struct evsel *pos;
+
+ if (!cpu_list)
+ return;
+
+ user_requested_cpus = perf_cpu_map__new(cpu_list);
+ if (!user_requested_cpus)
+ return;
+
+ evlist__for_each_entry(evlist, pos) {
+ struct perf_cpu_map *intersect, *to_test;
+ const struct perf_pmu *pmu = evsel__find_pmu(pos);
+
+ to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online();
+ intersect = perf_cpu_map__intersect(to_test, user_requested_cpus);
+ if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
+ char buf[128];
+
+ cpu_map__snprint(to_test, buf, sizeof(buf));
+ pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n",
+ cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos));
+ }
+ perf_cpu_map__put(intersect);
+ }
+ perf_cpu_map__put(user_requested_cpus);
+}
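
A sketch of how a tool might invoke the new helper after option parsing; the 'target' variable is illustrative:

    if (target.cpu_list)
            evlist__warn_user_requested_cpus(evlist, target.cpu_list);

On a hybrid system this warns, for instance, when a -C list names CPUs outside the cpu list of the core PMU an event was opened on.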
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 46cf402add93..664c6bf7b3e0 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -67,7 +67,6 @@ struct evlist {
struct evsel *selected;
struct events_stats stats;
struct perf_env *env;
- const char *hybrid_pmu_name;
void (*trace_event_sample_raw)(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample);
@@ -101,13 +100,6 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
-int __evlist__add_default(struct evlist *evlist, bool precise);
-
-static inline int evlist__add_default(struct evlist *evlist)
-{
- return __evlist__add_default(evlist, true);
-}
-
int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs);
int __evlist__add_default_attrs(struct evlist *evlist,
@@ -448,4 +440,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
void evlist__check_mem_load_aux(struct evlist *evlist);
+void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c2dbb5647e75..762e2b2634a5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,8 +46,9 @@
#include "memswap.h"
#include "util.h"
#include "util/hashmap.h"
-#include "pmu-hybrid.h"
#include "off_cpu.h"
+#include "pmu.h"
+#include "pmus.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include "util/bpf-filter.h"
@@ -291,6 +292,7 @@ void evsel__init(struct evsel *evsel,
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
evsel->pmu_name = NULL;
+ evsel->group_pmu_name = NULL;
evsel->skippable = false;
}
@@ -317,48 +319,6 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
return evsel;
}
-static bool perf_event_can_profile_kernel(void)
-{
- return perf_event_paranoid_check(1);
-}
-
-struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config)
-{
- struct perf_event_attr attr = {
- .type = type,
- .config = config,
- .exclude_kernel = !perf_event_can_profile_kernel(),
- };
- struct evsel *evsel;
-
- event_attr_init(&attr);
-
- /*
- * Now let the usual logic to set up the perf_event_attr defaults
- * to kick in when we return and before perf_evsel__open() is called.
- */
- evsel = evsel__new(&attr);
- if (evsel == NULL)
- goto out;
-
- arch_evsel__fixup_new_cycles(&evsel->core.attr);
-
- evsel->precise_max = true;
-
- /* use asprintf() because free(evsel) assumes name is allocated */
- if (asprintf(&evsel->name, "cycles%s%s%.*s",
- (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
- attr.exclude_kernel ? "u" : "",
- attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
- goto error_free;
-out:
- return evsel;
-error_free:
- evsel__delete(evsel);
- evsel = NULL;
- goto out;
-}
-
int copy_config_terms(struct list_head *dst, struct list_head *src)
{
struct evsel_config_term *pos, *tmp;
@@ -416,6 +376,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->core.nr_members = orig->core.nr_members;
evsel->core.system_wide = orig->core.system_wide;
evsel->core.requires_cpu = orig->core.requires_cpu;
+ evsel->core.is_pmu_core = orig->core.is_pmu_core;
if (orig->name) {
evsel->name = strdup(orig->name);
@@ -432,6 +393,11 @@ struct evsel *evsel__clone(struct evsel *orig)
if (evsel->pmu_name == NULL)
goto out_err;
}
+ if (orig->group_pmu_name) {
+ evsel->group_pmu_name = strdup(orig->group_pmu_name);
+ if (evsel->group_pmu_name == NULL)
+ goto out_err;
+ }
if (orig->filter) {
evsel->filter = strdup(orig->filter);
if (evsel->filter == NULL)
@@ -828,30 +794,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
return !strcmp(evsel__name(evsel), name);
}
-const char *evsel__group_pmu_name(const struct evsel *evsel)
-{
- struct evsel *leader = evsel__leader(evsel);
- struct evsel *pos;
-
- /*
- * Software events may be in a group with other uncore PMU events. Use
- * the pmu_name of the first non-software event to avoid breaking the
- * software event out of the group.
- *
- * Aux event leaders, like intel_pt, expect a group with events from
- * other PMUs, so substitute the AUX event's PMU in this case.
- */
- if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
- /* Starting with the leader, find the first event with a named PMU. */
- for_each_group_evsel(pos, leader) {
- if (pos->pmu_name)
- return pos->pmu_name;
- }
- }
-
- return evsel->pmu_name ?: "cpu";
-}
-
const char *evsel__metric_id(const struct evsel *evsel)
{
if (evsel->metric_id)
@@ -1131,10 +1073,6 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
evsel__set_sample_bit(evsel, WEIGHT);
}
-void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused)
-{
-}
-
void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
struct perf_event_attr *attr __maybe_unused)
{
@@ -1537,6 +1475,7 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->pmu_name);
+ zfree(&evsel->group_pmu_name);
zfree(&evsel->unit);
zfree(&evsel->metric_id);
evsel__zero_per_pkg(evsel);
@@ -2100,6 +2039,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
fallback_missing_features:
evsel__disable_missing_features(evsel);
+ pr_debug3("Opening: %s\n", evsel__name(evsel));
display_attr(&evsel->core.attr);
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
@@ -2984,25 +2924,19 @@ static bool find_process(const char *name)
return ret ? false : true;
}
-static bool is_amd(const char *arch, const char *cpuid)
+int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
+ char *msg __maybe_unused,
+ size_t size __maybe_unused)
{
- return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "AuthenticAMD");
-}
-
-static bool is_amd_ibs(struct evsel *evsel)
-{
- return evsel->core.attr.precise_ip
- || (evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3));
+ return 0;
}
int evsel__open_strerror(struct evsel *evsel, struct target *target,
int err, char *msg, size_t size)
{
- struct perf_env *env = evsel__env(evsel);
- const char *arch = perf_env__arch(env);
- const char *cpuid = perf_env__cpuid(env);
char sbuf[STRERR_BUFSIZE];
int printed = 0, enforced = 0;
+ int ret;
switch (err) {
case EPERM:
@@ -3104,16 +3038,6 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
evsel__name(evsel));
- if (is_amd(arch, cpuid)) {
- if (is_amd_ibs(evsel)) {
- if (evsel->core.attr.exclude_kernel)
- return scnprintf(msg, size,
- "AMD IBS can't exclude kernel events. Try running at a higher privilege level.");
- if (!evsel->core.system_wide)
- return scnprintf(msg, size,
- "AMD IBS may only be available in system-wide/per-cpu mode. Try using -a, or -C and workload affinity");
- }
- }
break;
case ENODATA:
@@ -3123,6 +3047,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
break;
}
+ ret = arch_evsel__open_strerror(evsel, msg, size);
+ if (ret)
+ return ret;
+
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
"/bin/dmesg | grep -i perf may provide additional information.\n",
@@ -3178,9 +3106,17 @@ void evsel__zero_per_pkg(struct evsel *evsel)
}
}
+/**
+ * evsel__is_hybrid() - does the evsel have a known PMU that is hybrid? Note,
+ * this will be false on hybrid systems for hardware and legacy
+ * cache events.
+ */
bool evsel__is_hybrid(const struct evsel *evsel)
{
- return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
+ if (perf_pmus__num_core_pmus() == 1)
+ return false;
+
+ return evsel->core.is_pmu_core;
}
struct evsel *evsel__leader(const struct evsel *evsel)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 0f54f28a69c2..848534ec74fa 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -10,6 +10,7 @@
#include <internal/evsel.h>
#include <perf/evsel.h>
#include "symbol_conf.h"
+#include "pmus.h"
struct bpf_object;
struct cgroup;
@@ -72,6 +73,7 @@ struct evsel {
char *name;
char *group_name;
const char *pmu_name;
+ const char *group_pmu_name;
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *tp_format;
#endif
@@ -129,6 +131,7 @@ struct evsel {
bool reset_group;
bool errored;
bool needs_auxtrace_mmap;
+ bool default_metricgroup; /* A member of the Default metricgroup */
struct hashmap *per_pkg_mask;
int err;
struct {
@@ -241,8 +244,6 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name)
}
#endif
-struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config);
-
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *event_format__new(const char *sys, const char *name);
#endif
@@ -287,7 +288,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
const char *evsel__name(struct evsel *evsel);
bool evsel__name_is(struct evsel *evsel, const char *name);
-const char *evsel__group_pmu_name(const struct evsel *evsel);
const char *evsel__metric_id(const struct evsel *evsel);
static inline bool evsel__is_tool(const struct evsel *evsel)
@@ -310,8 +310,8 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
void arch_evsel__set_sample_weight(struct evsel *evsel);
-void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr);
+int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size);
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
@@ -353,9 +353,19 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
-#define evsel__match(evsel, t, c) \
- (evsel->core.attr.type == PERF_TYPE_##t && \
- evsel->core.attr.config == PERF_COUNT_##c)
+static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
+{
+ if (evsel->core.attr.type != type)
+ return false;
+
+ if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) &&
+ perf_pmus__supports_extended_type())
+ return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
+
+ return evsel->core.attr.config == config;
+}
+
+#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
{
@@ -460,16 +470,24 @@ static inline int evsel__group_idx(struct evsel *evsel)
}
/* Iterates group WITHOUT the leader. */
-#define for_each_group_member(_evsel, _leader) \
-for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
- (_evsel) && (_evsel)->core.leader == (&_leader->core); \
- (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+#define for_each_group_member_head(_evsel, _leader, _head) \
+for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
+ (_evsel) && &(_evsel)->core.node != (_head) && \
+ (_evsel)->core.leader == &(_leader)->core; \
+ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+#define for_each_group_member(_evsel, _leader) \
+ for_each_group_member_head(_evsel, _leader, &(_leader)->evlist->core.entries)
/* Iterates group WITH the leader. */
-#define for_each_group_evsel(_evsel, _leader) \
-for ((_evsel) = _leader; \
- (_evsel) && (_evsel)->core.leader == (&_leader->core); \
- (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+#define for_each_group_evsel_head(_evsel, _leader, _head) \
+for ((_evsel) = _leader; \
+ (_evsel) && &(_evsel)->core.node != (_head) && \
+ (_evsel)->core.leader == &(_leader)->core; \
+ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+#define for_each_group_evsel(_evsel, _leader) \
+ for_each_group_evsel_head(_evsel, _leader, &(_leader)->evlist->core.entries)
static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
{
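
__evsel__match() is needed because, with multiple core PMUs, the PMU type is encoded in the upper bits of attr.config for hardware and hw-cache events, while PERF_HW_EVENT_MASK keeps the event index in the low bits. A worked sketch of what the masking accomplishes; the shift and mask macros are from the perf UAPI:

    /* On a hybrid PMU a hardware event is encoded as:
     *   attr.config = (pmu_type << PERF_PMU_TYPE_SHIFT) | PERF_COUNT_HW_INSTRUCTIONS
     * so a match must compare only the low bits:
     */
    bool is_insns = (attr.config & PERF_HW_EVENT_MASK) == PERF_COUNT_HW_INSTRUCTIONS;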
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index cc80ec554c0a..8719b3cb5646 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -2,6 +2,7 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>
+#include "util/evlist.h"
#include "evsel.h"
#include "util/evsel_fprintf.h"
#include "util/event.h"
@@ -116,6 +117,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
int print_ip = print_opts & EVSEL__PRINT_IP;
int print_sym = print_opts & EVSEL__PRINT_SYM;
int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_dsoff = print_opts & EVSEL__PRINT_DSOFF;
int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
@@ -125,9 +127,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
char s = print_oneline ? ' ' : '\t';
bool first = true;
- if (sample->callchain) {
- struct addr_location node_al;
+ if (cursor == NULL)
+ return fprintf(fp, "<not enough memory for the callchain cursor>%s", print_oneline ? "" : "\n");
+ if (sample->callchain) {
callchain_cursor_commit(cursor);
while (1) {
@@ -157,9 +160,12 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
if (print_sym) {
+ struct addr_location node_al;
+
+ addr_location__init(&node_al);
printed += fprintf(fp, " ");
node_al.addr = addr;
- node_al.map = map;
+ node_al.map = map__get(map);
if (print_symoffset) {
printed += __symbol__fprintf_symname_offs(sym, &node_al,
@@ -169,13 +175,11 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
printed += __symbol__fprintf_symname(sym, &node_al,
print_unknown_as_addr, fp);
}
+ addr_location__exit(&node_al);
}
- if (print_dso && (!sym || !sym->inlined)) {
- printed += fprintf(fp, " (");
- printed += map__fprintf_dsoname(map, fp);
- printed += fprintf(fp, ")");
- }
+ if (print_dso && (!sym || !sym->inlined))
+ printed += map__fprintf_dsoname_dsoff(map, print_dsoff, addr, fp);
if (print_srcline)
printed += map__fprintf_srcline(map, addr, "\n ", fp);
@@ -209,6 +213,7 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
int print_ip = print_opts & EVSEL__PRINT_IP;
int print_sym = print_opts & EVSEL__PRINT_SYM;
int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_dsoff = print_opts & EVSEL__PRINT_DSOFF;
int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
@@ -234,11 +239,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
}
}
- if (print_dso) {
- printed += fprintf(fp, " (");
- printed += map__fprintf_dsoname(al->map, fp);
- printed += fprintf(fp, ")");
- }
+ if (print_dso)
+ printed += map__fprintf_dsoname_dsoff(al->map, print_dsoff, al->addr, fp);
if (print_srcline)
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
index 3093d096c29f..c8a9fac2f2dd 100644
--- a/tools/perf/util/evsel_fprintf.h
+++ b/tools/perf/util/evsel_fprintf.h
@@ -26,6 +26,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE
#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6)
#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7)
#define EVSEL__PRINT_SKIP_IGNORED (1<<8)
+#define EVSEL__PRINT_DSOFF (1<<9)
struct addr_location;
struct perf_event_attr;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index f4e52919324e..7410a165f68b 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -8,9 +8,10 @@
#include "cpumap.h"
#include "cputopo.h"
#include "debug.h"
+#include "evlist.h"
#include "expr.h"
-#include "expr-bison.h"
-#include "expr-flex.h"
+#include <util/expr-bison.h>
+#include <util/expr-flex.h>
#include "util/hashmap.h"
#include "smt.h"
#include "tsc.h"
@@ -474,3 +475,23 @@ out:
pr_debug2("literal: %s = %f\n", literal, result);
return result;
}
+
+/* Does the event 'id' parse? Determine via ctx->ids if possible. */
+double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id)
+{
+ struct evlist *tmp;
+ double ret;
+
+ if (hashmap__find(ctx->ids, id, /*value=*/NULL))
+ return 1.0;
+
+ if (!compute_ids)
+ return 0.0;
+
+ tmp = evlist__new();
+ if (!tmp)
+ return NAN;
+ ret = parse_event(tmp, id) ? 0 : 1;
+ evlist__delete(tmp);
+ return ret;
+}
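
expr__has_event() backs the new has_event() metric function: during the ids-computation pass it optimistically parses the named event into a throwaway evlist. A sketch of both sides; the event name is illustrative:

    double v = expr__has_event(ctx, /*compute_ids=*/true, "l2_rqsts.references");
    /* 1.0 if the id is already in ctx->ids or the event string parses,
     * 0.0 if not, NAN if the probe evlist could not be allocated. */

A metric expression could then guard a ratio, e.g. has_event(l2_rqsts.references) ? l2_rqsts.references / cycles : 0.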
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index eaa44b24c555..3c1e49b3e35d 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -54,5 +54,6 @@ int expr__find_ids(const char *expr, const char *one,
double expr_id_data__value(const struct expr_id_data *data);
double expr_id_data__source_count(const struct expr_id_data *data);
double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx);
+double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 4fbf353e78e7..dbb117414710 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -113,6 +113,7 @@ min { return MIN; }
if { return IF; }
else { return ELSE; }
source_count { return SOURCE_COUNT; }
+has_event { return HAS_EVENT; }
{literal} { return literal(yyscanner, sctx); }
{number} { return value(yyscanner); }
{symbol} { return str(yyscanner, ID, sctx->runtime); }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 4ce931cccb63..dd504afd8f36 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -37,7 +37,7 @@
} ids;
}
-%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR
+%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT EXPR_ERROR
%left MIN MAX IF
%left '|'
%left '^'
@@ -123,20 +123,6 @@ static struct ids handle_id(struct expr_parse_ctx *ctx, char *id,
* constant value using OP. Its invariant that there are no ids. If computing
* ids for non-constants union the set of IDs that must be computed.
*/
-#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \
- if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
- assert(LHS.ids == NULL); \
- assert(RHS.ids == NULL); \
- if (isnan(LHS.val) || isnan(RHS.val)) { \
- RESULT.val = NAN; \
- } else { \
- RESULT.val = (long)LHS.val OP (long)RHS.val; \
- } \
- RESULT.ids = NULL; \
- } else { \
- RESULT = union_expr(LHS, RHS); \
- }
-
#define BINARY_OP(RESULT, OP, LHS, RHS) \
if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
assert(LHS.ids == NULL); \
@@ -213,9 +199,81 @@ expr: NUMBER
}
| ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); }
| SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); }
-| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); }
-| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); }
-| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); }
+| HAS_EVENT '(' ID ')'
+{
+ $$.val = expr__has_event(ctx, compute_ids, $3);
+ $$.ids = NULL;
+ free($3);
+}
+| expr '|' expr
+{
+ if (is_const($1.val) && is_const($3.val)) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.ids = NULL;
+ $$.val = (fpclassify($1.val) == FP_ZERO && fpclassify($3.val) == FP_ZERO) ? 0 : 1;
+ } else if (is_const($1.val)) {
+ assert($1.ids == NULL);
+ if (fpclassify($1.val) == FP_ZERO) {
+ $$ = $3;
+ } else {
+ $$.val = 1;
+ $$.ids = NULL;
+ ids__free($3.ids);
+ }
+ } else if (is_const($3.val)) {
+ assert($3.ids == NULL);
+ if (fpclassify($3.val) == FP_ZERO) {
+ $$ = $1;
+ } else {
+ $$.val = 1;
+ $$.ids = NULL;
+ ids__free($1.ids);
+ }
+ } else {
+ $$ = union_expr($1, $3);
+ }
+}
+| expr '&' expr
+{
+ if (is_const($1.val) && is_const($3.val)) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.val = (fpclassify($1.val) != FP_ZERO && fpclassify($3.val) != FP_ZERO) ? 1 : 0;
+ $$.ids = NULL;
+ } else if (is_const($1.val)) {
+ assert($1.ids == NULL);
+ if (fpclassify($1.val) != FP_ZERO) {
+ $$ = $3;
+ } else {
+ $$.val = 0;
+ $$.ids = NULL;
+ ids__free($3.ids);
+ }
+ } else if (is_const($3.val)) {
+ assert($3.ids == NULL);
+ if (fpclassify($3.val) != FP_ZERO) {
+ $$ = $1;
+ } else {
+ $$.val = 0;
+ $$.ids = NULL;
+ ids__free($1.ids);
+ }
+ } else {
+ $$ = union_expr($1, $3);
+ }
+}
+| expr '^' expr
+{
+ if (is_const($1.val) && is_const($3.val)) {
+ assert($1.ids == NULL);
+ assert($3.ids == NULL);
+ $$.val = (fpclassify($1.val) == FP_ZERO) != (fpclassify($3.val) == FP_ZERO) ? 1 : 0;
+ $$.ids = NULL;
+ } else {
+ $$ = union_expr($1, $3);
+ }
+}
| expr '<' expr { BINARY_OP($$, <, $1, $3); }
| expr '>' expr { BINARY_OP($$, >, $1, $3); }
| expr '+' expr { BINARY_OP($$, +, $1, $3); }
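
The open-coded rules above replace BINARY_LONG_OP so that |, & and ^ treat their operands as booleans and can short-circuit on constants, releasing event ids that can never affect the result. The folding, sketched as examples:

    /* Constant folding under the new logical rules:
     *   1 | <events>    ->  1         (the events' ids are freed, never programmed)
     *   0 | <events>    ->  <events>
     *   0 & <events>    ->  0         (ids freed)
     *   1 & <events>    ->  <events>
     *   const ^ const   ->  0 or 1    (booleanised XOR; non-consts still union ids)
     */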
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index aa5dcc56b2ac..8588b3e35e00 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -337,6 +337,9 @@ static void emit_lineno_info(struct buffer_ext *be,
{
size_t i;
+ /* as described in the jitdump format */
+ const char repeated_name_marker[] = {'\xff', '\0'};
+
/*
* Machine state at start of a statement program
* address = 0
@@ -363,7 +366,8 @@ static void emit_lineno_info(struct buffer_ext *be,
/*
* check if filename changed, if so add it
*/
- if (!cur_filename || strcmp(cur_filename, ent->name)) {
+ if ((!cur_filename || strcmp(cur_filename, ent->name)) &&
+ strcmp(repeated_name_marker, ent->name)) {
emit_lne_define_filename(be, ent->name);
cur_filename = ent->name;
emit_set_file(be, ++cur_file_idx);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 276870221ce0..7af85a479786 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -24,6 +24,7 @@
#include <bpf/libbpf.h>
#endif
#include <perf/cpumap.h>
+#include <tools/libc_compat.h> // reallocarray
#include "dso.h"
#include "evlist.h"
@@ -37,6 +38,7 @@
#include "debug.h"
#include "cpumap.h"
#include "pmu.h"
+#include "pmus.h"
#include "vdso.h"
#include "strbuf.h"
#include "build-id.h"
@@ -51,7 +53,6 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
-#include "pmu-hybrid.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -745,7 +746,7 @@ static int write_pmu_mappings(struct feat_fd *ff,
* Do a first pass to count number of pmu to avoid lseek so this
* works in pipe mode as well.
*/
- while ((pmu = perf_pmu__scan(pmu))) {
+ while ((pmu = perf_pmus__scan(pmu))) {
if (!pmu->name)
continue;
pmu_num++;
@@ -755,7 +756,7 @@ static int write_pmu_mappings(struct feat_fd *ff,
if (ret < 0)
return ret;
- while ((pmu = perf_pmu__scan(pmu))) {
+ while ((pmu = perf_pmus__scan(pmu))) {
if (!pmu->name)
continue;
@@ -1213,38 +1214,54 @@ static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
}
-#define MAX_CACHE_LVL 4
-
-static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
+/*
+ * Build caches levels for a particular CPU from the data in
+ * /sys/devices/system/cpu/cpu<cpu>/cache/
+ * The cache level data is stored in caches[] from index at
+ * *cntp.
+ */
+int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp)
{
- u32 i, cnt = 0;
- u32 nr, cpu;
u16 level;
- nr = cpu__max_cpu().cpu;
+ for (level = 0; level < MAX_CACHE_LVL; level++) {
+ struct cpu_cache_level c;
+ int err;
+ u32 i;
- for (cpu = 0; cpu < nr; cpu++) {
- for (level = 0; level < MAX_CACHE_LVL; level++) {
- struct cpu_cache_level c;
- int err;
+ err = cpu_cache_level__read(&c, cpu, level);
+ if (err < 0)
+ return err;
- err = cpu_cache_level__read(&c, cpu, level);
- if (err < 0)
- return err;
+ if (err == 1)
+ break;
- if (err == 1)
+ for (i = 0; i < *cntp; i++) {
+ if (cpu_cache_level__cmp(&c, &caches[i]))
break;
+ }
- for (i = 0; i < cnt; i++) {
- if (cpu_cache_level__cmp(&c, &caches[i]))
- break;
- }
+ if (i == *cntp) {
+ caches[*cntp] = c;
+ *cntp = *cntp + 1;
+ } else
+ cpu_cache_level__free(&c);
+ }
- if (i == cnt)
- caches[cnt++] = c;
- else
- cpu_cache_level__free(&c);
- }
+ return 0;
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
+{
+ u32 nr, cpu, cnt = 0;
+
+ nr = cpu__max_cpu().cpu;
+
+ for (cpu = 0; cpu < nr; cpu++) {
+ int ret = build_caches_for_cpu(cpu, caches, &cnt);
+
+ if (ret)
+ return ret;
}
*cntp = cnt;
return 0;
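
Factoring out build_caches_for_cpu() makes the per-CPU cache walk reusable (it is exported via header.h later in this patch). A sketch of a caller collecting the levels for one CPU; MAX_CACHE_LVL bounds the array:

    struct cpu_cache_level caches[MAX_CACHE_LVL];
    u32 cnt = 0;

    if (build_caches_for_cpu(cpu, caches, &cnt) == 0) {
            /* caches[0 .. cnt) now holds the deduplicated levels for 'cpu' */
    }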
@@ -1372,6 +1389,14 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
return 0;
}
+static void memory_node__delete_nodes(struct memory_node *nodesp, u64 cnt)
+{
+ for (u64 i = 0; i < cnt; i++)
+ bitmap_free(nodesp[i].set);
+
+ free(nodesp);
+}
+
static int memory_node__sort(const void *a, const void *b)
{
const struct memory_node *na = a;
@@ -1380,13 +1405,14 @@ static int memory_node__sort(const void *a, const void *b)
return na->node - nb->node;
}
-static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
{
char path[PATH_MAX];
struct dirent *ent;
DIR *dir;
- u64 cnt = 0;
int ret = 0;
+ size_t cnt = 0, size = 0;
+ struct memory_node *nodes = NULL;
scnprintf(path, PATH_MAX, "%s/devices/system/node/",
sysfs__mountpoint());
@@ -1410,26 +1436,32 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
if (r != 1)
continue;
- if (WARN_ONCE(cnt >= size,
- "failed to write MEM_TOPOLOGY, way too many nodes\n")) {
- closedir(dir);
- return -1;
- }
+ if (cnt >= size) {
+ struct memory_node *new_nodes =
+ reallocarray(nodes, cnt + 4, sizeof(*nodes));
+ if (!new_nodes) {
+ pr_err("Failed to write MEM_TOPOLOGY, size %zd nodes\n", size);
+ ret = -ENOMEM;
+ goto out;
+ }
+ nodes = new_nodes;
+ size += 4;
+ }
ret = memory_node__read(&nodes[cnt++], idx);
}
-
- *cntp = cnt;
+out:
closedir(dir);
-
- if (!ret)
+ if (!ret) {
+ *cntp = cnt;
+ *nodesp = nodes;
qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+ } else
+ memory_node__delete_nodes(nodes, cnt);
return ret;
}
-#define MAX_MEMORY_NODES 2000
-
/*
* The MEM_TOPOLOGY holds physical memory map for every
* node in system. The format of data is as follows:
@@ -1448,8 +1480,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
static int write_mem_topology(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
- static struct memory_node nodes[MAX_MEMORY_NODES];
- u64 bsize, version = 1, i, nr;
+ struct memory_node *nodes = NULL;
+ u64 bsize, version = 1, i, nr = 0;
int ret;
ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
@@ -1457,7 +1489,7 @@ static int write_mem_topology(struct feat_fd *ff __maybe_unused,
if (ret)
return ret;
- ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+ ret = build_mem_topology(&nodes, &nr);
if (ret)
return ret;
@@ -1492,6 +1524,7 @@ static int write_mem_topology(struct feat_fd *ff __maybe_unused,
}
out:
+ memory_node__delete_nodes(nodes, nr);
return ret;
}
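
build_mem_topology() now grows its node array with reallocarray() in steps of four instead of filling a fixed 2000-entry static table, and hands ownership to the caller only on success. The new contract, as write_mem_topology() uses it:

    struct memory_node *nodes = NULL;
    u64 nr = 0;

    ret = build_mem_topology(&nodes, &nr);   /* allocates and sorts on success */
    if (ret)
            return ret;
    /* ... emit nodes[0 .. nr) ... */
    memory_node__delete_nodes(nodes, nr);    /* frees each bitmap and the array */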
@@ -1551,7 +1584,7 @@ static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
static int write_cpu_pmu_caps(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+ struct perf_pmu *cpu_pmu = perf_pmus__find("cpu");
int ret;
if (!cpu_pmu)
@@ -1571,9 +1604,16 @@ static int write_pmu_caps(struct feat_fd *ff,
int nr_pmu = 0;
int ret;
- while ((pmu = perf_pmu__scan(pmu))) {
- if (!pmu->name || !strcmp(pmu->name, "cpu") ||
- perf_pmu__caps_parse(pmu) <= 0)
+ while ((pmu = perf_pmus__scan(pmu))) {
+ if (!strcmp(pmu->name, "cpu")) {
+ /*
+ * The "cpu" PMU is special and covered by
+ * HEADER_CPU_PMU_CAPS. Note, core PMUs are
+ * counted/written here for ARM, s390 and Intel hybrid.
+ */
+ continue;
+ }
+ if (perf_pmu__caps_parse(pmu) <= 0)
continue;
nr_pmu++;
}
@@ -1586,22 +1626,17 @@ static int write_pmu_caps(struct feat_fd *ff,
return 0;
/*
- * Write hybrid pmu caps first to maintain compatibility with
- * older perf tool.
+ * Note, older perf tools assume core PMUs come first; this is a
+ * property of perf_pmus__scan().
*/
pmu = NULL;
- perf_pmu__for_each_hybrid_pmu(pmu) {
- ret = __write_pmu_caps(ff, pmu, true);
- if (ret < 0)
- return ret;
- }
-
- pmu = NULL;
- while ((pmu = perf_pmu__scan(pmu))) {
- if (!pmu->name || !strcmp(pmu->name, "cpu") ||
- !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name))
+ while ((pmu = perf_pmus__scan(pmu))) {
+ if (!strcmp(pmu->name, "cpu")) {
+ /* Skip as above. */
+ continue;
+ }
+ if (perf_pmu__caps_parse(pmu) <= 0)
continue;
-
ret = __write_pmu_caps(ff, pmu, true);
if (ret < 0)
return ret;
@@ -2810,7 +2845,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
i = nr = 0;
evlist__for_each_entry(session->evlist, evsel) {
- if (evsel->core.idx == (int) desc[i].leader_idx) {
+ if (i < nr_groups && evsel->core.idx == (int) desc[i].leader_idx) {
evsel__set_leader(evsel, evsel);
/* {anon_group} is a dummy name */
if (strcmp(desc[i].name, "{anon_group}")) {
@@ -4347,7 +4382,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
{
- u32 i, ids, n_ids;
+ u32 i, n_ids;
+ u64 *ids;
struct evsel *evsel;
struct evlist *evlist = *pevlist;
@@ -4363,9 +4399,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
evlist__add(evlist, evsel);
- ids = event->header.size;
- ids -= (void *)&event->attr.id - (void *)event;
- n_ids = ids / sizeof(u64);
+ n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size;
+ n_ids = n_ids / sizeof(u64);
/*
* We don't have the cpu and thread maps on the header, so
* for allocating the perf_sample_id table we fake 1 cpu and
@@ -4374,8 +4409,9 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
return -ENOMEM;
+ ids = (void *)&event->attr.attr + event->attr.attr.size;
for (i = 0; i < n_ids; i++) {
- perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]);
+ perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]);
}
return 0;
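The rewritten size computation treats the attr event as a fixed header, a self-sized perf_event_attr, and then a trailing array of u64 sample IDs; n_ids is whatever bytes remain divided by sizeof(u64). A sketch of decoding such a layout, with hypothetical struct names (the real event layout is defined by the perf UAPI headers):

#include <stdint.h>
#include <stddef.h>

struct hdr {
	uint32_t type;
	uint16_t misc;
	uint16_t size;	/* total event size, header included */
};

/* number of u64 IDs that follow the variable-sized attr */
static size_t trailing_id_count(const struct hdr *h, uint32_t attr_size)
{
	return (h->size - sizeof(*h) - attr_size) / sizeof(uint64_t);
}

/* the IDs start exactly attr_size bytes into the attr payload */
static const uint64_t *trailing_ids(const void *attr_start, uint32_t attr_size)
{
	return (const uint64_t *)((const char *)attr_start + attr_size);
}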
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 59eeb4a32ac5..7c16a250e738 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -179,7 +179,11 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
int write_padded(struct feat_fd *fd, const void *bf,
size_t count, size_t count_aligned);
+#define MAX_CACHE_LVL 4
+
int is_cpu_online(unsigned int cpu);
+int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp);
+
/*
* arch specific callback
*/
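A hedged usage sketch for the newly exported build_caches_for_cpu(): the caller supplies storage for at most MAX_CACHE_LVL levels and receives the populated count through *cntp (cpu_cache_level__free() is assumed to be the existing per-entry destructor):

static void show_cpu0_caches(void)
{
	struct cpu_cache_level caches[MAX_CACHE_LVL];
	u32 cnt = 0, i;

	if (build_caches_for_cpu(0, caches, &cnt))
		return;
	for (i = 0; i < cnt; i++)	/* cnt is at most MAX_CACHE_LVL */
		cpu_cache_level__free(&caches[i]);
}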
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index ab9e16123626..eab99ea6ac01 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -92,6 +92,7 @@ const char *help_unknown_cmd(const char *cmd)
main_cmds.names[0] = NULL;
clean_cmdnames(&main_cmds);
+ clean_cmdnames(&other_cmds);
fprintf(stderr, "WARNING: You called a perf program named '%s', "
"which does not exist.\n"
"Continuing under the assumption that you meant '%s'\n",
@@ -114,5 +115,7 @@ const char *help_unknown_cmd(const char *cmd)
fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
}
end:
+ clean_cmdnames(&main_cmds);
+ clean_cmdnames(&other_cmds);
exit(1);
}
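The fix above frees both command lists on every exit path, not just the early-return one. The underlying pattern, sketched with a hypothetical load_commands() helper (clean_cmdnames() is assumed safe on empty lists, as its use above implies):

static int lookup_cmds(void)
{
	struct cmdnames main_cmds = {0}, other_cmds = {0};
	int ret = -1;

	if (load_commands(&main_cmds, &other_cmds) < 0)
		goto out;	/* partially filled lists are still released below */
	ret = 0;
out:
	clean_cmdnames(&main_cmds);
	clean_cmdnames(&other_cmds);
	return ret;
}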
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 3c9301a26dfc..3dc8a4968beb 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -450,6 +450,7 @@ static int hist_entry__init(struct hist_entry *he,
memset(&he->stat, 0, sizeof(he->stat));
}
+ he->ms.maps = maps__get(he->ms.maps);
he->ms.map = map__get(he->ms.map);
if (he->branch_info) {
@@ -483,7 +484,7 @@ static int hist_entry__init(struct hist_entry *he,
goto err_infos;
}
- if (he->srcline) {
+ if (he->srcline && he->srcline != SRCLINE_UNKNOWN) {
he->srcline = strdup(he->srcline);
if (he->srcline == NULL)
goto err_rawdata;
@@ -497,7 +498,7 @@ static int hist_entry__init(struct hist_entry *he,
}
INIT_LIST_HEAD(&he->pairs.node);
- thread__get(he->thread);
+ he->thread = thread__get(he->thread);
he->hroot_in = RB_ROOT_CACHED;
he->hroot_out = RB_ROOT_CACHED;
@@ -523,6 +524,7 @@ err_infos:
map__put(he->mem_info->daddr.ms.map);
}
err:
+ maps__zput(he->ms.maps);
map__zput(he->ms.map);
zfree(&he->stat_acc);
return -ENOMEM;
@@ -588,7 +590,7 @@ static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
static struct hist_entry *hists__findnew_entry(struct hists *hists,
struct hist_entry *entry,
- struct addr_location *al,
+ const struct addr_location *al,
bool sample_self)
{
struct rb_node **p;
@@ -611,7 +613,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
* keys were used.
*/
cmp = hist_entry__cmp(he, entry);
-
if (!cmp) {
if (sample_self) {
he_stat__add_period(&he->stat, period);
@@ -927,8 +928,10 @@ iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
if (iter->curr >= iter->total)
return 0;
- al->maps = bi[i].to.ms.maps;
- al->map = bi[i].to.ms.map;
+ maps__put(al->maps);
+ al->maps = maps__get(bi[i].to.ms.maps);
+ map__put(al->map);
+ al->map = map__get(bi[i].to.ms.map);
al->sym = bi[i].to.ms.sym;
al->addr = bi[i].to.addr;
return 1;
@@ -1026,15 +1029,19 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
struct hist_entry **he_cache;
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor == NULL)
+ return -ENOMEM;
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
/*
	 * This is for detecting cycles or recursion so that entries are
	 * accumulated only once, preventing more than 100% overhead.
*/
- he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
+ he_cache = malloc(sizeof(*he_cache) * (cursor->nr + 1));
if (he_cache == NULL)
return -ENOMEM;
@@ -1069,7 +1076,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
* We need to re-initialize the cursor since callchain_append()
* advanced the cursor to the end.
*/
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(get_tls_callchain_cursor());
hists__inc_nr_samples(hists, he->filtered);
@@ -1082,7 +1089,7 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
{
struct callchain_cursor_node *node;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(get_tls_callchain_cursor());
if (node == NULL)
return 0;
@@ -1128,12 +1135,15 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
.raw_size = sample->raw_size,
};
int i;
- struct callchain_cursor cursor;
+ struct callchain_cursor cursor, *tls_cursor = get_tls_callchain_cursor();
bool fast = hists__has(he_tmp.hists, sym);
- callchain_cursor_snapshot(&cursor, &callchain_cursor);
+ if (tls_cursor == NULL)
+ return -ENOMEM;
+
+ callchain_cursor_snapshot(&cursor, tls_cursor);
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(tls_cursor);
/*
* Check if there's duplicate entries in the callchain.
@@ -1219,7 +1229,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
if (al)
alm = map__get(al->map);
- err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
+ err = sample__resolve_callchain(iter->sample, get_tls_callchain_cursor(), &iter->parent,
iter->evsel, al, max_stack_depth);
if (err) {
map__put(alm);
@@ -1307,13 +1317,14 @@ void hist_entry__delete(struct hist_entry *he)
struct hist_entry_ops *ops = he->ops;
thread__zput(he->thread);
+ maps__zput(he->ms.maps);
map__zput(he->ms.map);
if (he->branch_info) {
map__zput(he->branch_info->from.ms.map);
map__zput(he->branch_info->to.ms.map);
- free_srcline(he->branch_info->srcline_from);
- free_srcline(he->branch_info->srcline_to);
+ zfree_srcline(&he->branch_info->srcline_from);
+ zfree_srcline(&he->branch_info->srcline_to);
zfree(&he->branch_info);
}
@@ -1331,7 +1342,7 @@ void hist_entry__delete(struct hist_entry *he)
zfree(&he->res_samples);
zfree(&he->stat_acc);
- free_srcline(he->srcline);
+ zfree_srcline(&he->srcline);
if (he->srcfile && he->srcfile[0])
zfree(&he->srcfile);
free_callchain(he->callchain);
@@ -1564,8 +1575,13 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
if (hist_entry__has_callchains(new_he) &&
symbol_conf.use_callchain) {
- callchain_cursor_reset(&callchain_cursor);
- if (callchain_merge(&callchain_cursor,
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor == NULL)
+ return -1;
+
+ callchain_cursor_reset(cursor);
+ if (callchain_merge(cursor,
new_he->callchain,
he->callchain) < 0)
ret = -1;
@@ -1606,11 +1622,15 @@ static int hists__collapse_insert_entry(struct hists *hists,
he_stat__add_stat(iter->stat_acc, he->stat_acc);
if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
- callchain_cursor_reset(&callchain_cursor);
- if (callchain_merge(&callchain_cursor,
- iter->callchain,
- he->callchain) < 0)
- ret = -1;
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ if (cursor != NULL) {
+ callchain_cursor_reset(cursor);
+ if (callchain_merge(cursor, iter->callchain, he->callchain) < 0)
+ ret = -1;
+ } else {
+ ret = 0;
+ }
}
hist_entry__delete(he);
return ret;
@@ -2122,7 +2142,7 @@ static bool hists__filter_entry_by_thread(struct hists *hists,
struct hist_entry *he)
{
if (hists->thread_filter != NULL &&
- he->thread != hists->thread_filter) {
+ RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(hists->thread_filter)) {
he->filtered |= (1 << HIST_FILTER__THREAD);
return true;
}
@@ -2778,12 +2798,12 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
if (hists__has(hists, thread)) {
printed += scnprintf(bf + printed, size - printed,
", Thread: %s(%d)",
- (thread->comm_set ? thread__comm_str(thread) : ""),
- thread->tid);
+ (thread__comm_set(thread) ? thread__comm_str(thread) : ""),
+ thread__tid(thread));
} else {
printed += scnprintf(bf + printed, size - printed,
", Thread: %s",
- (thread->comm_set ? thread__comm_str(thread) : ""));
+ (thread__comm_set(thread) ? thread__comm_str(thread) : ""));
}
}
if (dso)
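The hist.c changes make every pointer stored in a long-lived hist_entry take its own reference (maps__get(), map__get(), thread__get()) and release it with the zput variants on teardown. The core idiom, reduced to a self-contained sketch (the obj_* names are illustrative, not perf APIs):

#include <stdlib.h>

struct obj { int refcnt; };

static struct obj *obj_get(struct obj *o)
{
	if (o)
		o->refcnt++;
	return o;	/* returning o enables the "p = obj_get(p)" style used above */
}

static void obj_put(struct obj *o)
{
	if (o && --o->refcnt == 0)
		free(o);
}

/* drop the reference and clear the field so a stale pointer cannot be reused */
#define obj_zput(p) do { obj_put(p); (p) = NULL; } while (0)

Writing `he->thread = thread__get(he->thread)` rather than a bare `thread__get(he->thread)` keeps the ownership transfer visible at the assignment site.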
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 2c8147a62203..ec1b3bd9f530 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -456,7 +456,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
thread = machine__find_thread(btsq->bts->machine, -1,
btsq->tid);
if (thread)
- btsq->pid = thread->pid_;
+ btsq->pid = thread__pid(thread);
} else {
thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
btsq->tid);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index fe893c9bab3f..dbf0bc71a63b 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -754,13 +754,15 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
struct addr_location al;
unsigned char buf[INTEL_PT_INSN_BUF_SZ];
ssize_t len;
- int x86_64;
+ int x86_64, ret = 0;
u8 cpumode;
u64 offset, start_offset, start_ip;
u64 insn_cnt = 0;
bool one_map = true;
bool nr;
+
+ addr_location__init(&al);
intel_pt_insn->length = 0;
if (to_ip && *ip == to_ip)
@@ -773,19 +775,22 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
if (ptq->pt->have_guest_sideband) {
if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) {
intel_pt_log("ERROR: guest sideband but no guest machine\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_ret;
}
} else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
intel_pt_get_guest(ptq)) {
intel_pt_log("ERROR: no guest machine\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_ret;
}
machine = ptq->guest_machine;
thread = ptq->guest_thread;
if (!thread) {
if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) {
intel_pt_log("ERROR: no guest thread\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_ret;
}
thread = ptq->unknown_guest_thread;
}
@@ -794,7 +799,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL) {
intel_pt_log("ERROR: no thread\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_ret;
}
thread = ptq->pt->unknown_thread;
}
@@ -808,13 +814,17 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip);
else
intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip);
- return -EINVAL;
+ addr_location__exit(&al);
+ ret = -EINVAL;
+ goto out_ret;
}
dso = map__dso(al.map);
if (dso->data.status == DSO_DATA_STATUS_ERROR &&
- dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
- return -ENOENT;
+ dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
offset = map__map_ip(al.map, *ip);
@@ -833,7 +843,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
intel_pt_insn->rel = e->rel;
memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ);
intel_pt_log_insn_no_data(intel_pt_insn, *ip);
- return 0;
+ ret = 0;
+ goto out_ret;
}
}
@@ -854,11 +865,14 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
offset);
if (intel_pt_enable_logging)
dso__fprintf(dso, intel_pt_log_fp());
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_ret;
}
- if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
- return -EINVAL;
+ if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) {
+ ret = -EINVAL;
+ goto out_ret;
+ }
intel_pt_log_insn(intel_pt_insn, *ip);
@@ -909,17 +923,20 @@ out:
e = intel_pt_cache_lookup(map__dso(al.map), machine, start_offset);
if (e)
- return 0;
+ goto out_ret;
}
/* Ignore cache errors */
intel_pt_cache_add(map__dso(al.map), machine, start_offset, insn_cnt,
*ip - start_ip, intel_pt_insn);
- return 0;
+out_ret:
+ addr_location__exit(&al);
+ return ret;
out_no_cache:
*insn_cnt_ptr = insn_cnt;
+ addr_location__exit(&al);
return 0;
}
@@ -968,6 +985,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
struct addr_location al;
u8 cpumode;
u64 offset;
+ int res;
if (ptq->state->to_nr) {
if (intel_pt_guest_kernel_ip(ip))
@@ -984,12 +1002,15 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
if (!thread)
return -EINVAL;
+ addr_location__init(&al);
if (!thread__find_map(thread, cpumode, ip, &al) || !map__dso(al.map))
return -EINVAL;
offset = map__map_ip(al.map, ip);
- return intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name);
+ res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name);
+ addr_location__exit(&al);
+ return res;
}
static bool intel_pt_pgd_ip(uint64_t ip, void *data)
@@ -1259,6 +1280,7 @@ static void intel_pt_add_br_stack(struct intel_pt *pt,
pt->kernel_start);
sample->branch_stack = pt->br_stack;
+ thread__put(thread);
}
/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
@@ -1428,13 +1450,13 @@ static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq)
ptq->guest_machine = machine;
}
- vcpu = ptq->thread ? ptq->thread->guest_cpu : -1;
+ vcpu = ptq->thread ? thread__guest_cpu(ptq->thread) : -1;
if (vcpu < 0)
return -1;
tid = machine__get_current_tid(machine, vcpu);
- if (ptq->guest_thread && ptq->guest_thread->tid != tid)
+ if (ptq->guest_thread && thread__tid(ptq->guest_thread) != tid)
thread__zput(ptq->guest_thread);
if (!ptq->guest_thread) {
@@ -1444,7 +1466,7 @@ static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq)
}
ptq->guest_machine_pid = machine_pid;
- ptq->guest_pid = ptq->guest_thread->pid_;
+ ptq->guest_pid = thread__pid(ptq->guest_thread);
ptq->guest_tid = tid;
ptq->vcpu = vcpu;
@@ -1467,9 +1489,9 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
if (ptq->thread) {
- ptq->pid = ptq->thread->pid_;
+ ptq->pid = thread__pid(ptq->thread);
if (queue->cpu == -1)
- ptq->cpu = ptq->thread->cpu;
+ ptq->cpu = thread__cpu(ptq->thread);
}
if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) {
@@ -3074,7 +3096,7 @@ static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
if (ptq->pid == -1) {
ptq->thread = machine__find_thread(m, -1, ptq->tid);
if (ptq->thread)
- ptq->pid = ptq->thread->pid_;
+ ptq->pid = thread__pid(ptq->thread);
return;
}
@@ -3372,20 +3394,22 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
/* Assume text poke begins in a basic block no more than 4096 bytes */
int cnt = 4096 + event->text_poke.new_len;
struct thread *thread = pt->unknown_thread;
- struct addr_location al = { .map = NULL };
+ struct addr_location al;
struct machine *machine = pt->machine;
struct intel_pt_cache_entry *e;
u64 offset;
+ int ret = 0;
+ addr_location__init(&al);
if (!event->text_poke.new_len)
- return 0;
+ goto out;
for (; cnt; cnt--, addr--) {
struct dso *dso;
if (intel_pt_find_map(thread, cpumode, addr, &al)) {
if (addr < event->text_poke.addr)
- return 0;
+ goto out;
continue;
}
@@ -3406,15 +3430,16 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
* branch instruction before the text poke address.
*/
if (e->branch != INTEL_PT_BR_NO_BRANCH)
- return 0;
+ goto out;
} else {
intel_pt_cache_invalidate(dso, machine, offset);
intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
dso->long_name, addr);
}
}
-
- return 0;
+out:
+ addr_location__exit(&al);
+ return ret;
}
static int intel_pt_process_event(struct perf_session *session,
@@ -3556,6 +3581,7 @@ static void intel_pt_free(struct perf_session *session)
zfree(&pt->chain);
zfree(&pt->filter);
zfree(&pt->time_ranges);
+ zfree(&pt->br_stack);
free(pt);
}
@@ -4311,14 +4337,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
goto err_free_queues;
}
- /*
- * Since this thread will not be kept in any rbtree not in a
- * list, initialize its list node so that at thread__put() the
- * current thread lifetime assumption is kept and we don't segfault
- * at list_del_init().
- */
- INIT_LIST_HEAD(&pt->unknown_thread->node);
-
err = thread__set_comm(pt->unknown_thread, "unknown", 0);
if (err)
goto err_delete_thread;
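Throughout intel-pt.c the early returns are converted into `goto out_ret` so that the addr_location, which owns map and maps references once addr_location__init() has run, is always released by addr_location__exit(). A minimal sketch of the bracketing, assuming the perf headers for addr_location and with do_lookup() standing in for the real map/symbol resolution:

static int walk_one(void)
{
	struct addr_location al;
	int ret = 0;

	addr_location__init(&al);
	if (do_lookup(&al) < 0) {
		ret = -EINVAL;
		goto out;	/* an early return here would leak al's references */
	}
	/* ... use al.map, al.sym ... */
out:
	addr_location__exit(&al);
	return ret;
}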
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 28e49502db5e..6b2b96c16ccd 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -799,17 +799,21 @@ static void jit_add_pid(struct machine *machine, pid_t pid)
return;
}
- thread->priv = (void *)1;
+ thread__set_priv(thread, (void *)true);
+ thread__put(thread);
}
static bool jit_has_pid(struct machine *machine, pid_t pid)
{
struct thread *thread = machine__find_thread(machine, pid, pid);
+ void *priv;
if (!thread)
- return 0;
+ return false;
- return (bool)thread->priv;
+ priv = thread__priv(thread);
+ thread__put(thread);
+ return (bool)priv;
}
int
@@ -833,7 +837,7 @@ jit_process(struct perf_session *session,
return 0;
}
- nsi = nsinfo__get(thread->nsinfo);
+ nsi = nsinfo__get(thread__nsinfo(thread));
thread__put(thread);
/*
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 4e8e243a6e4b..c6c9c2228578 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -25,7 +25,7 @@
"$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
"-Wno-unused-value -Wno-pointer-sign " \
"-working-directory $WORKING_DIR " \
- "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE"
+ "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE"
struct llvm_param llvm_param = {
.clang_path = "clang",
@@ -569,7 +569,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
pr_err("ERROR:\tunable to compile %s\n", path);
pr_err("Hint:\tCheck error message shown above.\n");
pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
- pr_err(" \t\tclang -target bpf -O2 -c %s\n", path);
+ pr_err(" \t\tclang --target=bpf -O2 -c %s\n", path);
pr_err(" \twith proper -I and -D options.\n");
goto errout;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9e02e19c1b7a..f4cb41ee23cd 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -43,8 +43,8 @@
#include <linux/string.h>
#include <linux/zalloc.h>
-static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
-static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip);
+static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
+ struct thread *th, bool lock);
static struct dso *machine__kernel_dso(struct machine *machine)
{
@@ -72,6 +72,22 @@ static void machine__threads_init(struct machine *machine)
}
}
+static int thread_rb_node__cmp_tid(const void *key, const struct rb_node *nd)
+{
+ int to_find = (int) *((pid_t *)key);
+
+ return to_find - (int)thread__tid(rb_entry(nd, struct thread_rb_node, rb_node)->thread);
+}
+
+static struct thread_rb_node *thread_rb_node__find(const struct thread *th,
+ struct rb_root *tree)
+{
+ pid_t to_find = thread__tid(th);
+ struct rb_node *nd = rb_find(&to_find, tree, thread_rb_node__cmp_tid);
+
+ return rb_entry(nd, struct thread_rb_node, rb_node);
+}
+
static int machine__set_mmap_name(struct machine *machine)
{
if (machine__is_host(machine))
@@ -214,10 +230,10 @@ void machine__delete_threads(struct machine *machine)
down_write(&threads->lock);
nd = rb_first_cached(&threads->entries);
while (nd) {
- struct thread *t = rb_entry(nd, struct thread, rb_node);
+ struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
nd = rb_next(nd);
- __machine__remove_thread(machine, t, false);
+ __machine__remove_thread(machine, trb, trb->thread, false);
}
up_write(&threads->lock);
}
@@ -231,7 +247,7 @@ void machine__exit(struct machine *machine)
return;
machine__destroy_kernel_maps(machine);
- maps__delete(machine->kmaps);
+ maps__zput(machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
@@ -239,19 +255,9 @@ void machine__exit(struct machine *machine)
zfree(&machine->current_tid);
zfree(&machine->kallsyms_filename);
+ machine__delete_threads(machine);
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
- struct thread *thread, *n;
- /*
- * Forget about the dead, at this point whatever threads were
- * left in the dead lists better have a reference count taken
- * by who is using them, and then, when they drop those references
- * and it finally hits zero, thread__put() will check and see that
- * its not in the dead threads list and will not try to remove it
- * from there, just calling thread__delete() straight away.
- */
- list_for_each_entry_safe(thread, n, &threads->dead, node)
- list_del_init(&thread->node);
exit_rwsem(&threads->lock);
}
@@ -435,7 +441,7 @@ static struct thread *findnew_guest_code(struct machine *machine,
return NULL;
/* Assume maps are set up if there are any */
- if (maps__nr_maps(thread->maps))
+ if (maps__nr_maps(thread__maps(thread)))
return thread;
host_thread = machine__find_thread(host_machine, -1, pid);
@@ -448,7 +454,7 @@ static struct thread *findnew_guest_code(struct machine *machine,
* Guest code can be found in hypervisor process at the same address
* so copy host maps.
*/
- err = maps__clone(thread, host_thread->maps);
+ err = maps__clone(thread, thread__maps(host_thread));
thread__put(host_thread);
if (err)
goto out_err;
@@ -513,45 +519,45 @@ static void machine__update_thread_pid(struct machine *machine,
{
struct thread *leader;
- if (pid == th->pid_ || pid == -1 || th->pid_ != -1)
+ if (pid == thread__pid(th) || pid == -1 || thread__pid(th) != -1)
return;
- th->pid_ = pid;
+ thread__set_pid(th, pid);
- if (th->pid_ == th->tid)
+ if (thread__pid(th) == thread__tid(th))
return;
- leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
+ leader = __machine__findnew_thread(machine, thread__pid(th), thread__pid(th));
if (!leader)
goto out_err;
- if (!leader->maps)
- leader->maps = maps__new(machine);
+ if (!thread__maps(leader))
+ thread__set_maps(leader, maps__new(machine));
- if (!leader->maps)
+ if (!thread__maps(leader))
goto out_err;
- if (th->maps == leader->maps)
- return;
+ if (thread__maps(th) == thread__maps(leader))
+ goto out_put;
- if (th->maps) {
+ if (thread__maps(th)) {
/*
* Maps are created from MMAP events which provide the pid and
* tid. Consequently there never should be any maps on a thread
* with an unknown pid. Just print an error if there are.
*/
- if (!maps__empty(th->maps))
+ if (!maps__empty(thread__maps(th)))
pr_err("Discarding thread maps for %d:%d\n",
- th->pid_, th->tid);
- maps__put(th->maps);
+ thread__pid(th), thread__tid(th));
+ maps__put(thread__maps(th));
}
- th->maps = maps__get(leader->maps);
+ thread__set_maps(th, maps__get(thread__maps(leader)));
out_put:
thread__put(leader);
return;
out_err:
- pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+ pr_err("Failed to join map groups for %d:%d\n", thread__pid(th), thread__tid(th));
goto out_put;
}
@@ -568,11 +574,11 @@ __threads__get_last_match(struct threads *threads, struct machine *machine,
th = threads->last_match;
if (th != NULL) {
- if (th->tid == tid) {
+ if (thread__tid(th) == tid) {
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
-
+ thread__put(threads->last_match);
threads->last_match = NULL;
}
@@ -594,7 +600,8 @@ threads__get_last_match(struct threads *threads, struct machine *machine,
static void
__threads__set_last_match(struct threads *threads, struct thread *th)
{
- threads->last_match = th;
+ thread__put(threads->last_match);
+ threads->last_match = thread__get(th);
}
static void
@@ -616,6 +623,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
struct rb_node **p = &threads->entries.rb_root.rb_node;
struct rb_node *parent = NULL;
struct thread *th;
+ struct thread_rb_node *nd;
bool leftmost = true;
th = threads__get_last_match(threads, machine, pid, tid);
@@ -624,15 +632,15 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
while (*p != NULL) {
parent = *p;
- th = rb_entry(parent, struct thread, rb_node);
+ th = rb_entry(parent, struct thread_rb_node, rb_node)->thread;
- if (th->tid == tid) {
+ if (thread__tid(th) == tid) {
threads__set_last_match(threads, th);
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
- if (tid < th->tid)
+ if (tid < thread__tid(th))
p = &(*p)->rb_left;
else {
p = &(*p)->rb_right;
@@ -644,32 +652,40 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
return NULL;
th = thread__new(pid, tid);
- if (th != NULL) {
- rb_link_node(&th->rb_node, parent, p);
- rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost);
+ if (th == NULL)
+ return NULL;
- /*
- * We have to initialize maps separately after rb tree is updated.
- *
- * The reason is that we call machine__findnew_thread
- * within thread__init_maps to find the thread
- * leader and that would screwed the rb tree.
- */
- if (thread__init_maps(th, machine)) {
- rb_erase_cached(&th->rb_node, &threads->entries);
- RB_CLEAR_NODE(&th->rb_node);
- thread__put(th);
- return NULL;
- }
- /*
- * It is now in the rbtree, get a ref
- */
- thread__get(th);
- threads__set_last_match(threads, th);
- ++threads->nr;
+ nd = malloc(sizeof(*nd));
+ if (nd == NULL) {
+ thread__put(th);
+ return NULL;
}
+ nd->thread = th;
- return th;
+ rb_link_node(&nd->rb_node, parent, p);
+ rb_insert_color_cached(&nd->rb_node, &threads->entries, leftmost);
+ /*
+ * We have to initialize maps separately after rb tree is updated.
+ *
+ * The reason is that we call machine__findnew_thread within
+	 * thread__init_maps to find the thread leader and that would screw
+	 * up the rb tree.
+ */
+ if (thread__init_maps(th, machine)) {
+		pr_err("Thread init failed for thread %d\n", pid);
+ rb_erase_cached(&nd->rb_node, &threads->entries);
+ RB_CLEAR_NODE(&nd->rb_node);
+ free(nd);
+ thread__put(th);
+ return NULL;
+ }
+ /*
+	 * It is now in the rbtree; take a reference for the caller.
+ */
+ threads__set_last_match(threads, th);
+ ++threads->nr;
+
+ return thread__get(th);
}
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
@@ -1120,7 +1136,7 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
for (nd = rb_first_cached(&threads->entries); nd;
nd = rb_next(nd)) {
- struct thread *pos = rb_entry(nd, struct thread, rb_node);
+ struct thread *pos = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
ret += thread__fprintf(pos, fp);
}
@@ -1780,7 +1796,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
struct extra_kernel_map *xm,
struct build_id *bid)
{
- struct map *map;
enum dso_space_type dso_space;
bool is_kernel_mmap;
const char *mmap_name = machine->mmap_name;
@@ -1806,8 +1821,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
}
if (xm->name[0] == '/' ||
(!is_kernel_mmap && xm->name[0] == '[')) {
- map = machine__addnew_module_map(machine, xm->start,
- xm->name);
+ struct map *map = machine__addnew_module_map(machine, xm->start, xm->name);
+
if (map == NULL)
goto out_problem;
@@ -1816,6 +1831,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (build_id__is_defined(bid))
dso__set_build_id(map__dso(map), bid);
+ map__put(map);
} else if (is_kernel_mmap) {
const char *symbol_name = xm->name + strlen(mmap_name);
/*
@@ -1851,7 +1867,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
continue;
- kernel = dso;
+ kernel = dso__get(dso);
break;
}
@@ -1896,6 +1912,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
*/
dso__load(kernel, machine__kernel_map(machine));
}
+ dso__put(kernel);
} else if (perf_event__is_extra_kernel_mmap(machine, xm)) {
return machine__process_extra_kernel_map(machine, xm);
}
@@ -2031,34 +2048,28 @@ out_problem:
return 0;
}
-static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
+static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
+ struct thread *th, bool lock)
{
- struct threads *threads = machine__threads(machine, th->tid);
+ struct threads *threads = machine__threads(machine, thread__tid(th));
- if (threads->last_match == th)
+ if (!nd)
+ nd = thread_rb_node__find(th, &threads->entries.rb_root);
+
+ if (threads->last_match && RC_CHK_ACCESS(threads->last_match) == RC_CHK_ACCESS(th))
threads__set_last_match(threads, NULL);
if (lock)
down_write(&threads->lock);
- BUG_ON(refcount_read(&th->refcnt) == 0);
+ BUG_ON(refcount_read(thread__refcnt(th)) == 0);
- rb_erase_cached(&th->rb_node, &threads->entries);
- RB_CLEAR_NODE(&th->rb_node);
+ thread__put(nd->thread);
+ rb_erase_cached(&nd->rb_node, &threads->entries);
+ RB_CLEAR_NODE(&nd->rb_node);
--threads->nr;
- /*
- * Move it first to the dead_threads list, then drop the reference,
- * if this is the last reference, then the thread__delete destructor
- * will be called and we will remove it from the dead_threads list.
- */
- list_add_tail(&th->node, &threads->dead);
- /*
- * We need to do the put here because if this is the last refcount,
- * then we will be touching the threads->dead head when removing the
- * thread.
- */
- thread__put(th);
+ free(nd);
if (lock)
up_write(&threads->lock);
@@ -2066,7 +2077,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
void machine__remove_thread(struct machine *machine, struct thread *th)
{
- return __machine__remove_thread(machine, th, true);
+ return __machine__remove_thread(machine, NULL, th, true);
}
int machine__process_fork_event(struct machine *machine, union perf_event *event,
@@ -2090,9 +2101,9 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
* (fork) event that would have removed the thread was lost. Assume the
* latter case and continue on as best we can.
*/
- if (parent->pid_ != (pid_t)event->fork.ppid) {
+ if (thread__pid(parent) != (pid_t)event->fork.ppid) {
dump_printf("removing erroneous parent thread %d/%d\n",
- parent->pid_, parent->tid);
+ thread__pid(parent), thread__tid(parent));
machine__remove_thread(machine, parent);
thread__put(parent);
parent = machine__findnew_thread(machine, event->fork.ppid,
@@ -2145,10 +2156,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
- if (thread != NULL) {
- thread__exited(thread);
+ if (thread != NULL)
thread__put(thread);
- }
return 0;
}
@@ -2213,7 +2222,7 @@ static void ip__resolve_ams(struct thread *thread,
{
struct addr_location al;
- memset(&al, 0, sizeof(al));
+ addr_location__init(&al);
/*
* We cannot use the header.misc hint to determine whether a
* branch stack address is user, kernel, guest, hypervisor.
@@ -2226,11 +2235,12 @@ static void ip__resolve_ams(struct thread *thread,
ams->addr = ip;
ams->al_addr = al.addr;
ams->al_level = al.level;
- ams->ms.maps = al.maps;
+ ams->ms.maps = maps__get(al.maps);
ams->ms.sym = al.sym;
- ams->ms.map = al.map;
+ ams->ms.map = map__get(al.map);
ams->phys_addr = 0;
ams->data_page_size = 0;
+ addr_location__exit(&al);
}
static void ip__resolve_data(struct thread *thread,
@@ -2239,18 +2249,19 @@ static void ip__resolve_data(struct thread *thread,
{
struct addr_location al;
- memset(&al, 0, sizeof(al));
+ addr_location__init(&al);
thread__find_symbol(thread, m, addr, &al);
ams->addr = addr;
ams->al_addr = al.addr;
ams->al_level = al.level;
- ams->ms.maps = al.maps;
+ ams->ms.maps = maps__get(al.maps);
ams->ms.sym = al.sym;
- ams->ms.map = al.map;
+ ams->ms.map = map__get(al.map);
ams->phys_addr = phys_addr;
ams->data_page_size = daddr_page_size;
+ addr_location__exit(&al);
}
struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -2309,12 +2320,13 @@ static int add_callchain_ip(struct thread *thread,
struct iterations *iter,
u64 branch_from)
{
- struct map_symbol ms;
+ struct map_symbol ms = {};
struct addr_location al;
- int nr_loop_iter = 0, err;
+ int nr_loop_iter = 0, err = 0;
u64 iter_cycles = 0;
const char *srcline = NULL;
+ addr_location__init(&al);
al.filtered = 0;
al.sym = NULL;
al.srcline = NULL;
@@ -2340,9 +2352,10 @@ static int add_callchain_ip(struct thread *thread,
* Discard all.
*/
callchain_cursor_reset(cursor);
- return 1;
+ err = 1;
+ goto out;
}
- return 0;
+ goto out;
}
thread__find_symbol(thread, *cpumode, ip, &al);
}
@@ -2355,31 +2368,30 @@ static int add_callchain_ip(struct thread *thread,
symbol__match_regex(al.sym, &ignore_callees_regex)) {
			/* Treat this symbol as the root, forgetting its callees. */
- *root_al = al;
+ addr_location__copy(root_al, &al);
callchain_cursor_reset(cursor);
}
}
if (symbol_conf.hide_unresolved && al.sym == NULL)
- return 0;
+ goto out;
if (iter) {
nr_loop_iter = iter->nr_loop_iter;
iter_cycles = iter->cycles;
}
- ms.maps = al.maps;
- ms.map = al.map;
+ ms.maps = maps__get(al.maps);
+ ms.map = map__get(al.map);
ms.sym = al.sym;
-
- if (!branch && append_inlines(cursor, &ms, ip) == 0)
- return 0;
-
srcline = callchain_srcline(&ms, al.addr);
err = callchain_cursor_append(cursor, ip, &ms,
branch, flags, nr_loop_iter,
iter_cycles, branch_from, srcline);
- map__put(al.map);
+out:
+ addr_location__exit(&al);
+ maps__put(ms.maps);
+ map__put(ms.map);
return err;
}
@@ -2504,7 +2516,7 @@ static void save_lbr_cursor_node(struct thread *thread,
struct callchain_cursor *cursor,
int idx)
{
- struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
if (!lbr_stitch)
return;
@@ -2546,7 +2558,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
* in callchain_cursor_commit() when the writing session is closed.
* Using curr and pos to track the current cursor node.
*/
- if (thread->lbr_stitch) {
+ if (thread__lbr_stitch(thread)) {
cursor->curr = NULL;
cursor->pos = cursor->nr;
if (cursor->nr) {
@@ -2574,7 +2586,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
* But does not need to save current cursor node for entry 0.
* It's impossible to stitch the whole LBRs of previous sample.
*/
- if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+ if (thread__lbr_stitch(thread) && (cursor->pos != cursor->nr)) {
if (!cursor->curr)
cursor->curr = cursor->first;
else
@@ -2627,7 +2639,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
struct callchain_cursor *cursor)
{
- struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
struct callchain_cursor_node *cnode;
struct stitch_list *stitch_node;
int err;
@@ -2651,7 +2663,7 @@ static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
static struct stitch_list *get_stitch_node(struct thread *thread)
{
- struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
struct stitch_list *stitch_node;
if (!list_empty(&lbr_stitch->free_lists)) {
@@ -2675,7 +2687,7 @@ static bool has_stitched_lbr(struct thread *thread,
struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
struct branch_stack *prev_stack = prev->branch_stack;
struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
- struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
int i, j, nr_identical_branches = 0;
struct stitch_list *stitch_node;
u64 cur_base, distance;
@@ -2739,27 +2751,29 @@ static bool has_stitched_lbr(struct thread *thread,
static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
{
- if (thread->lbr_stitch)
+ if (thread__lbr_stitch(thread))
return true;
- thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch));
- if (!thread->lbr_stitch)
+ thread__set_lbr_stitch(thread, zalloc(sizeof(struct lbr_stitch)));
+ if (!thread__lbr_stitch(thread))
goto err;
- thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
- if (!thread->lbr_stitch->prev_lbr_cursor)
+ thread__lbr_stitch(thread)->prev_lbr_cursor =
+ calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+ if (!thread__lbr_stitch(thread)->prev_lbr_cursor)
goto free_lbr_stitch;
- INIT_LIST_HEAD(&thread->lbr_stitch->lists);
- INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+ INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists);
+ INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists);
return true;
free_lbr_stitch:
- zfree(&thread->lbr_stitch);
+ free(thread__lbr_stitch(thread));
+ thread__set_lbr_stitch(thread, NULL);
err:
pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
- thread->lbr_stitch_enable = false;
+ thread__set_lbr_stitch_enable(thread, false);
return false;
}
@@ -2795,9 +2809,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
if (i == chain_nr)
return 0;
- if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
+ if (thread__lbr_stitch_enable(thread) && !sample->no_hw_idx &&
(max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
- lbr_stitch = thread->lbr_stitch;
+ lbr_stitch = thread__lbr_stitch(thread);
stitched_lbr = has_stitched_lbr(thread, sample,
&lbr_stitch->prev_sample,
@@ -2877,7 +2891,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
static u64 get_leaf_frame_caller(struct perf_sample *sample,
struct thread *thread, int usr_idx)
{
- if (machine__normalized_is(maps__machine(thread->maps), "arm64"))
+ if (machine__normalized_is(maps__machine(thread__maps(thread)), "arm64"))
return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
else
return 0;
@@ -3072,6 +3086,7 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms
struct dso *dso;
u64 addr;
int ret = 1;
+ struct map_symbol ilist_ms;
if (!symbol_conf.inline_name || !map || !sym)
return ret;
@@ -3088,18 +3103,20 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms
inlines__tree_insert(&dso->inlined_nodes, inline_node);
}
+ ilist_ms = (struct map_symbol) {
+ .maps = maps__get(ms->maps),
+ .map = map__get(map),
+ };
list_for_each_entry(ilist, &inline_node->val, list) {
- struct map_symbol ilist_ms = {
- .maps = ms->maps,
- .map = map,
- .sym = ilist->symbol,
- };
+ ilist_ms.sym = ilist->symbol;
ret = callchain_cursor_append(cursor, ip, &ilist_ms, false,
NULL, 0, 0, 0, ilist->srcline);
if (ret != 0)
return ret;
}
+ map__put(ilist_ms.map);
+ maps__put(ilist_ms.maps);
return ret;
}
@@ -3158,6 +3175,9 @@ int thread__resolve_callchain(struct thread *thread,
{
int ret = 0;
+ if (cursor == NULL)
+ return -ENOMEM;
+
callchain_cursor_reset(cursor);
if (callchain_param.order == ORDER_CALLEE) {
@@ -3191,7 +3211,6 @@ int machine__for_each_thread(struct machine *machine,
{
struct threads *threads;
struct rb_node *nd;
- struct thread *thread;
int rc = 0;
int i;
@@ -3199,14 +3218,9 @@ int machine__for_each_thread(struct machine *machine,
threads = &machine->threads[i];
for (nd = rb_first_cached(&threads->entries); nd;
nd = rb_next(nd)) {
- thread = rb_entry(nd, struct thread, rb_node);
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
- }
+ struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
- list_for_each_entry(thread, &threads->dead, node) {
- rc = fn(thread, priv);
+ rc = fn(trb->thread, priv);
if (rc != 0)
return rc;
}
@@ -3264,7 +3278,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
if (!thread)
return -ENOMEM;
- thread->cpu = cpu;
+ thread__set_cpu(thread, cpu);
thread__put(thread);
return 0;
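machine.c now boxes each thread in a thread_rb_node, so the rb_node no longer lives inside the reference-counted struct thread, and lookups go through rb_find() with a key comparator. The shape of that idiom, simplified to non-perf types (assumes the kernel-style rbtree API from linux/rbtree.h):

struct boxed_thread {
	struct rb_node rb_node;
	int tid;
};

static int boxed_cmp_tid(const void *key, const struct rb_node *nd)
{
	int to_find = *(const int *)key;

	return to_find - rb_entry(nd, struct boxed_thread, rb_node)->tid;
}

static struct boxed_thread *boxed_find(struct rb_root *tree, int tid)
{
	struct rb_node *nd = rb_find(&tid, tree, boxed_cmp_tid);

	/* check before rb_entry(): rb_find() returns NULL on a miss */
	return nd ? rb_entry(nd, struct boxed_thread, rb_node) : NULL;
}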
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b7f890950909..f64b83004421 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -137,7 +137,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
no_dso = is_no_dso_memory(filename);
map->prot = prot;
map->flags = flags;
- nsi = nsinfo__get(thread->nsinfo);
+ nsi = nsinfo__get(thread__nsinfo(thread));
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
snprintf(newfilename, sizeof(newfilename),
@@ -390,7 +390,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr)
return dso__find_symbol(map__dso(map), addr);
}
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
+struct symbol *map__find_symbol_by_name_idx(struct map *map, const char *name, size_t *idx)
{
struct dso *dso;
@@ -398,10 +398,16 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
return NULL;
dso = map__dso(map);
- if (!dso__sorted_by_name(dso))
- dso__sort_by_name(dso);
+ dso__sort_by_name(dso);
+
+ return dso__find_symbol_by_name(dso, name, idx);
+}
+
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
+{
+ size_t idx;
- return dso__find_symbol_by_name(dso, name);
+ return map__find_symbol_by_name_idx(map, name, &idx);
}
struct map *map__clone(struct map *from)
@@ -431,14 +437,21 @@ size_t map__fprintf(struct map *map, FILE *fp)
map__start(map), map__end(map), map__pgoff(map), dso->name);
}
-size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+static bool prefer_dso_long_name(const struct dso *dso, bool print_off)
+{
+ return dso->long_name &&
+ (symbol_conf.show_kernel_path ||
+ (print_off && (dso->name[0] == '[' || dso__is_kcore(dso))));
+}
+
+static size_t __map__fprintf_dsoname(struct map *map, bool print_off, FILE *fp)
{
char buf[symbol_conf.pad_output_len_dso + 1];
const char *dsoname = "[unknown]";
const struct dso *dso = map ? map__dso(map) : NULL;
if (dso) {
- if (symbol_conf.show_kernel_path && dso->long_name)
+ if (prefer_dso_long_name(dso, print_off))
dsoname = dso->long_name;
else
dsoname = dso->name;
@@ -452,6 +465,27 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
return fprintf(fp, "%s", dsoname);
}
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+ return __map__fprintf_dsoname(map, false, fp);
+}
+
+size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp)
+{
+ const struct dso *dso = map ? map__dso(map) : NULL;
+ int printed = 0;
+
+ if (print_off && (!dso || !dso__is_object_file(dso)))
+ print_off = false;
+ printed += fprintf(fp, " (");
+ printed += __map__fprintf_dsoname(map, print_off, fp);
+ if (print_off)
+ printed += fprintf(fp, "+0x%" PRIx64, addr);
+ printed += fprintf(fp, ")");
+
+ return printed;
+}
+
char *map__srcline(struct map *map, u64 addr, struct symbol *sym)
{
if (map == NULL)
@@ -468,9 +502,9 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
if (dso) {
char *srcline = map__srcline(map, addr, NULL);
- if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)
+ if (srcline != SRCLINE_UNKNOWN)
ret = fprintf(fp, "%s%s", prefix, srcline);
- free_srcline(srcline);
+ zfree_srcline(&srcline);
}
return ret;
}
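map.c can now compare srcline against SRCLINE_UNKNOWN by pointer identity because the srcline API returns that exact sentinel rather than a heap copy of "??:0"; zfree_srcline() must therefore skip the sentinel. A sketch of the idiom (the sentinel definition below is illustrative; the real one lives in srcline.h):

#include <stdio.h>
#include <stdlib.h>

static char srcline_unknown[] = "??:0";
#define SRCLINE_UNKNOWN_SKETCH (srcline_unknown)

static void print_srcline(FILE *fp, const char *prefix, char *srcline)
{
	if (srcline != SRCLINE_UNKNOWN_SKETCH)	/* identity test, not strcmp() */
		fprintf(fp, "%s%s\n", prefix, srcline);
}

static void zfree_srcline_sketch(char **srcline)
{
	if (*srcline != SRCLINE_UNKNOWN_SKETCH)	/* never free the shared sentinel */
		free(*srcline);
	*srcline = NULL;
}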
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 823ab7fc0acf..1b53d53adc86 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -148,16 +148,17 @@ struct thread;
* @map: the 'struct map *' in which symbols are iterated
* @sym_name: the symbol name
* @pos: the 'struct symbol *' to use as a loop cursor
+ * @idx: the cursor index in the symbol names array
*/
-#define __map__for_each_symbol_by_name(map, sym_name, pos) \
- for (pos = map__find_symbol_by_name(map, sym_name); \
+#define __map__for_each_symbol_by_name(map, sym_name, pos, idx) \
+ for (pos = map__find_symbol_by_name_idx(map, sym_name, &idx); \
pos && \
!symbol__match_symbol_name(pos->name, sym_name, \
SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
- pos = symbol__next_by_name(pos))
+ pos = dso__next_symbol_by_name(map__dso(map), &idx))
-#define map__for_each_symbol_by_name(map, sym_name, pos) \
- __map__for_each_symbol_by_name(map, sym_name, (pos))
+#define map__for_each_symbol_by_name(map, sym_name, pos, idx) \
+ __map__for_each_symbol_by_name(map, sym_name, (pos), idx)
void map__init(struct map *map,
u64 start, u64 end, u64 pgoff, struct dso *dso);
@@ -194,6 +195,7 @@ static inline void __map__zput(struct map **map)
size_t map__fprintf(struct map *map, FILE *fp);
size_t map__fprintf_dsoname(struct map *map, FILE *fp);
+size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp);
char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
FILE *fp);
@@ -201,6 +203,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
int map__load(struct map *map);
struct symbol *map__find_symbol(struct map *map, u64 addr);
struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
+struct symbol *map__find_symbol_by_name_idx(struct map *map, const char *name, size_t *idx);
void map__fixup_start(struct map *map);
void map__fixup_end(struct map *map);
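A hedged usage sketch for the reworked iterator in map.h: the caller now owns an index cursor that dso__next_symbol_by_name() advances through the DSO's sorted-by-name symbol table, so duplicate-named symbols are visited in order (fragment assumes a valid struct map *map and inttypes.h for PRIx64):

struct symbol *pos;
size_t idx;

map__for_each_symbol_by_name(map, "strcmp", pos, idx) {
	/* pos visits each symbol matching "strcmp", in name-sorted order */
	printf("%s at %#" PRIx64 "\n", pos->name, pos->start);
}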
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 1aeb1db58fe5..233438c95b53 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -171,7 +171,7 @@ struct maps *maps__new(struct machine *machine)
return result;
}
-void maps__delete(struct maps *maps)
+static void maps__delete(struct maps *maps)
{
maps__exit(maps);
unwind__finish_access(maps);
@@ -374,6 +374,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
}
put_map:
map__put(pos->map);
+ free(pos);
}
up_write(maps__lock(maps));
return err;
@@ -384,7 +385,7 @@ put_map:
*/
int maps__clone(struct thread *thread, struct maps *parent)
{
- struct maps *maps = thread->maps;
+ struct maps *maps = thread__maps(thread);
int err;
struct map_rb_node *rb_node;
diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h
index d2963456cfbe..83144e0645ed 100644
--- a/tools/perf/util/maps.h
+++ b/tools/perf/util/maps.h
@@ -57,13 +57,20 @@ struct kmap {
};
struct maps *maps__new(struct machine *machine);
-void maps__delete(struct maps *maps);
bool maps__empty(struct maps *maps);
int maps__clone(struct thread *thread, struct maps *parent);
struct maps *maps__get(struct maps *maps);
void maps__put(struct maps *maps);
+static inline void __maps__zput(struct maps **map)
+{
+ maps__put(*map);
+ *map = NULL;
+}
+
+#define maps__zput(map) __maps__zput(&map)
+
static inline struct rb_root *maps__entries(struct maps *maps)
{
return &RC_CHK_ACCESS(maps)->entries;
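With maps__delete() now static, a maps object can only be released through its reference count; maps__zput() additionally clears the caller's pointer so a stale handle cannot be dereferenced. A sketch of the intended use:

struct maps *maps = maps__new(machine);

if (maps) {
	/* ... hand out additional references with maps__get() as needed ... */
	maps__zput(maps);	/* drops this reference and sets maps = NULL */
}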
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index ed1ee4b05356..c07fe3a90722 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -13,7 +13,7 @@
#include "debug.h"
#include "symbol.h"
#include "pmu.h"
-#include "pmu-hybrid.h"
+#include "pmus.h"
unsigned int perf_mem_events__loads_ldlat = 30;
@@ -120,8 +120,8 @@ int perf_mem_events__init(void)
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
struct perf_mem_event *e = perf_mem_events__ptr(j);
- struct perf_pmu *pmu;
char sysfs_name[100];
+ struct perf_pmu *pmu = NULL;
/*
* If the event entry isn't valid, skip initialization
@@ -130,16 +130,14 @@ int perf_mem_events__init(void)
if (!e->tag)
continue;
- if (!perf_pmu__has_hybrid()) {
- scnprintf(sysfs_name, sizeof(sysfs_name),
- e->sysfs_name, "cpu");
- e->supported = perf_mem_event__supported(mnt, sysfs_name);
- } else {
- perf_pmu__for_each_hybrid_pmu(pmu) {
- scnprintf(sysfs_name, sizeof(sysfs_name),
- e->sysfs_name, pmu->name);
- e->supported |= perf_mem_event__supported(mnt, sysfs_name);
- }
+ /*
+	 * Scan all PMUs, not just core ones, since perf mem/c2c on
+	 * platforms like AMD uses the IBS OP PMU, which is independent
+	 * of the core PMU.
+ */
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
+ e->supported |= perf_mem_event__supported(mnt, sysfs_name);
}
if (e->supported)
@@ -170,9 +168,9 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
{
const char *mnt = sysfs__mount();
char sysfs_name[100];
- struct perf_pmu *pmu;
+ struct perf_pmu *pmu = NULL;
- perf_pmu__for_each_hybrid_pmu(pmu) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
pmu->name);
if (!perf_mem_event__supported(mnt, sysfs_name)) {
@@ -195,7 +193,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
if (!e->record)
continue;
- if (!perf_pmu__has_hybrid()) {
+ if (perf_pmus__num_mem_pmus() == 1) {
if (!e->supported) {
pr_err("failed: event '%s' not supported\n",
perf_mem_events__name(j, NULL));
@@ -210,7 +208,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
return -1;
}
- perf_pmu__for_each_hybrid_pmu(pmu) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
rec_argv[i++] = "-e";
s = perf_mem_events__name(j, pmu->name);
if (s) {
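perf_pmus__scan() is a NULL-seeded iterator: pass NULL to get the first PMU, the previous return value to get the next, and it yields NULL when the list is exhausted, which is why every loop above seeds `pmu = NULL`. Sketched in isolation:

struct perf_pmu *pmu = NULL;

while ((pmu = perf_pmus__scan(pmu)) != NULL) {
	/* visits every PMU, core and uncore alike (e.g. IBS OP on AMD) */
	pr_debug("found PMU: %s\n", pmu->name);
}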
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 5e9c657dd3f7..a6a5ed44a679 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -11,7 +11,7 @@
#include "evsel.h"
#include "strbuf.h"
#include "pmu.h"
-#include "pmu-hybrid.h"
+#include "pmus.h"
#include "print-events.h"
#include "smt.h"
#include "expr.h"
@@ -79,6 +79,7 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
return NULL;
memcpy(me, entry, sizeof(struct metric_event));
me->evsel = ((struct metric_event *)entry)->evsel;
+ me->is_default = false;
INIT_LIST_HEAD(&me->head);
return &me->nd;
}
@@ -123,6 +124,7 @@ struct metric {
* within the expression.
*/
struct expr_parse_ctx *pctx;
+ const char *pmu;
/** The name of the metric such as "IPC". */
const char *metric_name;
/** Modifier on the metric such as "u" or NULL for none. */
@@ -136,6 +138,11 @@ struct metric {
* output.
*/
const char *metric_unit;
+ /**
+ * Optional name of the metric group reported
+ * if the Default metric group is being processed.
+ */
+ const char *default_metricgroup_name;
/** Optional null terminated array of referenced metrics. */
struct metric_ref *metric_refs;
/**
@@ -216,7 +223,9 @@ static struct metric *metric__new(const struct pmu_metric *pm,
if (!m->pctx)
goto out_err;
+ m->pmu = pm->pmu ?: "cpu";
m->metric_name = pm->metric_name;
+ m->default_metricgroup_name = pm->default_metricgroup_name;
m->modifier = NULL;
if (modifier) {
m->modifier = strdup(modifier);
@@ -259,11 +268,12 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events,
/**
* setup_metric_events - Find a group of events in metric_evlist that correspond
* to the IDs from a parsed metric expression.
+ * @pmu: The PMU for the IDs.
* @ids: the metric IDs to match.
* @metric_evlist: the list of perf events.
* @out_metric_events: holds the created metric events array.
*/
-static int setup_metric_events(struct hashmap *ids,
+static int setup_metric_events(const char *pmu, struct hashmap *ids,
struct evlist *metric_evlist,
struct evsel ***out_metric_events)
{
@@ -271,6 +281,7 @@ static int setup_metric_events(struct hashmap *ids,
const char *metric_id;
struct evsel *ev;
size_t ids_size, matched_events, i;
+ bool all_pmus = !strcmp(pmu, "all") || perf_pmus__num_core_pmus() == 1 || !is_pmu_core(pmu);
*out_metric_events = NULL;
ids_size = hashmap__size(ids);
@@ -283,6 +294,10 @@ static int setup_metric_events(struct hashmap *ids,
evlist__for_each_entry(metric_evlist, ev) {
struct expr_id_data *val_ptr;
+ /* Don't match events for the wrong hybrid PMU. */
+ if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) &&
+ strcmp(ev->pmu_name, pmu))
+ continue;
/*
* Check for duplicate events with the same name. For
* example, uncore_imc/cas_count_read/ will turn into 6
@@ -298,6 +313,7 @@ static int setup_metric_events(struct hashmap *ids,
* about this event.
*/
if (hashmap__find(ids, metric_id, &val_ptr)) {
+ pr_debug("Matched metric-id %s to %s\n", metric_id, evsel__name(ev));
metric_events[matched_events++] = ev;
if (matched_events >= ids_size)
@@ -355,8 +371,13 @@ static bool match_metric(const char *n, const char *list)
return false;
}
-static bool match_pm_metric(const struct pmu_metric *pm, const char *metric)
+static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric)
{
+ const char *pm_pmu = pm->pmu ?: "cpu";
+
+ if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu))
+ return false;
+
return match_metric(pm->metric_group, metric) ||
match_metric(pm->metric_name, metric);
}
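match_pm_metric() above encodes two conventions: a JSON metric without a pmu field is treated as a core "cpu" metric, and a caller-supplied "all" matches any PMU. The rule in isolation (pmu_matches is a hypothetical helper; `?:` is the GNU elvis operator the perf code itself uses):

#include <stdbool.h>
#include <string.h>

static bool pmu_matches(const char *metric_pmu, const char *wanted)
{
	const char *pm_pmu = metric_pmu ?: "cpu";	/* default: core PMU */

	return !strcmp(wanted, "all") || !strcmp(pm_pmu, wanted);
}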
@@ -475,7 +496,7 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm,
if (!pm->metric_expr || !pm->compat)
return 0;
- while ((pmu = perf_pmu__scan(pmu))) {
+ while ((pmu = perf_pmus__scan(pmu))) {
if (!pmu->id || strcmp(pmu->id, pm->compat))
continue;
@@ -766,6 +787,7 @@ struct visited_metric {
struct metricgroup_add_iter_data {
struct list_head *metric_list;
+ const char *pmu;
const char *metric_name;
const char *modifier;
int *ret;
@@ -779,7 +801,8 @@ struct metricgroup_add_iter_data {
const struct pmu_metrics_table *table;
};
-static bool metricgroup__find_metric(const char *metric,
+static bool metricgroup__find_metric(const char *pmu,
+ const char *metric,
const struct pmu_metrics_table *table,
struct pmu_metric *pm);
@@ -798,6 +821,7 @@ static int add_metric(struct list_head *metric_list,
* resolve_metric - Locate metrics within the root metric and recursively add
* references to them.
* @metric_list: The list the metric is added to.
+ * @pmu: The PMU name to resolve metrics on, or "all" for all PMUs.
* @modifier: if non-null event modifiers like "u".
* @metric_no_group: Should events written to events be grouped "{}" or
* global. Grouping is the default but due to multiplexing the
@@ -813,6 +837,7 @@ static int add_metric(struct list_head *metric_list,
* architecture perf is running upon.
*/
static int resolve_metric(struct list_head *metric_list,
+ const char *pmu,
const char *modifier,
bool metric_no_group,
bool metric_no_threshold,
@@ -842,7 +867,7 @@ static int resolve_metric(struct list_head *metric_list,
hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
struct pmu_metric pm;
- if (metricgroup__find_metric(cur->pkey, table, &pm)) {
+ if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) {
pending = realloc(pending,
(pending_cnt + 1) * sizeof(struct to_resolve));
if (!pending)
@@ -993,9 +1018,12 @@ static int __add_metric(struct list_head *metric_list,
}
if (!ret) {
/* Resolve referenced metrics. */
- ret = resolve_metric(metric_list, modifier, metric_no_group,
+ const char *pmu = pm->pmu ?: "cpu";
+
+ ret = resolve_metric(metric_list, pmu, modifier, metric_no_group,
metric_no_threshold, user_requested_cpu_list,
- system_wide, root_metric, &visited_node, table);
+ system_wide, root_metric, &visited_node,
+ table);
}
if (ret) {
if (is_root)
@@ -1008,6 +1036,7 @@ static int __add_metric(struct list_head *metric_list,
}
struct metricgroup__find_metric_data {
+ const char *pmu;
const char *metric;
struct pmu_metric *pm;
};
@@ -1017,6 +1046,10 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm,
void *vdata)
{
struct metricgroup__find_metric_data *data = vdata;
+ const char *pm_pmu = pm->pmu ?: "cpu";
+
+ if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu))
+ return 0;
if (!match_metric(pm->metric_name, data->metric))
return 0;
@@ -1025,11 +1058,13 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm,
return 1;
}
-static bool metricgroup__find_metric(const char *metric,
+static bool metricgroup__find_metric(const char *pmu,
+ const char *metric,
const struct pmu_metrics_table *table,
struct pmu_metric *pm)
{
struct metricgroup__find_metric_data data = {
+ .pmu = pmu,
.metric = metric,
.pm = pm,
};
@@ -1083,7 +1118,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm,
struct metricgroup_add_iter_data *d = data;
int ret;
- if (!match_pm_metric(pm, d->metric_name))
+ if (!match_pm_metric(pm, d->pmu, d->metric_name))
return 0;
ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group,
@@ -1126,8 +1161,28 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
return right_count - left_count;
}
+/**
+ * default_metricgroup_cmp - Implements a compound sort key for the Default
+ *                           metricgroup: sort first by default_metricgroup_name,
+ *                           then by metric_name.
+ */
+static int default_metricgroup_cmp(void *priv __maybe_unused,
+ const struct list_head *l,
+ const struct list_head *r)
+{
+ const struct metric *left = container_of(l, struct metric, nd);
+ const struct metric *right = container_of(r, struct metric, nd);
+ int diff = strcmp(right->default_metricgroup_name, left->default_metricgroup_name);
+
+ if (diff)
+ return diff;
+
+ return strcmp(right->metric_name, left->metric_name);
+}
+
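For reference, the two-key comparator shape that list_sort() consumes looks like this in isolation; a minimal sketch with a hypothetical struct rec (note that default_metricgroup_cmp above passes right before left to strcmp(), so its order is inverted relative to this ascending version):

struct rec {
        struct list_head nd;    /* embedded node handed to the comparator */
        const char *group;      /* primary sort key */
        const char *name;       /* secondary sort key */
};

static int rec_cmp(void *priv __maybe_unused,
                   const struct list_head *l, const struct list_head *r)
{
        const struct rec *a = container_of(l, struct rec, nd);
        const struct rec *b = container_of(r, struct rec, nd);
        int diff = strcmp(a->group, b->group);

        /* Fall back to the secondary key only on a primary-key tie. */
        return diff ? diff : strcmp(a->name, b->name);
}

/* Usage: list_sort(NULL, &records, rec_cmp); */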
struct metricgroup__add_metric_data {
struct list_head *list;
+ const char *pmu;
const char *metric_name;
const char *modifier;
const char *user_requested_cpu_list;
@@ -1144,9 +1199,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
struct metricgroup__add_metric_data *data = vdata;
int ret = 0;
- if (pm->metric_expr && match_pm_metric(pm, data->metric_name)) {
+ if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) {
bool metric_no_group = data->metric_no_group ||
- match_metric(data->metric_name, pm->metricgroup_no_group);
+ match_metric(pm->metricgroup_no_group, data->metric_name);
data->has_match = true;
ret = add_metric(data->list, pm, data->modifier, metric_no_group,
@@ -1159,6 +1214,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
/**
* metricgroup__add_metric - Find and add a metric, or a metric group.
+ * @pmu: The PMU name to search for metrics on, or "all" for all PMUs.
* @metric_name: The name of the metric or metric group. For example, "IPC"
* could be the name of a metric and "TopDownL1" the name of a
* metric group.
@@ -1172,7 +1228,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
-static int metricgroup__add_metric(const char *metric_name, const char *modifier,
+static int metricgroup__add_metric(const char *pmu, const char *metric_name, const char *modifier,
bool metric_no_group, bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
@@ -1186,6 +1242,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
{
struct metricgroup__add_metric_data data = {
.list = &list,
+ .pmu = pmu,
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
@@ -1210,6 +1267,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
.fn = metricgroup__add_metric_sys_event_iter,
.data = (void *) &(struct metricgroup_add_iter_data) {
.metric_list = &list,
+ .pmu = pmu,
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
@@ -1239,6 +1297,7 @@ out:
/**
* metricgroup__add_metric_list - Find and add metrics, or metric groups,
* specified in a list.
+ * @pmu: A PMU to restrict the metrics to, or "all" for all PMUs.
* @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1"
* would match the IPC and CPI metrics, and TopDownL1 would match all
* the metrics in the TopDownL1 group.
@@ -1251,7 +1310,8 @@ out:
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
-static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
+static int metricgroup__add_metric_list(const char *pmu, const char *list,
+ bool metric_no_group,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide, struct list_head *metric_list,
@@ -1270,7 +1330,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
if (modifier)
*modifier++ = '\0';
- ret = metricgroup__add_metric(metric_name, modifier,
+ ret = metricgroup__add_metric(pmu, metric_name, modifier,
metric_no_group, metric_no_threshold,
user_requested_cpu_list,
system_wide, metric_list, table);
@@ -1441,8 +1501,8 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
}
pr_debug("Parsing metric events '%s'\n", events.buf);
parse_events_error__init(&parse_error);
- ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu,
- /*warn_if_reordered=*/false);
+ ret = __parse_events(parsed_evlist, events.buf, /*pmu_filter=*/NULL,
+ &parse_error, fake_pmu, /*warn_if_reordered=*/false);
if (ret) {
parse_events_error__print(&parse_error, events.buf);
goto err_out;
@@ -1460,7 +1520,8 @@ err_out:
return ret;
}
-static int parse_groups(struct evlist *perf_evlist, const char *str,
+static int parse_groups(struct evlist *perf_evlist,
+ const char *pmu, const char *str,
bool metric_no_group,
bool metric_no_merge,
bool metric_no_threshold,
@@ -1474,11 +1535,12 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
LIST_HEAD(metric_list);
struct metric *m;
bool tool_events[PERF_TOOL_MAX] = {false};
+ bool is_default = !strcmp(str, "Default");
int ret;
if (metric_events_list->nr_entries == 0)
metricgroup__rblist_init(metric_events_list);
- ret = metricgroup__add_metric_list(str, metric_no_group, metric_no_threshold,
+ ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold,
user_requested_cpu_list,
system_wide, &metric_list, table);
if (ret)
@@ -1508,6 +1570,9 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
goto out;
}
+ if (is_default)
+ list_sort(NULL, &metric_list, default_metricgroup_cmp);
+
list_for_each_entry(m, &metric_list, nd) {
struct metric_event *me;
struct evsel **metric_events;
@@ -1535,6 +1600,11 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
strcmp(m->modifier, n->modifier)))
continue;
+ if ((!m->pmu && n->pmu) ||
+ (m->pmu && !n->pmu) ||
+ (m->pmu && n->pmu && strcmp(m->pmu, n->pmu)))
+ continue;
+
if (expr__subset_of_ids(n->pctx, m->pctx)) {
pr_debug("Events in '%s' fully contained within '%s'\n",
m->metric_name, n->metric_name);
@@ -1552,9 +1622,10 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
metric_evlist = m->evlist;
}
- ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events);
+ ret = setup_metric_events(fake_pmu ? "all" : m->pmu, m->pctx->ids,
+ metric_evlist, &metric_events);
if (ret) {
- pr_debug("Cannot resolve IDs for %s: %s\n",
+ pr_err("Cannot resolve IDs for %s: %s\n",
m->metric_name, m->metric_expr);
goto out;
}
@@ -1590,6 +1661,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
expr->runtime = m->pctx->sctx.runtime;
+ expr->default_metricgroup_name = m->default_metricgroup_name;
+ me->is_default = is_default;
list_add(&expr->nd, &me->head);
}
@@ -1610,6 +1683,7 @@ out:
}
int metricgroup__parse_groups(struct evlist *perf_evlist,
+ const char *pmu,
const char *str,
bool metric_no_group,
bool metric_no_merge,
@@ -1623,7 +1697,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
if (!table)
return -EINVAL;
- return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge,
+ return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
/*fake_pmu=*/NULL, metric_events, table);
}
@@ -1633,7 +1707,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
const char *str,
struct rblist *metric_events)
{
- return parse_groups(evlist, str,
+ return parse_groups(evlist, "all", str,
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/false,
@@ -1642,28 +1716,32 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
&perf_pmu__fake, metric_events, table);
}
+struct metricgroup__has_metric_data {
+ const char *pmu;
+ const char *metric;
+};
static int metricgroup__has_metric_callback(const struct pmu_metric *pm,
const struct pmu_metrics_table *table __maybe_unused,
void *vdata)
{
- const char *metric = vdata;
+ struct metricgroup__has_metric_data *data = vdata;
- if (match_metric(pm->metric_name, metric) ||
- match_metric(pm->metric_group, metric))
- return 1;
-
- return 0;
+ return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0;
}
-bool metricgroup__has_metric(const char *metric)
+bool metricgroup__has_metric(const char *pmu, const char *metric)
{
const struct pmu_metrics_table *table = pmu_metrics_table__find();
+ struct metricgroup__has_metric_data data = {
+ .pmu = pmu,
+ .metric = metric,
+ };
if (!table)
return false;
- return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback,
- (void *)metric) ? true : false;
+ return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, &data)
+ ? true : false;
}
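This query relies on pmu_metrics_table_for_each_metric() stopping as soon as a callback returns non-zero. A sketch of that early-exit walk, assuming a plain array-backed table (the real table is generated from the JSON metric definitions):

static int table_walk(const struct pmu_metric *metrics, size_t nr,
                      int (*cb)(const struct pmu_metric *pm, void *data),
                      void *data)
{
        for (size_t i = 0; i < nr; i++) {
                int ret = cb(&metrics[i], data);

                if (ret)        /* callback matched: stop and propagate */
                        return ret;
        }
        return 0;               /* nothing matched */
}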
static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm,
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 77472e35705e..d5325c6ec8e1 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -22,6 +22,7 @@ struct cgroup;
struct metric_event {
struct rb_node nd;
struct evsel *evsel;
+ bool is_default; /* the metric evsel from the Default metricgroup */
struct list_head head; /* list of metric_expr */
};
@@ -55,6 +56,8 @@ struct metric_expr {
* more human intelligible) and then add "MiB" afterward when displayed.
*/
const char *metric_unit;
+ /** Displayed metricgroup name of the Default metricgroup */
+ const char *default_metricgroup_name;
/** Null terminated array of events used by the metric. */
struct evsel **metric_events;
/** Null terminated array of referenced metrics. */
@@ -67,6 +70,7 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
bool create);
int metricgroup__parse_groups(struct evlist *perf_evlist,
+ const char *pmu,
const char *str,
bool metric_no_group,
bool metric_no_merge,
@@ -80,7 +84,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
struct rblist *metric_events);
void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
-bool metricgroup__has_metric(const char *metric);
+bool metricgroup__has_metric(const char *pmu, const char *metric);
unsigned int metricgroups__topdown_max_level(void);
int arch_get_runtimeparam(const struct pmu_metric *pm);
void metricgroup__rblist_exit(struct rblist *metric_events);
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
deleted file mode 100644
index 7c9f9150bad5..000000000000
--- a/tools/perf/util/parse-events-hybrid.c
+++ /dev/null
@@ -1,214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/err.h>
-#include <linux/zalloc.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/param.h>
-#include "evlist.h"
-#include "evsel.h"
-#include "parse-events.h"
-#include "parse-events-hybrid.h"
-#include "debug.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
-#include "perf.h"
-
-static void config_hybrid_attr(struct perf_event_attr *attr,
- int type, int pmu_type)
-{
- /*
- * attr.config layout for type PERF_TYPE_HARDWARE and
- * PERF_TYPE_HW_CACHE
- *
- * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
- * AA: hardware event ID
- * EEEEEEEE: PMU type ID
- * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
- * BB: hardware cache ID
- * CC: hardware cache op ID
- * DD: hardware cache op result ID
- * EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
- */
- attr->type = type;
- attr->config = (attr->config & PERF_HW_EVENT_MASK) |
- ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
-}
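Worked example of the layout described in the comment above, with an illustrative PMU type ID of 8 and hardware event PERF_COUNT_HW_CACHE_MISSES (0x3); PERF_PMU_TYPE_SHIFT is 32:

attr->config = (0x3 & PERF_HW_EVENT_MASK) | (8ULL << PERF_PMU_TYPE_SHIFT);
/* => 0x0000000800000003: EEEEEEEE = 00000008, AA = 03 in the diagram */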
-
-static int create_event_hybrid(__u32 config_type, int *idx,
- struct list_head *list,
- struct perf_event_attr *attr, const char *name,
- const char *metric_id,
- struct list_head *config_terms,
- struct perf_pmu *pmu)
-{
- struct evsel *evsel;
- __u32 type = attr->type;
- __u64 config = attr->config;
-
- config_hybrid_attr(attr, config_type, pmu->type);
-
- /*
- * Some hybrid hardware cache events are only available on one CPU
- * PMU. For example, the 'L1-dcache-load-misses' is only available
- * on cpu_core, while the 'L1-icache-loads' is only available on
- * cpu_atom. We need to remove "not supported" hybrid cache events.
- */
- if (attr->type == PERF_TYPE_HW_CACHE
- && !is_event_supported(attr->type, attr->config))
- return 0;
-
- evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
- pmu, config_terms);
- if (evsel) {
- evsel->pmu_name = strdup(pmu->name);
- if (!evsel->pmu_name)
- return -ENOMEM;
- } else
- return -ENOMEM;
- attr->type = type;
- attr->config = config;
- return 0;
-}
-
-static int pmu_cmp(struct parse_events_state *parse_state,
- struct perf_pmu *pmu)
-{
- if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name)
- return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name);
-
- if (parse_state->hybrid_pmu_name)
- return strcmp(parse_state->hybrid_pmu_name, pmu->name);
-
- return 0;
-}
-
-static int add_hw_hybrid(struct parse_events_state *parse_state,
- struct list_head *list, struct perf_event_attr *attr,
- const char *name, const char *metric_id,
- struct list_head *config_terms)
-{
- struct perf_pmu *pmu;
- int ret;
-
- perf_pmu__for_each_hybrid_pmu(pmu) {
- LIST_HEAD(terms);
-
- if (pmu_cmp(parse_state, pmu))
- continue;
-
- copy_config_terms(&terms, config_terms);
- ret = create_event_hybrid(PERF_TYPE_HARDWARE,
- &parse_state->idx, list, attr, name,
- metric_id, &terms, pmu);
- free_config_terms(&terms);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-static int create_raw_event_hybrid(int *idx, struct list_head *list,
- struct perf_event_attr *attr,
- const char *name,
- const char *metric_id,
- struct list_head *config_terms,
- struct perf_pmu *pmu)
-{
- struct evsel *evsel;
-
- attr->type = pmu->type;
- evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
- pmu, config_terms);
- if (evsel)
- evsel->pmu_name = strdup(pmu->name);
- else
- return -ENOMEM;
-
- return 0;
-}
-
-static int add_raw_hybrid(struct parse_events_state *parse_state,
- struct list_head *list, struct perf_event_attr *attr,
- const char *name, const char *metric_id,
- struct list_head *config_terms)
-{
- struct perf_pmu *pmu;
- int ret;
-
- perf_pmu__for_each_hybrid_pmu(pmu) {
- LIST_HEAD(terms);
-
- if (pmu_cmp(parse_state, pmu))
- continue;
-
- copy_config_terms(&terms, config_terms);
- ret = create_raw_event_hybrid(&parse_state->idx, list, attr,
- name, metric_id, &terms, pmu);
- free_config_terms(&terms);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state,
- struct list_head *list,
- struct perf_event_attr *attr,
- const char *name, const char *metric_id,
- struct list_head *config_terms,
- bool *hybrid)
-{
- *hybrid = false;
- if (attr->type == PERF_TYPE_SOFTWARE)
- return 0;
-
- if (!perf_pmu__has_hybrid())
- return 0;
-
- *hybrid = true;
- if (attr->type != PERF_TYPE_RAW) {
- return add_hw_hybrid(parse_state, list, attr, name, metric_id,
- config_terms);
- }
-
- return add_raw_hybrid(parse_state, list, attr, name, metric_id,
- config_terms);
-}
-
-int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
- struct perf_event_attr *attr,
- const char *name,
- const char *metric_id,
- struct list_head *config_terms,
- bool *hybrid,
- struct parse_events_state *parse_state)
-{
- struct perf_pmu *pmu;
- int ret;
-
- *hybrid = false;
- if (!perf_pmu__has_hybrid())
- return 0;
-
- *hybrid = true;
- perf_pmu__for_each_hybrid_pmu(pmu) {
- LIST_HEAD(terms);
-
- if (pmu_cmp(parse_state, pmu))
- continue;
-
- copy_config_terms(&terms, config_terms);
- ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list,
- attr, name, metric_id, &terms, pmu);
- free_config_terms(&terms);
- if (ret)
- return ret;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h
deleted file mode 100644
index cbc05fec02a2..000000000000
--- a/tools/perf/util/parse-events-hybrid.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERF_PARSE_EVENTS_HYBRID_H
-#define __PERF_PARSE_EVENTS_HYBRID_H
-
-#include <linux/list.h>
-#include <stdbool.h>
-#include <linux/types.h>
-#include <linux/perf_event.h>
-#include <string.h>
-
-int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state,
- struct list_head *list,
- struct perf_event_attr *attr,
- const char *name, const char *metric_id,
- struct list_head *config_terms,
- bool *hybrid);
-
-int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
- struct perf_event_attr *attr,
- const char *name, const char *metric_id,
- struct list_head *config_terms,
- bool *hybrid,
- struct parse_events_state *parse_state);
-
-#endif /* __PERF_PARSE_EVENTS_HYBRID_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 34ba840ae19a..30311844eea7 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -18,45 +18,26 @@
#include "debug.h"
#include <api/fs/tracing_path.h>
#include <perf/cpumap.h>
-#include "parse-events-bison.h"
-#include "parse-events-flex.h"
+#include <util/parse-events-bison.h>
+#include <util/parse-events-flex.h>
#include "pmu.h"
+#include "pmus.h"
#include "asm/bug.h"
#include "util/parse-branch-options.h"
#include "util/evsel_config.h"
#include "util/event.h"
-#include "util/parse-events-hybrid.h"
-#include "util/pmu-hybrid.h"
#include "util/bpf-filter.h"
#include "util/util.h"
#include "tracepoint.h"
-#include "thread_map.h"
#define MAX_NAME_LEN 100
-struct perf_pmu_event_symbol {
- char *symbol;
- enum perf_pmu_event_symbol_type type;
-};
-
#ifdef PARSER_DEBUG
extern int parse_events_debug;
#endif
int parse_events_parse(void *parse_state, void *scanner);
static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused);
-static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
- const char *str, char *pmu_name,
- struct list_head *list);
-
-static struct perf_pmu_event_symbol *perf_pmu_events_list;
-/*
- * The variable indicates the number of supported pmu event symbols.
- * 0 means not initialized and ready to init
- * -1 means failed to init, don't try anymore
- * >0 is the number of supported pmu event symbols
- */
-static int perf_pmu_events_list_num;
struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = {
@@ -152,44 +133,6 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
},
};
-bool is_event_supported(u8 type, u64 config)
-{
- bool ret = true;
- int open_return;
- struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = type,
- .config = config,
- .disabled = 1,
- };
- struct perf_thread_map *tmap = thread_map__new_by_tid(0);
-
- if (tmap == NULL)
- return false;
-
- evsel = evsel__new(&attr);
- if (evsel) {
- open_return = evsel__open(evsel, NULL, tmap);
- ret = open_return >= 0;
-
- if (open_return == -EACCES) {
- /*
- * This happens if the paranoid value
- * /proc/sys/kernel/perf_event_paranoid is set to 2
- * Re-run with exclude_kernel set; we don't do that
- * by default as some ARM machines do not support it.
- *
- */
- evsel->core.attr.exclude_kernel = 1;
- ret = evsel__open(evsel, NULL, tmap) >= 0;
- }
- evsel__delete(evsel);
- }
-
- perf_thread_map__put(tmap);
- return ret;
-}
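The probe idiom above is independent of evsel; a self-contained sketch using the raw syscall (illustrative only, not the code that supersedes this helper):

#include <linux/perf_event.h>
#include <stdbool.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>

static bool probe_event(__u32 type, __u64 config)
{
        struct perf_event_attr attr = {
                .type = type,
                .size = sizeof(attr),
                .config = config,
                .disabled = 1,
        };
        int fd = syscall(__NR_perf_event_open, &attr, /*pid=*/0,
                         /*cpu=*/-1, /*group_fd=*/-1, /*flags=*/0UL);

        if (fd < 0 && errno == EACCES) {
                /* perf_event_paranoid == 2: retry without kernel samples */
                attr.exclude_kernel = 1;
                fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0UL);
        }
        if (fd < 0)
                return false;
        close(fd);
        return true;
}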
-
const char *event_type(int type)
{
switch (type) {
@@ -236,6 +179,57 @@ static char *get_config_name(struct list_head *head_terms)
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
}
+/**
+ * fix_raw - For each raw term, see if there is an event (aka alias) in the PMU that
+ * matches the raw's string value. If the string value matches an
+ * event then change the term to be an event, if not then change it to
+ * be a config term. For example, "read" may be an event of the PMU or
+ * a raw hex encoding of 0xead. The fix-up is done late so the PMU of
+ * the event can be determined and we don't need to scan all PMUs
+ * ahead-of-time.
+ * @config_terms: the list of terms that may contain a raw term.
+ * @pmu: the PMU to scan for events from.
+ */
+static void fix_raw(struct list_head *config_terms, struct perf_pmu *pmu)
+{
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, config_terms, list) {
+ struct perf_pmu_alias *alias;
+ bool matched = false;
+
+ if (term->type_term != PARSE_EVENTS__TERM_TYPE_RAW)
+ continue;
+
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ if (!strcmp(alias->name, term->val.str)) {
+ free(term->config);
+ term->config = term->val.str;
+ term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+ term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+ term->val.num = 1;
+ term->no_value = true;
+ matched = true;
+ break;
+ }
+ }
+ if (!matched) {
+ u64 num;
+
+ free(term->config);
+ term->config = strdup("config");
+ errno = 0;
+ num = strtoull(term->val.str + 1, NULL, 16);
+ assert(errno == 0);
+ free(term->val.str);
+ term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+ term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG;
+ term->val.num = num;
+ term->no_value = false;
+ }
+ }
+}
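Concretely: "read" tokenises as a raw candidate because "ead" is valid hex, so the fix-up decides the meaning late, once the PMU is known (the PMU name below is made up):

/* my_pmu has an alias literally named "read":
 *      my_pmu/read/    -> event term read=1 (expanded from the alias)
 * my_pmu has no such alias:
 *      my_pmu/read/    -> config term config=0xead (strtoull of "ead")
 */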
+
static struct evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
@@ -251,9 +245,16 @@ __add_event(struct list_head *list, int *idx,
if (pmu)
perf_pmu__warn_invalid_formats(pmu);
- if (pmu && attr->type == PERF_TYPE_RAW)
- perf_pmu__warn_invalid_config(pmu, attr->config, name);
-
+ if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) {
+ perf_pmu__warn_invalid_config(pmu, attr->config, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG, "config");
+ perf_pmu__warn_invalid_config(pmu, attr->config1, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG1, "config1");
+ perf_pmu__warn_invalid_config(pmu, attr->config2, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG2, "config2");
+ perf_pmu__warn_invalid_config(pmu, attr->config3, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG3, "config3");
+ }
if (init_attr)
event_attr_init(attr);
@@ -267,8 +268,10 @@ __add_event(struct list_head *list, int *idx,
evsel->core.cpus = cpus;
evsel->core.own_cpus = perf_cpu_map__get(cpus);
evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
+ evsel->core.is_pmu_core = pmu ? pmu->is_core : false;
evsel->auto_merge_stats = auto_merge_stats;
evsel->pmu = pmu;
+ evsel->pmu_name = pmu && pmu->name ? strdup(pmu->name) : NULL;
if (name)
evsel->name = strdup(name);
@@ -328,18 +331,27 @@ static int add_event_tool(struct list_head *list, int *idx,
return 0;
}
-static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size)
+/**
+ * parse_aliases - search names for entries beginning or equalling str ignoring
+ * case. If multiple entries in names match str then the longest
+ * is chosen.
+ * @str: The needle to look for.
+ * @names: The haystack to search.
+ * @size: The size of the haystack.
+ * @longest: Out argument giving the length of the matching entry.
+ */
+static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_ALIASES], int size,
+ int *longest)
{
- int i, j;
- int n, longest = -1;
+ *longest = -1;
+ for (int i = 0; i < size; i++) {
+ for (int j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
+ int n = strlen(names[i][j]);
- for (i = 0; i < size; i++) {
- for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
- n = strlen(names[i][j]);
- if (n > longest && !strncasecmp(str, names[i][j], n))
- longest = n;
+ if (n > *longest && !strncasecmp(str, names[i][j], n))
+ *longest = n;
}
- if (longest > 0)
+ if (*longest > 0)
return i;
}
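Example of the longest-match rule: the PERF_COUNT_HW_CACHE_L1D row of evsel__hw_cache carries several aliases (e.g. "L1-dcache" and "l1-d"), so:

int longest;
int type = parse_aliases("L1-dcache-load-misses", evsel__hw_cache,
                         PERF_COUNT_HW_CACHE_MAX, &longest);
/* type == PERF_COUNT_HW_CACHE_L1D, longest == 9: "L1-dcache" beats "l1-d",
 * letting the caller resume after the next hyphen. */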
@@ -357,52 +369,52 @@ static int config_attr(struct perf_event_attr *attr,
struct parse_events_error *err,
config_term_func_t config_term);
-int parse_events_add_cache(struct list_head *list, int *idx,
- char *type, char *op_result1, char *op_result2,
- struct parse_events_error *err,
- struct list_head *head_config,
- struct parse_events_state *parse_state)
+/**
+ * parse_events__decode_legacy_cache - Search name for the legacy cache event
+ * name composed of 1, 2 or 3 hyphen
+ * separated sections. The first section is
+ * the cache type while the others are the
+ * optional op and optional result. To make
+ * life hard the names in the table also
+ * contain hyphens and the longest name
+ * should always be selected.
+ */
+int parse_events__decode_legacy_cache(const char *name, int extended_pmu_type, __u64 *config)
{
- struct perf_event_attr attr;
- LIST_HEAD(config_terms);
- char name[MAX_NAME_LEN];
- const char *config_name, *metric_id;
- int cache_type = -1, cache_op = -1, cache_result = -1;
- char *op_result[2] = { op_result1, op_result2 };
- int i, n, ret;
- bool hybrid;
+ int len, cache_type = -1, cache_op = -1, cache_result = -1;
+ const char *name_end = &name[strlen(name) + 1];
+ const char *str = name;
- /*
- * No fallback - if we cannot get a clear cache type
- * then bail out:
- */
- cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX);
+ cache_type = parse_aliases(str, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX, &len);
if (cache_type == -1)
return -EINVAL;
-
- config_name = get_config_name(head_config);
- n = snprintf(name, MAX_NAME_LEN, "%s", type);
-
- for (i = 0; (i < 2) && (op_result[i]); i++) {
- char *str = op_result[i];
-
- n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
-
- if (cache_op == -1) {
+ str += len + 1;
+
+ if (str < name_end) {
+ cache_op = parse_aliases(str, evsel__hw_cache_op,
+ PERF_COUNT_HW_CACHE_OP_MAX, &len);
+ if (cache_op >= 0) {
+ if (!evsel__is_cache_op_valid(cache_type, cache_op))
+ return -EINVAL;
+ str += len + 1;
+ } else {
+ cache_result = parse_aliases(str, evsel__hw_cache_result,
+ PERF_COUNT_HW_CACHE_RESULT_MAX, &len);
+ if (cache_result >= 0)
+ str += len + 1;
+ }
+ }
+ if (str < name_end) {
+ if (cache_op < 0) {
cache_op = parse_aliases(str, evsel__hw_cache_op,
- PERF_COUNT_HW_CACHE_OP_MAX);
+ PERF_COUNT_HW_CACHE_OP_MAX, &len);
if (cache_op >= 0) {
if (!evsel__is_cache_op_valid(cache_type, cache_op))
return -EINVAL;
- continue;
}
- }
-
- if (cache_result == -1) {
+ } else if (cache_result < 0) {
cache_result = parse_aliases(str, evsel__hw_cache_result,
- PERF_COUNT_HW_CACHE_RESULT_MAX);
- if (cache_result >= 0)
- continue;
+ PERF_COUNT_HW_CACHE_RESULT_MAX, &len);
}
}
@@ -418,33 +430,71 @@ int parse_events_add_cache(struct list_head *list, int *idx,
if (cache_result == -1)
cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
- memset(&attr, 0, sizeof(attr));
- attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
- attr.type = PERF_TYPE_HW_CACHE;
+ *config = cache_type | (cache_op << 8) | (cache_result << 16);
+ if (perf_pmus__supports_extended_type())
+ *config |= (__u64)extended_pmu_type << PERF_PMU_TYPE_SHIFT;
+ return 0;
+}
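Worked example for the decoder: "L1-dcache-load-misses" splits as follows (the core-PMU type of 8 is illustrative):

/* cache_type   = PERF_COUNT_HW_CACHE_L1D          (0, "L1-dcache")
 * cache_op     = PERF_COUNT_HW_CACHE_OP_READ      (0, "load")
 * cache_result = PERF_COUNT_HW_CACHE_RESULT_MISS  (1, "misses")
 *
 * *config = 0 | (0 << 8) | (1 << 16)  = 0x10000
 * with extended types: |= 8ULL << 32  = 0x0000000800010000
 */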
- if (head_config) {
- if (config_attr(&attr, head_config, err,
- config_term_common))
- return -EINVAL;
+/**
+ * parse_events__filter_pmu - returns false if a wildcard PMU should be
+ * considered, true if it should be filtered.
+ */
+bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
+ const struct perf_pmu *pmu)
+{
+ if (parse_state->pmu_filter == NULL)
+ return false;
- if (get_config_terms(head_config, &config_terms))
+ if (pmu->name == NULL)
+ return true;
+
+ return strcmp(parse_state->pmu_filter, pmu->name) != 0;
+}
+
+int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
+ struct parse_events_state *parse_state,
+ struct list_head *head_config)
+{
+ struct perf_pmu *pmu = NULL;
+ bool found_supported = false;
+ const char *config_name = get_config_name(head_config);
+ const char *metric_id = get_config_metric_id(head_config);
+
+ /* Legacy cache events are only supported by core PMUs. */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ LIST_HEAD(config_terms);
+ struct perf_event_attr attr;
+ int ret;
+
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_HW_CACHE;
+
+ ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config);
+ if (ret)
+ return ret;
+
+ found_supported = true;
+
+ if (head_config) {
+ if (config_attr(&attr, head_config, parse_state->error, config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
+ metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
+ /*cpu_list=*/NULL) == NULL)
return -ENOMEM;
- }
- metric_id = get_config_metric_id(head_config);
- ret = parse_events__add_cache_hybrid(list, idx, &attr,
- config_name ? : name,
- metric_id,
- &config_terms,
- &hybrid, parse_state);
- if (hybrid)
- goto out_free_terms;
-
- ret = add_event(list, idx, &attr, config_name ? : name, metric_id,
- &config_terms);
-out_free_terms:
- free_config_terms(&config_terms);
- return ret;
+ free_config_terms(&config_terms);
+ }
+ return found_supported ? 0 : -EINVAL;
}
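On a hybrid system this loop therefore opens one legacy cache event per core PMU unless pmu_filter narrows it; illustrative command lines (the --cputype wiring into pmu_filter is assumed from the rest of the series):

/* perf stat -e L1-dcache-load-misses
 *      -> one evsel on cpu_core and one on cpu_atom
 * perf stat --cputype atom -e L1-dcache-load-misses
 *      -> pmu_filter set, only the atom PMU's evsel is created
 */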
#ifdef HAVE_LIBTRACEEVENT
@@ -896,10 +946,14 @@ do { \
return 0;
}
-int parse_events_add_breakpoint(struct list_head *list, int *idx,
- u64 addr, char *type, u64 len)
+int parse_events_add_breakpoint(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u64 addr, char *type, u64 len,
+ struct list_head *head_config __maybe_unused)
{
struct perf_event_attr attr;
+ LIST_HEAD(config_terms);
+ const char *name;
memset(&attr, 0, sizeof(attr));
attr.bp_addr = addr;
@@ -920,8 +974,19 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
attr.type = PERF_TYPE_BREAKPOINT;
attr.sample_period = 1;
- return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL,
- /*config_terms=*/NULL);
+ if (head_config) {
+ if (config_attr(&attr, head_config, parse_state->error,
+ config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ name = get_config_name(head_config);
+
+ return add_event(list, &parse_state->idx, &attr, name, /*metric_id=*/NULL,
+ &config_terms);
}
static int check_type_val(struct parse_events_term *term,
@@ -968,6 +1033,9 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
[PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output",
[PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size",
[PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id",
+ [PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
+ [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
+ [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
};
static bool config_term_shrinked;
@@ -1089,6 +1157,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
CHECK_TYPE_VAL(STR);
break;
+ case PARSE_EVENTS__TERM_TYPE_RAW:
+ CHECK_TYPE_VAL(STR);
+ break;
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
CHECK_TYPE_VAL(NUM);
break;
@@ -1142,15 +1213,50 @@ static int config_term_pmu(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err)
{
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
+ const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+
+ if (!pmu) {
+ char *err_str;
+
+ if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
+ parse_events_error__handle(err, term->err_term,
+ err_str, /*help=*/NULL);
+ return -EINVAL;
+ }
+ if (perf_pmu__supports_legacy_cache(pmu)) {
+ attr->type = PERF_TYPE_HW_CACHE;
+ return parse_events__decode_legacy_cache(term->config, pmu->type,
+ &attr->config);
+ } else
+ term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+ }
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) {
+ const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+
+ if (!pmu) {
+ char *err_str;
+
+ if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
+ parse_events_error__handle(err, term->err_term,
+ err_str, /*help=*/NULL);
+ return -EINVAL;
+ }
+ attr->type = PERF_TYPE_HARDWARE;
+ attr->config = term->val.num;
+ if (perf_pmus__supports_extended_type())
+ attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ return 0;
+ }
if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
- term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG)
+ term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) {
/*
* Always succeed for sysfs terms, as we don't know
* at this point what type they need to have.
*/
return 0;
- else
- return config_term_common(attr, term, err);
+ }
+ return config_term_common(attr, term, err);
}
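These two fall-throughs are what allow legacy event names to appear as terms inside PMU syntax; illustrative effect, assuming a hybrid machine:

/* cpu_atom/L1-dcache-load-misses/
 *      -> LEGACY_CACHE term: attr->type becomes PERF_TYPE_HW_CACHE and
 *         the name is decoded with cpu_atom's type in the upper bits.
 * cpu_core/cycles/
 *      -> HARDWARE term: attr->type becomes PERF_TYPE_HARDWARE and
 *         cpu_core's type is added via PERF_PMU_TYPE_SHIFT.
 */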
#ifdef HAVE_LIBTRACEEVENT
@@ -1364,20 +1470,23 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
#endif
}
-int parse_events_add_numeric(struct parse_events_state *parse_state,
- struct list_head *list,
- u32 type, u64 config,
- struct list_head *head_config)
+static int __parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ struct perf_pmu *pmu, u32 type, u32 extended_type,
+ u64 config, struct list_head *head_config)
{
struct perf_event_attr attr;
LIST_HEAD(config_terms);
const char *name, *metric_id;
- bool hybrid;
int ret;
memset(&attr, 0, sizeof(attr));
attr.type = type;
attr.config = config;
+ if (extended_type && (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)) {
+ assert(perf_pmus__supports_extended_type());
+ attr.config |= (u64)extended_type << PERF_PMU_TYPE_SHIFT;
+ }
if (head_config) {
if (config_attr(&attr, head_config, parse_state->error,
@@ -1390,19 +1499,44 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
name = get_config_name(head_config);
metric_id = get_config_metric_id(head_config);
- ret = parse_events__add_numeric_hybrid(parse_state, list, &attr,
- name, metric_id,
- &config_terms, &hybrid);
- if (hybrid)
- goto out_free_terms;
-
- ret = add_event(list, &parse_state->idx, &attr, name, metric_id,
- &config_terms);
-out_free_terms:
+ ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
+ metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
+ /*cpu_list=*/NULL) ? 0 : -ENOMEM;
free_config_terms(&config_terms);
return ret;
}
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u32 type, u64 config,
+ struct list_head *head_config,
+ bool wildcard)
+{
+ struct perf_pmu *pmu = NULL;
+ bool found_supported = false;
+
+ /* Wildcards on numeric values are only supported by core PMUs. */
+ if (wildcard && perf_pmus__supports_extended_type()) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ int ret;
+
+ found_supported = true;
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ ret = __parse_events_add_numeric(parse_state, list, pmu,
+ type, pmu->type,
+ config, head_config);
+ if (ret)
+ return ret;
+ }
+ if (found_supported)
+ return 0;
+ }
+ return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type),
+ type, /*extended_type=*/0, config, head_config);
+}
+
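Illustrative expansion of the wildcard path on a hybrid machine (the PMU type numbers are made up):

/* "perf stat -e cycles" arrives with type == PERF_TYPE_HARDWARE and
 * wildcard == true, producing one event per core PMU:
 *      cpu_core (type 4): attr.config = (4ULL << PERF_PMU_TYPE_SHIFT) | PERF_COUNT_HW_CPU_CYCLES
 *      cpu_atom (type 8): attr.config = (8ULL << PERF_PMU_TYPE_SHIFT) | PERF_COUNT_HW_CPU_CYCLES
 * otherwise the single __parse_events_add_numeric() fallback runs once.
 */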
int parse_events_add_tool(struct parse_events_state *parse_state,
struct list_head *list,
int tool_event)
@@ -1422,33 +1556,6 @@ static bool config_term_percore(struct list_head *config_terms)
return false;
}
-static int parse_events__inside_hybrid_pmu(struct parse_events_state *parse_state,
- struct list_head *list, char *name,
- struct list_head *head_config)
-{
- struct parse_events_term *term;
- int ret = -1;
-
- if (parse_state->fake_pmu || !head_config || list_empty(head_config) ||
- !perf_pmu__is_hybrid(name)) {
- return -1;
- }
-
- /*
- * More than one term in list.
- */
- if (head_config->next && head_config->next->next != head_config)
- return -1;
-
- term = list_first_entry(head_config, struct parse_events_term, list);
- if (term && term->config && strcmp(term->config, "event")) {
- ret = parse_events__with_hybrid_pmu(parse_state, term->config,
- name, list);
- }
-
- return ret;
-}
-
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config,
@@ -1461,7 +1568,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
struct parse_events_error *err = parse_state->error;
LIST_HEAD(config_terms);
- pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+ pmu = parse_state->fake_pmu ?: perf_pmus__find(name);
if (verbose > 1 && !(pmu && pmu->selectable)) {
fprintf(stderr, "Attempting to add event pmu '%s' with '",
@@ -1485,6 +1592,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
parse_events_error__handle(err, 0, err_str, NULL);
return -EINVAL;
}
+ if (head_config)
+ fix_raw(head_config, pmu);
if (pmu->default_config) {
memcpy(&attr, pmu->default_config,
@@ -1492,20 +1601,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
} else {
memset(&attr, 0, sizeof(attr));
}
+ attr.type = pmu->type;
if (!head_config) {
- attr.type = pmu->type;
evsel = __add_event(list, &parse_state->idx, &attr,
/*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, pmu,
/*config_terms=*/NULL, auto_merge_stats,
/*cpu_list=*/NULL);
- if (evsel) {
- evsel->pmu_name = name ? strdup(name) : NULL;
- return 0;
- } else {
- return -ENOMEM;
- }
+ return evsel ? 0 : -ENOMEM;
}
if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
@@ -1541,11 +1645,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
return -ENOMEM;
- if (!parse_events__inside_hybrid_pmu(parse_state, list, name,
- head_config)) {
- return 0;
- }
-
if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
free_config_terms(&config_terms);
return -EINVAL;
@@ -1561,7 +1660,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (evsel->name)
evsel->use_config_name = true;
- evsel->pmu_name = name ? strdup(name) : NULL;
evsel->percore = config_term_percore(&evsel->config_terms);
if (parse_state->fake_pmu)
@@ -1615,15 +1713,21 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
INIT_LIST_HEAD(list);
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
struct perf_pmu_alias *alias;
+ bool auto_merge_stats;
+
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
list_for_each_entry(alias, &pmu->aliases, list) {
if (!strcasecmp(alias->name, str)) {
parse_events_copy_term_list(head, &orig_head);
if (!parse_events_add_pmu(parse_state, list,
pmu->name, orig_head,
- /*auto_merge_stats=*/true)) {
+ auto_merge_stats)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
@@ -1875,180 +1979,6 @@ int parse_events_name(struct list_head *list, const char *name)
return 0;
}
-static int
-comp_pmu(const void *p1, const void *p2)
-{
- struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1;
- struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2;
-
- return strcasecmp(pmu1->symbol, pmu2->symbol);
-}
-
-static void perf_pmu__parse_cleanup(void)
-{
- if (perf_pmu_events_list_num > 0) {
- struct perf_pmu_event_symbol *p;
- int i;
-
- for (i = 0; i < perf_pmu_events_list_num; i++) {
- p = perf_pmu_events_list + i;
- zfree(&p->symbol);
- }
- zfree(&perf_pmu_events_list);
- perf_pmu_events_list_num = 0;
- }
-}
-
-#define SET_SYMBOL(str, stype) \
-do { \
- p->symbol = str; \
- if (!p->symbol) \
- goto err; \
- p->type = stype; \
-} while (0)
-
-/*
- * Read the pmu events list from sysfs
- * Save it into perf_pmu_events_list
- */
-static void perf_pmu__parse_init(void)
-{
-
- struct perf_pmu *pmu = NULL;
- struct perf_pmu_alias *alias;
- int len = 0;
-
- pmu = NULL;
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- list_for_each_entry(alias, &pmu->aliases, list) {
- char *tmp = strchr(alias->name, '-');
-
- if (tmp) {
- char *tmp2 = NULL;
-
- tmp2 = strchr(tmp + 1, '-');
- len++;
- if (tmp2)
- len++;
- }
-
- len++;
- }
- }
-
- if (len == 0) {
- perf_pmu_events_list_num = -1;
- return;
- }
- perf_pmu_events_list = malloc(sizeof(struct perf_pmu_event_symbol) * len);
- if (!perf_pmu_events_list)
- return;
- perf_pmu_events_list_num = len;
-
- len = 0;
- pmu = NULL;
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- list_for_each_entry(alias, &pmu->aliases, list) {
- struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
- char *tmp = strchr(alias->name, '-');
- char *tmp2 = NULL;
-
- if (tmp)
- tmp2 = strchr(tmp + 1, '-');
- if (tmp2) {
- SET_SYMBOL(strndup(alias->name, tmp - alias->name),
- PMU_EVENT_SYMBOL_PREFIX);
- p++;
- tmp++;
- SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX);
- p++;
- SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2);
- len += 3;
- } else if (tmp) {
- SET_SYMBOL(strndup(alias->name, tmp - alias->name),
- PMU_EVENT_SYMBOL_PREFIX);
- p++;
- SET_SYMBOL(strdup(++tmp), PMU_EVENT_SYMBOL_SUFFIX);
- len += 2;
- } else {
- SET_SYMBOL(strdup(alias->name), PMU_EVENT_SYMBOL);
- len++;
- }
- }
- }
- qsort(perf_pmu_events_list, len,
- sizeof(struct perf_pmu_event_symbol), comp_pmu);
-
- return;
-err:
- perf_pmu__parse_cleanup();
-}
-
-/*
- * This function injects special term in
- * perf_pmu_events_list so the test code
- * can check on this functionality.
- */
-int perf_pmu__test_parse_init(void)
-{
- struct perf_pmu_event_symbol *list, *tmp, symbols[] = {
- {(char *)"read", PMU_EVENT_SYMBOL},
- {(char *)"event", PMU_EVENT_SYMBOL_PREFIX},
- {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX},
- {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX},
- {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2},
- };
- unsigned long i, j;
-
- tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols));
- if (!list)
- return -ENOMEM;
-
- for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) {
- tmp->type = symbols[i].type;
- tmp->symbol = strdup(symbols[i].symbol);
- if (!tmp->symbol)
- goto err_free;
- }
-
- perf_pmu_events_list = list;
- perf_pmu_events_list_num = ARRAY_SIZE(symbols);
-
- qsort(perf_pmu_events_list, ARRAY_SIZE(symbols),
- sizeof(struct perf_pmu_event_symbol), comp_pmu);
- return 0;
-
-err_free:
- for (j = 0, tmp = list; j < i; j++, tmp++)
- zfree(&tmp->symbol);
- free(list);
- return -ENOMEM;
-}
-
-enum perf_pmu_event_symbol_type
-perf_pmu__parse_check(const char *name)
-{
- struct perf_pmu_event_symbol p, *r;
-
- /* scan kernel pmu events from sysfs if needed */
- if (perf_pmu_events_list_num == 0)
- perf_pmu__parse_init();
- /*
- * name "cpu" could be prefix of cpu-cycles or cpu// events.
- * cpu-cycles has been handled by hardcode.
- * So it must be cpu// events, not kernel pmu event.
- */
- if ((perf_pmu_events_list_num <= 0) || !strcmp(name, "cpu"))
- return PMU_EVENT_SYMBOL_ERR;
-
- p.symbol = strdup(name);
- r = bsearch(&p, perf_pmu_events_list,
- (size_t) perf_pmu_events_list_num,
- sizeof(struct perf_pmu_event_symbol), comp_pmu);
- zfree(&p.symbol);
- return r ? r->type : PMU_EVENT_SYMBOL_ERR;
-}
-
static int parse_events__scanner(const char *str,
struct parse_events_state *parse_state)
{
@@ -2086,7 +2016,6 @@ int parse_events_terms(struct list_head *terms, const char *str)
int ret;
ret = parse_events__scanner(str, &parse_state);
- perf_pmu__parse_cleanup();
if (!ret) {
list_splice(parse_state.terms, terms);
@@ -2098,31 +2027,71 @@ int parse_events_terms(struct list_head *terms, const char *str)
return ret;
}
-static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
- const char *str, char *pmu_name,
- struct list_head *list)
+static int evsel__compute_group_pmu_name(struct evsel *evsel,
+ const struct list_head *head)
{
- struct parse_events_state ps = {
- .list = LIST_HEAD_INIT(ps.list),
- .stoken = PE_START_EVENTS,
- .hybrid_pmu_name = pmu_name,
- .idx = parse_state->idx,
- };
- int ret;
-
- ret = parse_events__scanner(str, &ps);
- perf_pmu__parse_cleanup();
+ struct evsel *leader = evsel__leader(evsel);
+ struct evsel *pos;
+ const char *group_pmu_name;
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
- if (!ret) {
- if (!list_empty(&ps.list)) {
- list_splice(&ps.list, list);
- parse_state->idx = ps.idx;
- return 0;
- } else
- return -1;
+ if (!pmu) {
+ /*
+ * For PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE types the PMU
+ * is a core PMU, but in heterogeneous systems this is
+ * unknown. For now pick the first core PMU.
+ */
+ pmu = perf_pmus__scan_core(NULL);
}
+ if (!pmu) {
+ pr_debug("No PMU found for '%s'\n", evsel__name(evsel));
+ return -EINVAL;
+ }
+ group_pmu_name = pmu->name;
+ /*
+ * Software events may be in a group with other uncore PMU events. Use
+ * the pmu_name of the first non-software event to avoid breaking the
+ * software event out of the group.
+ *
+ * Aux event leaders, like intel_pt, expect a group with events from
+ * other PMUs, so substitute the AUX event's PMU in this case.
+ */
+ if (perf_pmu__is_software(pmu) || evsel__is_aux_event(leader)) {
+ struct perf_pmu *leader_pmu = evsel__find_pmu(leader);
- return ret;
+ if (!leader_pmu) {
+ /* As with determining pmu above. */
+ leader_pmu = perf_pmus__scan_core(NULL);
+ }
+ /*
+ * Starting with the leader, find the first event with a named
+ * non-software PMU. for_each_group_(member|evsel) isn't used as
+ * the list isn't yet sorted to put evsels in the same group
+ * together.
+ */
+ if (leader_pmu && !perf_pmu__is_software(leader_pmu)) {
+ group_pmu_name = leader_pmu->name;
+ } else if (leader->core.nr_members > 1) {
+ list_for_each_entry(pos, head, core.node) {
+ struct perf_pmu *pos_pmu;
+
+ if (pos == leader || evsel__leader(pos) != leader)
+ continue;
+ pos_pmu = evsel__find_pmu(pos);
+ if (!pos_pmu) {
+ /* As with determining pmu above. */
+ pos_pmu = perf_pmus__scan_core(NULL);
+ }
+ if (pos_pmu && !perf_pmu__is_software(pos_pmu)) {
+ group_pmu_name = pos_pmu->name;
+ break;
+ }
+ }
+ }
+ }
+ /* Assign the actual name taking care that the fake PMU lacks a name. */
+ evsel->group_pmu_name = strdup(group_pmu_name ?: "fake");
+ return evsel->group_pmu_name ? 0 : -ENOMEM;
}
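Illustrative outcomes of the selection above (event and PMU names assumed):

/* "{faults,uncore_imc/clockticks/}": "faults" is on the software PMU, so
 * it adopts group_pmu_name "uncore_imc" from its first non-software
 * sibling and the later sort keeps the group intact.
 * "{intel_pt//,cycles}": the leader is an aux event, so members adopt
 * the leader's PMU name "intel_pt" instead of splitting the group.
 */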
__weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
@@ -2131,37 +2100,51 @@ __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
return lhs->core.idx - rhs->core.idx;
}
-static int evlist__cmp(void *state, const struct list_head *l, const struct list_head *r)
+static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct list_head *r)
{
const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node);
const struct evsel *lhs = container_of(lhs_core, struct evsel, core);
const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node);
const struct evsel *rhs = container_of(rhs_core, struct evsel, core);
- int *leader_idx = state;
- int lhs_leader_idx = *leader_idx, rhs_leader_idx = *leader_idx, ret;
+ int *force_grouped_idx = _fg_idx;
+ int lhs_sort_idx, rhs_sort_idx, ret;
const char *lhs_pmu_name, *rhs_pmu_name;
- bool lhs_has_group = false, rhs_has_group = false;
+ bool lhs_has_group, rhs_has_group;
/*
* First sort by grouping/leader. Read the leader idx only if the evsel
- * is part of a group, as -1 indicates no group.
+ * is part of a group, by default ungrouped events will be sorted
+ * relative to grouped events based on where the first ungrouped event
+ * occurs. If both events don't have a group we want to fall-through to
+ * the arch specific sorting, that can reorder and fix things like
+ * Intel's topdown events.
*/
if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
lhs_has_group = true;
- lhs_leader_idx = lhs_core->leader->idx;
+ lhs_sort_idx = lhs_core->leader->idx;
+ } else {
+ lhs_has_group = false;
+ lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
+ ? *force_grouped_idx
+ : lhs_core->idx;
}
if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
rhs_has_group = true;
- rhs_leader_idx = rhs_core->leader->idx;
+ rhs_sort_idx = rhs_core->leader->idx;
+ } else {
+ rhs_has_group = false;
+ rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
+ ? *force_grouped_idx
+ : rhs_core->idx;
}
- if (lhs_leader_idx != rhs_leader_idx)
- return lhs_leader_idx - rhs_leader_idx;
+ if (lhs_sort_idx != rhs_sort_idx)
+ return lhs_sort_idx - rhs_sort_idx;
/* Group by PMU if there is a group. Groups can't span PMUs. */
if (lhs_has_group && rhs_has_group) {
- lhs_pmu_name = evsel__group_pmu_name(lhs);
- rhs_pmu_name = evsel__group_pmu_name(rhs);
+ lhs_pmu_name = lhs->group_pmu_name;
+ rhs_pmu_name = rhs->group_pmu_name;
ret = strcmp(lhs_pmu_name, rhs_pmu_name);
if (ret)
return ret;
@@ -2171,13 +2154,14 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
return arch_evlist__cmp(lhs, rhs);
}
-static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
+static int parse_events__sort_events_and_fix_groups(struct list_head *list)
{
- int idx = 0, unsorted_idx = -1;
+ int idx = 0, force_grouped_idx = -1;
struct evsel *pos, *cur_leader = NULL;
struct perf_evsel *cur_leaders_grp = NULL;
- bool idx_changed = false;
+ bool idx_changed = false, cur_leader_force_grouped = false;
int orig_num_leaders = 0, num_leaders = 0;
+ int ret;
/*
* Compute index to insert ungrouped events at. Place them where the
@@ -2186,6 +2170,10 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
list_for_each_entry(pos, list, core.node) {
const struct evsel *pos_leader = evsel__leader(pos);
+ ret = evsel__compute_group_pmu_name(pos, list);
+ if (ret)
+ return ret;
+
if (pos == pos_leader)
orig_num_leaders++;
@@ -2196,12 +2184,14 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
pos->core.idx = idx++;
- if (unsorted_idx == -1 && pos == pos_leader && pos->core.nr_members < 2)
- unsorted_idx = pos->core.idx;
+ /* Remember an index at which to gather all force-grouped events when sorting. */
+ if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
+ arch_evsel__must_be_in_group(pos))
+ force_grouped_idx = pos->core.idx;
}
/* Sort events. */
- list_sort(&unsorted_idx, list, evlist__cmp);
+ list_sort(&force_grouped_idx, list, evlist__cmp);
/*
* Recompute groups, splitting for PMUs and adding groups for events
@@ -2210,9 +2200,10 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
idx = 0;
list_for_each_entry(pos, list, core.node) {
const struct evsel *pos_leader = evsel__leader(pos);
- const char *pos_pmu_name = evsel__group_pmu_name(pos);
- const char *cur_leader_pmu_name, *pos_leader_pmu_name;
- bool force_grouped = arch_evsel__must_be_in_group(pos);
+ const char *pos_pmu_name = pos->group_pmu_name;
+ const char *cur_leader_pmu_name;
+ bool pos_force_grouped = force_grouped_idx != -1 &&
+ arch_evsel__must_be_in_group(pos);
/* Reset index and nr_members. */
if (pos->core.idx != idx)
@@ -2227,8 +2218,9 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
if (!cur_leader)
cur_leader = pos;
- cur_leader_pmu_name = evsel__group_pmu_name(cur_leader);
- if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
+ cur_leader_pmu_name = cur_leader->group_pmu_name;
+ if ((cur_leaders_grp != pos->core.leader &&
+ (!pos_force_grouped || !cur_leader_force_grouped)) ||
strcmp(cur_leader_pmu_name, pos_pmu_name)) {
/* Event is for a different group/PMU than last. */
cur_leader = pos;
@@ -2238,14 +2230,14 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
* group.
*/
cur_leaders_grp = pos->core.leader;
- }
- pos_leader_pmu_name = evsel__group_pmu_name(pos_leader);
- if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
/*
- * Event's PMU differs from its leader's. Groups can't
- * span PMUs, so update leader from the group/PMU
- * tracker.
+ * Avoid forcing events into groups with events that
+ * don't need to be in the group.
*/
+ cur_leader_force_grouped = pos_force_grouped;
+ }
+ if (pos_leader != cur_leader) {
+ /* The leader changed so update it. */
evsel__set_leader(pos, cur_leader);
}
}
@@ -2256,10 +2248,10 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
num_leaders++;
pos_leader->core.nr_members++;
}
- return idx_changed || num_leaders != orig_num_leaders;
+ return (idx_changed || num_leaders != orig_num_leaders) ? 1 : 0;
}
-int __parse_events(struct evlist *evlist, const char *str,
+int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
struct parse_events_error *err, struct perf_pmu *fake_pmu,
bool warn_if_reordered)
{
@@ -2270,19 +2262,23 @@ int __parse_events(struct evlist *evlist, const char *str,
.evlist = evlist,
.stoken = PE_START_EVENTS,
.fake_pmu = fake_pmu,
+ .pmu_filter = pmu_filter,
+ .match_legacy_cache_terms = true,
};
- int ret;
+ int ret, ret2;
ret = parse_events__scanner(str, &parse_state);
- perf_pmu__parse_cleanup();
if (!ret && list_empty(&parse_state.list)) {
WARN_ONCE(true, "WARNING: event parser found nothing\n");
return -1;
}
- if (parse_events__sort_events_and_fix_groups(&parse_state.list) &&
- warn_if_reordered && !parse_state.wild_card_pmus)
+ ret2 = parse_events__sort_events_and_fix_groups(&parse_state.list);
+ if (ret2 < 0)
+ return ret;
+
+ if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus)
pr_warning("WARNING: events were regrouped to match PMUs\n");
/*
@@ -2451,12 +2447,13 @@ void parse_events_error__print(struct parse_events_error *err,
int parse_events_option(const struct option *opt, const char *str,
int unset __maybe_unused)
{
- struct evlist *evlist = *(struct evlist **)opt->value;
+ struct parse_events_option_args *args = opt->value;
struct parse_events_error err;
int ret;
parse_events_error__init(&err);
- ret = parse_events(evlist, str, &err);
+ ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err,
+ /*fake_pmu=*/NULL, /*warn_if_reordered=*/true);
if (ret) {
parse_events_error__print(&err, str);
@@ -2469,22 +2466,21 @@ int parse_events_option(const struct option *opt, const char *str,
int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset)
{
- struct evlist **evlistp = opt->value;
+ struct parse_events_option_args *args = opt->value;
int ret;
- if (*evlistp == NULL) {
- *evlistp = evlist__new();
+ if (*args->evlistp == NULL) {
+ *args->evlistp = evlist__new();
- if (*evlistp == NULL) {
+ if (*args->evlistp == NULL) {
fprintf(stderr, "Not enough memory to create evlist\n");
return -1;
}
}
-
ret = parse_events_option(opt, str, unset);
if (ret) {
- evlist__delete(*evlistp);
- *evlistp = NULL;
+ evlist__delete(*args->evlistp);
+ *args->evlistp = NULL;
}
return ret;
@@ -2546,7 +2542,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
return 0;
}
- while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ while ((pmu = perf_pmus__scan(pmu)) != NULL)
if (pmu->type == evsel->core.attr.type) {
found = true;
break;
@@ -2682,29 +2678,13 @@ int parse_events_term__str(struct parse_events_term **term,
return new_term(term, &temp, str, 0);
}
-int parse_events_term__sym_hw(struct parse_events_term **term,
- char *config, unsigned idx)
+int parse_events_term__term(struct parse_events_term **term,
+ int term_lhs, int term_rhs,
+ void *loc_term, void *loc_val)
{
- struct event_symbol *sym;
- char *str;
- struct parse_events_term temp = {
- .type_val = PARSE_EVENTS__TERM_TYPE_STR,
- .type_term = PARSE_EVENTS__TERM_TYPE_USER,
- .config = config,
- };
-
- if (!temp.config) {
- temp.config = strdup("event");
- if (!temp.config)
- return -ENOMEM;
- }
- BUG_ON(idx >= PERF_COUNT_HW_MAX);
- sym = &event_symbols_hw[idx];
-
- str = strdup(sym->symbol);
- if (!str)
- return -ENOMEM;
- return new_term(term, &temp, str, 0);
+ return parse_events_term__str(term, term_lhs, NULL,
+ strdup(config_term_names[term_rhs]),
+ loc_term, loc_val);
}
int parse_events_term__clone(struct parse_events_term **new,
@@ -2855,15 +2835,3 @@ char *parse_events_formats_error_string(char *additional_terms)
fail:
return NULL;
}
-
-struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx,
- struct perf_event_attr *attr,
- const char *name,
- const char *metric_id,
- struct perf_pmu *pmu,
- struct list_head *config_terms)
-{
- return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id,
- pmu, config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL);
-}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 86ad4438a2aa..b0eb95f93e9c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -18,21 +18,26 @@ struct parse_events_error;
struct option;
struct perf_pmu;
-bool is_event_supported(u8 type, u64 config);
-
const char *event_type(int type);
+/* Arguments encoded in opt->value. */
+struct parse_events_option_args {
+ struct evlist **evlistp;
+ const char *pmu_filter;
+};
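A typical embedding in a builtin would look like this (illustrative; variable names assumed):

struct evlist *evlist = NULL;
struct parse_events_option_args args = {
        .evlistp = &evlist,
        .pmu_filter = NULL,     /* or a PMU name, e.g. from --cputype */
};
/* OPT_CALLBACK('e', "event", &args, "event", "event selector", parse_events_option) */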
int parse_events_option(const struct option *opt, const char *str, int unset);
int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
-__attribute__((nonnull(1, 2, 3)))
-int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
- struct perf_pmu *fake_pmu, bool warn_if_reordered);
+__attribute__((nonnull(1, 2, 4)))
+int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter,
+ struct parse_events_error *error, struct perf_pmu *fake_pmu,
+ bool warn_if_reordered);
__attribute__((nonnull(1, 2, 3)))
static inline int parse_events(struct evlist *evlist, const char *str,
struct parse_events_error *err)
{
- return __parse_events(evlist, str, err, /*fake_pmu=*/NULL, /*warn_if_reordered=*/true);
+ return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL,
+ /*warn_if_reordered=*/true);
}
int parse_event(struct evlist *evlist, const char *str);
@@ -41,14 +46,6 @@ int parse_events_terms(struct list_head *terms, const char *str);
int parse_filter(const struct option *opt, const char *str, int unset);
int exclude_perf(const struct option *opt, const char *arg, int unset);
-enum perf_pmu_event_symbol_type {
- PMU_EVENT_SYMBOL_ERR, /* not a PMU EVENT */
- PMU_EVENT_SYMBOL, /* normal style PMU event */
- PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */
- PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */
- PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */
-};
-
enum {
PARSE_EVENTS__TERM_TYPE_NUM,
PARSE_EVENTS__TERM_TYPE_STR,
@@ -78,6 +75,9 @@ enum {
PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT,
PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
PARSE_EVENTS__TERM_TYPE_METRIC_ID,
+ PARSE_EVENTS__TERM_TYPE_RAW,
+ PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+ PARSE_EVENTS__TERM_TYPE_HARDWARE,
__PARSE_EVENTS__TERM_TYPE_NR,
};
@@ -128,10 +128,15 @@ struct parse_events_state {
struct list_head *terms;
int stoken;
struct perf_pmu *fake_pmu;
- char *hybrid_pmu_name;
+ /* If non-NULL, only match the given PMU when wildcard matching. */
+ const char *pmu_filter;
+ /* Should PE_LEGACY_CACHE tokens be generated for config terms? */
+ bool match_legacy_cache_terms;
bool wild_card_pmus;
};
+bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
+ const struct perf_pmu *pmu);
void parse_events__shrink_config_terms(void);
int parse_events__is_hardcoded_term(struct parse_events_term *term);
int parse_events_term__num(struct parse_events_term **term,
@@ -141,8 +146,9 @@ int parse_events_term__num(struct parse_events_term **term,
int parse_events_term__str(struct parse_events_term **term,
int type_term, char *config, char *str,
void *loc_term, void *loc_val);
-int parse_events_term__sym_hw(struct parse_events_term **term,
- char *config, unsigned idx);
+int parse_events_term__term(struct parse_events_term **term,
+ int term_lhs, int term_rhs,
+ void *loc_term, void *loc_val);
int parse_events_term__clone(struct parse_events_term **new,
struct parse_events_term *term);
void parse_events_term__delete(struct parse_events_term *term);
@@ -170,17 +176,19 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
int parse_events_add_numeric(struct parse_events_state *parse_state,
struct list_head *list,
u32 type, u64 config,
- struct list_head *head_config);
+ struct list_head *head_config,
+ bool wildcard);
int parse_events_add_tool(struct parse_events_state *parse_state,
struct list_head *list,
int tool_event);
-int parse_events_add_cache(struct list_head *list, int *idx,
- char *type, char *op_result1, char *op_result2,
- struct parse_events_error *error,
- struct list_head *head_config,
- struct parse_events_state *parse_state);
-int parse_events_add_breakpoint(struct list_head *list, int *idx,
- u64 addr, char *type, u64 len);
+int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
+ struct parse_events_state *parse_state,
+ struct list_head *head_config);
+int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config);
+int parse_events_add_breakpoint(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u64 addr, char *type, u64 len,
+ struct list_head *head_config);
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config,
@@ -198,8 +206,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
int parse_events_copy_term_list(struct list_head *old,
struct list_head **new);
-enum perf_pmu_event_symbol_type
-perf_pmu__parse_check(const char *name);
void parse_events__set_leader(char *name, struct list_head *list);
void parse_events_update_lists(struct list_head *list_event,
struct list_head *list_all);
@@ -241,13 +247,4 @@ static inline bool is_sdt_event(char *str __maybe_unused)
}
#endif /* HAVE_LIBELF_SUPPORT */
-int perf_pmu__test_parse_init(void);
-
-struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx,
- struct perf_event_attr *attr,
- const char *name,
- const char *metric_id,
- struct perf_pmu *pmu,
- struct list_head *config_terms);
-
#endif /* __PERF_PARSE_EVENTS_H */
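
The option callbacks above now expect opt->value to point at a struct parse_events_option_args rather than directly at the evlist pointer. A minimal sketch of how a builtin might wire this up; the rec_evlist/options names are illustrative, not taken from the patch:

	#include <subcmd/parse-options.h>
	#include "util/parse-events.h"

	/* Illustrative only: populating the new option args. */
	static struct evlist *rec_evlist;
	static struct parse_events_option_args parse_events_option_args = {
		.evlistp = &rec_evlist,
		/* .pmu_filter may be set later, e.g. from a PMU-selection option. */
	};

	static const struct option options[] = {
		OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
			     "event selector", parse_events_option),
		OPT_END(),
	};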
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 51fe0a9fb3de..99335ec586ae 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -63,15 +63,9 @@ static int str(yyscan_t scanner, int token)
return token;
}
-static int raw(yyscan_t scanner)
+static int lc_str(yyscan_t scanner, const struct parse_events_state *state)
{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
- char *text = parse_events_get_text(scanner);
-
- if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL)
- return str(scanner, PE_NAME);
-
- return __value(yylval, text + 1, 16, PE_RAW);
+ return str(scanner, state->match_legacy_cache_terms ? PE_LEGACY_CACHE : PE_NAME);
}
static bool isbpf_suffix(char *text)
@@ -131,35 +125,6 @@ do { \
yyless(0); \
} while (0)
-static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
- char *text = parse_events_get_text(scanner);
-
- yylval->str = strdup(text);
-
- /*
- * If we're not testing then parse check determines the PMU event type
- * which if it isn't a PMU returns PE_NAME. When testing the result of
- * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless
- * an '!' is present in which case the text can't be a PMU name.
- */
- switch (perf_pmu__parse_check(text)) {
- case PMU_EVENT_SYMBOL_PREFIX:
- return PE_PMU_EVENT_PRE;
- case PMU_EVENT_SYMBOL_SUFFIX:
- return PE_PMU_EVENT_SUF;
- case PMU_EVENT_SYMBOL_SUFFIX2:
- return PE_PMU_EVENT_SUF2;
- case PMU_EVENT_SYMBOL:
- return parse_state->fake_pmu
- ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT;
- default:
- return parse_state->fake_pmu && !strchr(text,'!')
- ? PE_PMU_EVENT_FAKE : PE_NAME;
- }
-}
-
static int sym(yyscan_t scanner, int type, int config)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -184,6 +149,16 @@ static int term(yyscan_t scanner, int type)
return PE_TERM;
}
+static int hw_term(yyscan_t scanner, int config)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ yylval->hardware_term.str = strdup(text);
+ yylval->hardware_term.num = PERF_TYPE_HARDWARE + config;
+ return PE_TERM_HW;
+}
+
#define YY_USER_ACTION \
do { \
yylloc->last_column = yylloc->first_column; \
@@ -211,19 +186,25 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]*
num_dec [0-9]+
num_hex 0x[a-fA-F0-9]+
num_raw_hex [a-fA-F0-9]+
-name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]*
+name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]*
name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
-/* If you add a modifier you need to update check_modifier() */
+/*
+ * If you add a modifier you need to update check_modifier().
+ * Also, the letters in modifier_event must not be in modifier_bp.
+ */
modifier_event [ukhpPGHSDIWeb]+
modifier_bp [rwx]{1,3}
+lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node)
+lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss)
+digit [0-9]
+non_digit [^0-9]
%%
%{
struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner);
-
{
int start_token = _parse_state->stoken;
@@ -303,10 +284,23 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
-r{num_raw_hex} { return raw(yyscanner); }
-r0x{num_raw_hex} { return raw(yyscanner); }
+cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
+r{num_raw_hex} { return str(yyscanner, PE_RAW); }
+r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
+{lc_type} { return lc_str(yyscanner, _parse_state); }
+{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
+{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); }
{name_minus} { return str(yyscanner, PE_NAME); }
\[all\] { return PE_ARRAY_ALL; }
"[" { BEGIN(array); return '['; }
@@ -315,8 +309,20 @@ r0x{num_raw_hex} { return raw(yyscanner); }
<mem>{
{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); }
-: { return ':'; }
-"/" { return '/'; }
+ /*
+ * The colon before memory access modifiers can get mixed up with the
+ * colon before event modifiers. Fortunately none of the option letters
+ * are the same, so trailing context can be used to disambiguate the two
+ * cases.
+ */
+":"/{modifier_bp} { return PE_BP_COLON; }
+ /*
+ * The slash before memory length can get mixed up with the slash before
+ * config terms. Fortunately config terms do not start with a numeric
+ * digit, so trailing context can be used to disambiguate the two cases.
+ */
+"/"/{digit} { return PE_BP_SLASH; }
+"/"/{non_digit} { BEGIN(config); return '/'; }
{num_dec} { return value(yyscanner, 10); }
{num_hex} { return value(yyscanner, 16); }
/*
@@ -359,47 +365,20 @@ system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
- /*
- * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
- * Because the prefix cycles is mixed up with cpu-cycles.
- * loads and stores are mixed up with cache event
- */
-cycles-ct |
-cycles-t |
-mem-loads |
-mem-loads-aux |
-mem-stores |
-topdown-[a-z-]+ |
-tx-capacity-[a-z-]+ |
-el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-
-L1-dcache|l1-d|l1d|L1-data |
-L1-icache|l1-i|l1i|L1-instruction |
-LLC|L2 |
-dTLB|d-tlb|Data-TLB |
-iTLB|i-tlb|Instruction-TLB |
-branch|branches|bpu|btb|bpc |
-node { return str(yyscanner, PE_NAME_CACHE_TYPE); }
-
-load|loads|read |
-store|stores|write |
-prefetch|prefetches |
-speculative-read|speculative-load |
-refs|Reference|ops|access |
-misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
-
+{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); }
+{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
+{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
mem: { BEGIN(mem); return PE_PREFIX_MEM; }
-r{num_raw_hex} { return raw(yyscanner); }
+r{num_raw_hex} { return str(yyscanner, PE_RAW); }
{num_dec} { return value(yyscanner, 10); }
{num_hex} { return value(yyscanner, 16); }
{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
-{name} { return pmu_str_check(yyscanner, _parse_state); }
+{name} { return str(yyscanner, PE_NAME); }
{name_tag} { return str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
-- { return '-'; }
, { BEGIN(event); return ','; }
: { return ':'; }
"{" { BEGIN(event); return '{'; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 4488443e506e..c590cf7f02a4 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -8,12 +8,14 @@
#define YYDEBUG 1
+#include <errno.h>
#include <fnmatch.h>
#include <stdio.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include "pmu.h"
+#include "pmus.h"
#include "evsel.h"
#include "parse-events.h"
#include "parse-events-bison.h"
@@ -26,6 +28,13 @@ do { \
YYABORT; \
} while (0)
+#define PE_ABORT(val) \
+do { \
+ if (val == -ENOMEM) \
+ YYNOMEM; \
+ YYABORT; \
+} while (0)
+
static struct list_head* alloc_list(void)
{
struct list_head *list;
@@ -52,36 +61,36 @@ static void free_list_evsel(struct list_head* list_evsel)
%}
%token PE_START_EVENTS PE_START_TERMS
-%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM
%token PE_VALUE_SYM_TOOL
%token PE_EVENT_NAME
-%token PE_NAME
+%token PE_RAW PE_NAME
%token PE_BPF_OBJECT PE_BPF_SOURCE
-%token PE_MODIFIER_EVENT PE_MODIFIER_BP
-%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
+%token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
+%token PE_LEGACY_CACHE
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
+%token PE_TERM_HW
%type <num> PE_VALUE
%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
%type <num> PE_VALUE_SYM_TOOL
-%type <num> PE_RAW
%type <num> PE_TERM
%type <num> value_sym
+%type <str> PE_RAW
%type <str> PE_NAME
%type <str> PE_BPF_OBJECT
%type <str> PE_BPF_SOURCE
-%type <str> PE_NAME_CACHE_TYPE
-%type <str> PE_NAME_CACHE_OP_RESULT
+%type <str> PE_LEGACY_CACHE
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
+%type <str> PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
-%type <str> event_pmu_name
+%type <str> name_or_raw name_or_legacy
%destructor { free ($$); } <str>
%type <term> event_term
%destructor { parse_events_term__delete ($$); } <term>
@@ -112,6 +121,8 @@ static void free_list_evsel(struct list_head* list_evsel)
%type <array> array_term
%type <array> array_terms
%destructor { free ($$.ranges); } <array>
+%type <hardware_term> PE_TERM_HW
+%destructor { free ($$.str); } <hardware_term>
%union
{
@@ -125,6 +136,10 @@ static void free_list_evsel(struct list_head* list_evsel)
char *event;
} tracepoint_name;
struct parse_events_array array;
+ struct hardware_term {
+ char *str;
+ u64 num;
+ } hardware_term;
}
%%
@@ -267,61 +282,68 @@ event_def
event_def: event_pmu |
event_legacy_symbol |
event_legacy_cache sep_dc |
- event_legacy_mem |
+ event_legacy_mem sep_dc |
event_legacy_tracepoint sep_dc |
event_legacy_numeric sep_dc |
event_legacy_raw sep_dc |
event_bpf_file
-event_pmu_name:
-PE_NAME | PE_PMU_EVENT_PRE
-
event_pmu:
-event_pmu_name opt_pmu_config
+PE_NAME opt_pmu_config
{
struct parse_events_state *parse_state = _parse_state;
- struct parse_events_error *error = parse_state->error;
struct list_head *list = NULL, *orig_terms = NULL, *terms = NULL;
char *pattern = NULL;
-#define CLEANUP_YYABORT \
+#define CLEANUP \
do { \
parse_events_terms__delete($2); \
parse_events_terms__delete(orig_terms); \
free(list); \
free($1); \
free(pattern); \
- YYABORT; \
} while(0)
- if (parse_events_copy_term_list($2, &orig_terms))
- CLEANUP_YYABORT;
-
- if (error)
- error->idx = @1.first_column;
+ if (parse_events_copy_term_list($2, &orig_terms)) {
+ CLEANUP;
+ YYNOMEM;
+ }
list = alloc_list();
- if (!list)
- CLEANUP_YYABORT;
- if (parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false)) {
+ if (!list) {
+ CLEANUP;
+ YYNOMEM;
+ }
+ /* Attempt to add to list assuming $1 is a PMU name. */
+ if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) {
struct perf_pmu *pmu = NULL;
int ok = 0;
- if (asprintf(&pattern, "%s*", $1) < 0)
- CLEANUP_YYABORT;
+ /* Failure to add, try wildcard expansion of $1 as a PMU name. */
+ if (asprintf(&pattern, "%s*", $1) < 0) {
+ CLEANUP;
+ YYNOMEM;
+ }
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
char *name = pmu->name;
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
if (!strncmp(name, "uncore_", 7) &&
strncmp($1, "uncore_", 7))
name += 7;
if (!perf_pmu__match(pattern, name, $1) ||
!perf_pmu__match(pattern, pmu->alias_name, $1)) {
- if (parse_events_copy_term_list(orig_terms, &terms))
- CLEANUP_YYABORT;
- if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms,
- /*auto_merge_stats=*/true)) {
+ bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+
+ if (parse_events_copy_term_list(orig_terms, &terms)) {
+ CLEANUP;
+ YYNOMEM;
+ }
+ if (!parse_events_add_pmu(parse_state, list, pmu->name, terms,
+ auto_merge_stats)) {
ok++;
parse_state->wild_card_pmus = true;
}
@@ -329,15 +351,29 @@ event_pmu_name opt_pmu_config
}
}
- if (!ok)
- CLEANUP_YYABORT;
+ if (!ok) {
+ /* Failure to add, assume $1 is an event name. */
+ zfree(&list);
+ ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list);
+ $2 = NULL;
+ }
+ if (!ok) {
+ struct parse_events_error *error = parse_state->error;
+ char *help;
+
+ if (asprintf(&help, "Unabled to find PMU or event on a PMU of '%s'", $1) < 0)
+ help = NULL;
+ parse_events_error__handle(error, @1.first_column,
+ strdup("Bad event or PMU"),
+ help);
+ CLEANUP;
+ YYABORT;
+ }
}
- parse_events_terms__delete($2);
- parse_events_terms__delete(orig_terms);
- free(pattern);
- free($1);
$$ = list;
-#undef CLEANUP_YYABORT
+ list = NULL;
+ CLEANUP;
+#undef CLEANUP
}
|
PE_KERNEL_PMU_EVENT sep_dc
@@ -352,41 +388,36 @@ PE_KERNEL_PMU_EVENT sep_dc
$$ = list;
}
|
-PE_KERNEL_PMU_EVENT opt_pmu_config
+PE_NAME sep_dc
{
struct list_head *list;
int err;
- /* frees $2 */
- err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list);
- free($1);
- if (err < 0)
- YYABORT;
- $$ = list;
-}
-|
-PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc
-{
- struct list_head *list;
- char pmu_name[128];
- snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5);
+ err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
+ if (err < 0) {
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ char *help;
+
+ if (asprintf(&help, "Unabled to find PMU or event on a PMU of '%s'", $1) < 0)
+ help = NULL;
+ parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help);
+ free($1);
+ PE_ABORT(err);
+ }
free($1);
- free($3);
- free($5);
- if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
- YYABORT;
$$ = list;
}
|
-PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
+PE_KERNEL_PMU_EVENT opt_pmu_config
{
struct list_head *list;
- char pmu_name[128];
+ int err;
- snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3);
+ /* frees $2 */
+ err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list);
free($1);
- free($3);
- if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0)
+ if (err < 0)
YYABORT;
$$ = list;
}
@@ -441,14 +472,16 @@ value_sym '/' event_config '/'
int type = $1 >> 16;
int config = $1 & 255;
int err;
+ bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_numeric(_parse_state, list, type, config, $3);
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
parse_events_terms__delete($3);
if (err) {
free_list_evsel(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
@@ -458,150 +491,128 @@ value_sym sep_slash_slash_dc
struct list_head *list;
int type = $1 >> 16;
int config = $1 & 255;
+ bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+ int err;
list = alloc_list();
- ABORT_ON(!list);
- ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL));
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
+ if (err)
+ PE_ABORT(err);
$$ = list;
}
|
PE_VALUE_SYM_TOOL sep_slash_slash_dc
{
struct list_head *list;
+ int err;
list = alloc_list();
- ABORT_ON(!list);
- ABORT_ON(parse_events_add_tool(_parse_state, list, $1));
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_tool(_parse_state, list, $1);
+ if (err)
+ YYNOMEM;
$$ = list;
}
event_legacy_cache:
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+PE_LEGACY_CACHE opt_event_config
{
struct parse_events_state *parse_state = _parse_state;
- struct parse_events_error *error = parse_state->error;
struct list_head *list;
int err;
list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6,
- parse_state);
- parse_events_terms__delete($6);
- free($1);
- free($3);
- free($5);
- if (err) {
- free_list_evsel(list);
- YYABORT;
- }
- $$ = list;
-}
-|
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
-{
- struct parse_events_state *parse_state = _parse_state;
- struct parse_events_error *error = parse_state->error;
- struct list_head *list;
- int err;
+ if (!list)
+ YYNOMEM;
- list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4,
- parse_state);
- parse_events_terms__delete($4);
- free($1);
- free($3);
- if (err) {
- free_list_evsel(list);
- YYABORT;
- }
- $$ = list;
-}
-|
-PE_NAME_CACHE_TYPE opt_event_config
-{
- struct parse_events_state *parse_state = _parse_state;
- struct parse_events_error *error = parse_state->error;
- struct list_head *list;
- int err;
+ err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2);
- list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2,
- parse_state);
parse_events_terms__delete($2);
free($1);
if (err) {
free_list_evsel(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
event_legacy_mem:
-PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
+PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
{
- struct parse_events_state *parse_state = _parse_state;
struct list_head *list;
int err;
list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_breakpoint(list, &parse_state->idx,
- $2, $6, $4);
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, $6, $4, $7);
+ parse_events_terms__delete($7);
free($6);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
|
-PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
+PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config
{
- struct parse_events_state *parse_state = _parse_state;
struct list_head *list;
+ int err;
list = alloc_list();
- ABORT_ON(!list);
- if (parse_events_add_breakpoint(list, &parse_state->idx,
- $2, NULL, $4)) {
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, NULL, $4, $5);
+ parse_events_terms__delete($5);
+ if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
|
-PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
+PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
{
- struct parse_events_state *parse_state = _parse_state;
struct list_head *list;
int err;
list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_breakpoint(list, &parse_state->idx,
- $2, $4, 0);
+ if (!list)
+ YYNOMEM;
+
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, $4, 0, $5);
+ parse_events_terms__delete($5);
free($4);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
|
-PE_PREFIX_MEM PE_VALUE sep_dc
+PE_PREFIX_MEM PE_VALUE opt_event_config
{
- struct parse_events_state *parse_state = _parse_state;
struct list_head *list;
+ int err;
list = alloc_list();
- ABORT_ON(!list);
- if (parse_events_add_breakpoint(list, &parse_state->idx,
- $2, NULL, 0)) {
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_breakpoint(_parse_state, list,
+ $2, NULL, 0, $3);
+ parse_events_terms__delete($3);
+ if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
@@ -615,7 +626,8 @@ tracepoint_name opt_event_config
int err;
list = alloc_list();
- ABORT_ON(!list);
+ if (!list)
+ YYNOMEM;
if (error)
error->idx = @1.first_column;
@@ -627,23 +639,12 @@ tracepoint_name opt_event_config
free($1.event);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
tracepoint_name:
-PE_NAME '-' PE_NAME ':' PE_NAME
-{
- struct tracepoint_name tracepoint;
-
- ABORT_ON(asprintf(&tracepoint.sys, "%s-%s", $1, $3) < 0);
- tracepoint.event = $5;
- free($1);
- free($3);
- $$ = tracepoint;
-}
-|
PE_NAME ':' PE_NAME
{
struct tracepoint_name tracepoint = {$1, $3};
@@ -658,12 +659,14 @@ PE_VALUE ':' PE_VALUE opt_event_config
int err;
list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4);
+ if (!list)
+ YYNOMEM;
+ err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4,
+ /*wildcard=*/false);
parse_events_terms__delete($4);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
@@ -673,14 +676,21 @@ PE_RAW opt_event_config
{
struct list_head *list;
int err;
+ u64 num;
list = alloc_list();
- ABORT_ON(!list);
- err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2);
+ if (!list)
+ YYNOMEM;
+ errno = 0;
+ num = strtoull($1 + 1, NULL, 16);
+ ABORT_ON(errno);
+ free($1);
+ err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2,
+ /*wildcard=*/false);
parse_events_terms__delete($2);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
@@ -693,13 +703,14 @@ PE_BPF_OBJECT opt_event_config
int err;
list = alloc_list();
- ABORT_ON(!list);
+ if (!list)
+ YYNOMEM;
err = parse_events_load_bpf(parse_state, list, $1, false, $2);
parse_events_terms__delete($2);
free($1);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
@@ -710,12 +721,13 @@ PE_BPF_SOURCE opt_event_config
int err;
list = alloc_list();
- ABORT_ON(!list);
+ if (!list)
+ YYNOMEM;
err = parse_events_load_bpf(_parse_state, list, $1, true, $2);
parse_events_terms__delete($2);
if (err) {
free(list);
- YYABORT;
+ PE_ABORT(err);
}
$$ = list;
}
@@ -775,55 +787,80 @@ event_term
struct list_head *head = malloc(sizeof(*head));
struct parse_events_term *term = $1;
- ABORT_ON(!head);
+ if (!head)
+ YYNOMEM;
INIT_LIST_HEAD(head);
list_add_tail(&term->list, head);
$$ = head;
}
+name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE
+
+name_or_legacy: PE_NAME | PE_LEGACY_CACHE
+
event_term:
PE_RAW
{
struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
+ strdup("raw"), $1, &@1, &@1);
- ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG,
- NULL, $1, false, &@1, NULL));
+ if (err) {
+ free($1);
+ PE_ABORT(err);
+ }
$$ = term;
}
|
-PE_NAME '=' PE_NAME
+name_or_raw '=' name_or_legacy
{
struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3);
- if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $3, &@1, &@3)) {
+ if (err) {
free($1);
free($3);
- YYABORT;
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+name_or_raw '=' PE_VALUE
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $3, false, &@1, &@3);
+
+ if (err) {
+ free($1);
+ PE_ABORT(err);
}
$$ = term;
}
|
-PE_NAME '=' PE_VALUE
+name_or_raw '=' PE_TERM_HW
{
struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $3.str, &@1, &@3);
- if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $3, false, &@1, &@3)) {
+ if (err) {
free($1);
- YYABORT;
+ free($3.str);
+ PE_ABORT(err);
}
$$ = term;
}
|
-PE_NAME '=' PE_VALUE_SYM_HW
+PE_LEGACY_CACHE
{
struct parse_events_term *term;
- int config = $3 & 255;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+ $1, 1, true, &@1, NULL);
- if (parse_events_term__sym_hw(&term, $1, config)) {
+ if (err) {
free($1);
- YYABORT;
+ PE_ABORT(err);
}
$$ = term;
}
@@ -831,75 +868,110 @@ PE_NAME '=' PE_VALUE_SYM_HW
PE_NAME
{
struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, 1, true, &@1, NULL);
- if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, 1, true, &@1, NULL)) {
+ if (err) {
free($1);
- YYABORT;
+ PE_ABORT(err);
}
$$ = term;
}
|
-PE_VALUE_SYM_HW
+PE_TERM_HW
{
struct parse_events_term *term;
- int config = $1 & 255;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
+ $1.str, $1.num & 255, false, &@1, NULL);
- ABORT_ON(parse_events_term__sym_hw(&term, NULL, config));
+ if (err) {
+ free($1.str);
+ PE_ABORT(err);
+ }
$$ = term;
}
|
-PE_TERM '=' PE_NAME
+PE_TERM '=' name_or_legacy
{
struct parse_events_term *term;
+ int err = parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3);
- if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) {
+ if (err) {
free($3);
- YYABORT;
+ PE_ABORT(err);
+ }
+ $$ = term;
+}
+|
+PE_TERM '=' PE_TERM_HW
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3);
+
+ if (err) {
+ free($3.str);
+ PE_ABORT(err);
}
$$ = term;
}
|
+PE_TERM '=' PE_TERM
+{
+ struct parse_events_term *term;
+ int err = parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3);
+
+ if (err)
+ PE_ABORT(err);
+
+ $$ = term;
+}
+|
PE_TERM '=' PE_VALUE
{
struct parse_events_term *term;
+ int err = parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3);
+
+ if (err)
+ PE_ABORT(err);
- ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
$$ = term;
}
|
PE_TERM
{
struct parse_events_term *term;
+ int err = parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL);
+
+ if (err)
+ PE_ABORT(err);
- ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
$$ = term;
}
|
-PE_NAME array '=' PE_NAME
+name_or_raw array '=' name_or_legacy
{
struct parse_events_term *term;
+ int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, &@1, &@4);
- if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $4, &@1, &@4)) {
+ if (err) {
free($1);
free($4);
free($2.ranges);
- YYABORT;
+ PE_ABORT(err);
}
term->array = $2;
$$ = term;
}
|
-PE_NAME array '=' PE_VALUE
+name_or_raw array '=' PE_VALUE
{
struct parse_events_term *term;
+ int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, false, &@1, &@4);
- if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $4, false, &@1, &@4)) {
+ if (err) {
free($1);
free($2.ranges);
- YYABORT;
+ PE_ABORT(err);
}
term->array = $2;
$$ = term;
@@ -909,13 +981,15 @@ PE_DRV_CFG_TERM
{
struct parse_events_term *term;
char *config = strdup($1);
+ int err;
- ABORT_ON(!config);
- if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
- config, $1, &@1, NULL)) {
+ if (!config)
+ YYNOMEM;
+ err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL);
+ if (err) {
free($1);
free(config);
- YYABORT;
+ PE_ABORT(err);
}
$$ = term;
}
@@ -941,7 +1015,8 @@ array_terms ',' array_term
new_array.ranges = realloc($1.ranges,
sizeof(new_array.ranges[0]) *
new_array.nr_ranges);
- ABORT_ON(!new_array.ranges);
+ if (!new_array.ranges)
+ YYNOMEM;
memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
$3.nr_ranges * sizeof(new_array.ranges[0]));
free($3.ranges);
@@ -957,7 +1032,8 @@ PE_VALUE
array.nr_ranges = 1;
array.ranges = malloc(sizeof(array.ranges[0]));
- ABORT_ON(!array.ranges);
+ if (!array.ranges)
+ YYNOMEM;
array.ranges[0].start = $1;
array.ranges[0].length = 1;
$$ = array;
@@ -970,7 +1046,8 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE
ABORT_ON($3 < $1);
array.nr_ranges = 1;
array.ranges = malloc(sizeof(array.ranges[0]));
- ABORT_ON(!array.ranges);
+ if (!array.ranges)
+ YYNOMEM;
array.ranges[0].start = $1;
array.ranges[0].length = $3 - $1 + 1;
$$ = array;
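
PE_RAW is now lexed as a string ('r' followed by hex digits) and converted to a number in the grammar action using strtoull. A stand-alone sketch of that conversion step, mirroring the event_legacy_raw action above:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Illustrative: decode an "rXXXX" raw-event token into a config value. */
	static int decode_raw_event(const char *tok, unsigned long long *config)
	{
		errno = 0;
		*config = strtoull(tok + 1, NULL, 16); /* skip the leading 'r' */
		return errno ? -1 : 0;
	}

	int main(void)
	{
		unsigned long long config;

		if (decode_raw_event("r1a", &config) == 0)
			printf("config=%#llx\n", config); /* prints config=0x1a */
		return 0;
	}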
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index ce80b79be103..00adf872bf00 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -1,16 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * I'm tired of doing "vsnprintf()" etc just to open a
- * file, so here's a "return static buffer with printf"
- * interface for paths.
- *
- * It's obviously not thread-safe. Sue me. But it's quite
- * useful for doing things like
- *
- * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
- *
- * which is what it's designed for.
- */
#include "path.h"
#include "cache.h"
#include <linux/kernel.h>
@@ -22,18 +10,6 @@
#include <dirent.h>
#include <unistd.h>
-static char bad_path[] = "/bad-path/";
-/*
- * One hack:
- */
-static char *get_pathname(void)
-{
- static char pathname_array[4][PATH_MAX];
- static int idx;
-
- return pathname_array[3 & ++idx];
-}
-
static char *cleanup_path(char *path)
{
/* Clean it up */
@@ -45,18 +21,17 @@ static char *cleanup_path(char *path)
return path;
}
-char *mkpath(const char *fmt, ...)
+char *mkpath(char *path_buf, size_t sz, const char *fmt, ...)
{
va_list args;
unsigned len;
- char *pathname = get_pathname();
va_start(args, fmt);
- len = vsnprintf(pathname, PATH_MAX, fmt, args);
+ len = vsnprintf(path_buf, sz, fmt, args);
va_end(args);
- if (len >= PATH_MAX)
- return bad_path;
- return cleanup_path(pathname);
+ if (len >= sz)
+ strncpy(path_buf, "/bad-path/", sz);
+ return cleanup_path(path_buf);
}
int path__join(char *bf, size_t size, const char *path1, const char *path2)
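
With the static rotating buffers removed, every mkpath() caller must now supply its own storage. A sketch of the updated calling convention, echoing the usage example from the deleted comment; the function and variable names are illustrative:

	#include <fcntl.h>
	#include <limits.h>

	/* Illustrative caller: the old static-buffer idiom becomes explicit. */
	static int open_perf_file(const char *base, const char *name)
	{
		char path[PATH_MAX];

		return open(mkpath(path, sizeof(path), "%s/%s.perf", base, name),
			    O_RDONLY);
	}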
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 7e5e7b30510d..2247991451f3 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <inttypes.h>
#include <stdio.h>
+#include <stdlib.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/perf_event.h>
#include "util/evsel_fprintf.h"
+#include "trace-event.h"
struct bit_names {
int bit;
@@ -71,6 +73,170 @@ static void __p_read_format(char *buf, size_t size, u64 value)
__p_bits(buf, size, value, bits);
}
+#define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
+static const char *stringify_perf_type_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
+ ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_MISSES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BRANCH_MISSES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_BUS_CYCLES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_STALLED_CYCLES_BACKEND)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_REF_CPU_CYCLES)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_cache_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_L1D)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_L1I)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_LL)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_DTLB)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_ITLB)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_BPU)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_NODE)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_cache_op_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_READ)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_WRITE)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_OP_PREFETCH)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_hw_cache_op_result_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_RESULT_ACCESS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_RESULT_MISS)
+ default:
+ return NULL;
+ }
+}
+
+static const char *stringify_perf_sw_id(u64 value)
+{
+ switch (value) {
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CPU_CLOCK)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_TASK_CLOCK)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CONTEXT_SWITCHES)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CPU_MIGRATIONS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS_MIN)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_ALIGNMENT_FAULTS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_EMULATION_FAULTS)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_DUMMY)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_BPF_OUTPUT)
+ ENUM_ID_TO_STR_CASE(PERF_COUNT_SW_CGROUP_SWITCHES)
+ default:
+ return NULL;
+ }
+}
+#undef ENUM_ID_TO_STR_CASE
+
+#define PRINT_ID(_s, _f) \
+do { \
+ const char *__s = _s; \
+ if (__s == NULL) \
+ snprintf(buf, size, _f, value); \
+ else \
+ snprintf(buf, size, _f" (%s)", value, __s); \
+} while (0)
+#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64)
+#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64)
+
+static void __p_type_id(char *buf, size_t size, u64 value)
+{
+ print_id_unsigned(stringify_perf_type_id(value));
+}
+
+static void __p_config_hw_id(char *buf, size_t size, u64 value)
+{
+ print_id_hex(stringify_perf_hw_id(value));
+}
+
+static void __p_config_sw_id(char *buf, size_t size, u64 value)
+{
+ print_id_hex(stringify_perf_sw_id(value));
+}
+
+static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
+{
+ const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff);
+ const char *hw_cache_op_str =
+ stringify_perf_hw_cache_op_id((value & 0xff00) >> 8);
+ const char *hw_cache_op_result_str =
+ stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16);
+
+ if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
+ hw_cache_op_result_str == NULL) {
+ snprintf(buf, size, "%#"PRIx64, value);
+ } else {
+ snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value,
+ hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+ }
+}
+
+#ifdef HAVE_LIBTRACEEVENT
+static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
+{
+ char *str = tracepoint_id_to_name(value);
+
+ print_id_hex(str);
+ free(str);
+}
+#endif
+
+static void __p_config_id(char *buf, size_t size, u32 type, u64 value)
+{
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ return __p_config_hw_id(buf, size, value);
+ case PERF_TYPE_SOFTWARE:
+ return __p_config_sw_id(buf, size, value);
+ case PERF_TYPE_HW_CACHE:
+ return __p_config_hw_cache_id(buf, size, value);
+ case PERF_TYPE_TRACEPOINT:
+#ifdef HAVE_LIBTRACEEVENT
+ return __p_config_tracepoint_id(buf, size, value);
+#endif
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_BREAKPOINT:
+ default:
+ snprintf(buf, size, "%#"PRIx64, value);
+ return;
+ }
+}
+
#define BUF_SIZE 1024
#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
@@ -79,16 +245,18 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
+#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val)
+#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val)
-#define PRINT_ATTRn(_n, _f, _p) \
+#define PRINT_ATTRn(_n, _f, _p, _a) \
do { \
- if (attr->_f) { \
+ if (_a || attr->_f) { \
_p(attr->_f); \
ret += attr__fprintf(fp, _n, buf, priv);\
} \
} while (0)
-#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p)
+#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p, false)
int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
attr__fprintf_f attr__fprintf, void *priv)
@@ -96,10 +264,10 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
char buf[BUF_SIZE];
int ret = 0;
- PRINT_ATTRf(type, p_unsigned);
+ PRINT_ATTRn("type", type, p_type_id, true);
PRINT_ATTRf(size, p_unsigned);
- PRINT_ATTRf(config, p_hex);
- PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+ PRINT_ATTRn("config", config, p_config_id, true);
+ PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned, false);
PRINT_ATTRf(sample_type, p_sample_type);
PRINT_ATTRf(read_format, p_read_format);
@@ -141,10 +309,10 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(remove_on_exec, p_unsigned);
PRINT_ATTRf(sigtrap, p_unsigned);
- PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+ PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
PRINT_ATTRf(bp_type, p_unsigned);
- PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
- PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+ PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex, false);
+ PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex, false);
PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
PRINT_ATTRf(sample_regs_user, p_hex);
PRINT_ATTRf(sample_stack_user, p_unsigned);
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index 6c11914c179f..862e4a689868 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -10,9 +10,10 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/parse-events.h"
-#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/pfm.h"
#include "util/strbuf.h"
+#include "util/thread_map.h"
#include <string.h>
#include <linux/kernel.h>
@@ -49,7 +50,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
/*
* force loading of the PMU list
*/
- perf_pmu__scan(NULL);
+ perf_pmus__scan(NULL);
for (q = p; strsep(&p, ",{}"); q = p) {
sep = p ? str + (p - p_orig - 1) : "";
@@ -86,7 +87,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
goto error;
}
- pmu = perf_pmu__find_by_type((unsigned int)attr.type);
+ pmu = perf_pmus__find_by_type((unsigned int)attr.type);
evsel = parse_events__add_event(evlist->core.nr_entries,
&attr, q, /*metric_id=*/NULL,
pmu);
@@ -123,6 +124,36 @@ error:
return -1;
}
+static bool is_libpfm_event_supported(const char *name, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ struct perf_pmu *pmu;
+ struct evsel *evsel;
+ struct perf_event_attr attr = {};
+ bool result = true;
+ int ret;
+
+ ret = pfm_get_perf_event_encoding(name, PFM_PLM0|PFM_PLM3,
+ &attr, NULL, NULL);
+ if (ret != PFM_SUCCESS)
+ return false;
+
+ pmu = perf_pmus__find_by_type((unsigned int)attr.type);
+ evsel = parse_events__add_event(0, &attr, name, /*metric_id=*/NULL, pmu);
+ if (evsel == NULL)
+ return false;
+
+ evsel->is_libpfm_event = true;
+
+ if (evsel__open(evsel, cpus, threads) < 0)
+ result = false;
+
+ evsel__close(evsel);
+ evsel__delete(evsel);
+
+ return result;
+}
+
static const char *srcs[PFM_ATTR_CTRL_MAX] = {
[PFM_ATTR_CTRL_UNKNOWN] = "???",
[PFM_ATTR_CTRL_PMU] = "PMU",
@@ -146,6 +177,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
{
int j, ret;
char topic[80], name[80];
+ struct perf_cpu_map *cpus = perf_cpu_map__empty_new(1);
+ struct perf_thread_map *threads = thread_map__new_by_tid(0);
strbuf_setlen(buf, 0);
snprintf(topic, sizeof(topic), "pfm %s", pinfo->name);
@@ -185,14 +218,15 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
ainfo.name, ainfo.desc);
}
}
- print_cb->print_event(print_state,
- pinfo->name,
- topic,
- name, info->equiv,
- /*scale_unit=*/NULL,
- /*deprecated=*/NULL, "PFM event",
- info->desc, /*long_desc=*/NULL,
- /*encoding_desc=*/buf->buf);
+
+ if (is_libpfm_event_supported(name, cpus, threads)) {
+ print_cb->print_event(print_state, pinfo->name, topic,
+ name, info->equiv,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/NULL, "PFM event",
+ info->desc, /*long_desc=*/NULL,
+ /*encoding_desc=*/buf->buf);
+ }
pfm_for_each_event_attr(j, info) {
pfm_event_attr_info_t ainfo;
@@ -215,6 +249,10 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
print_attr_flags(buf, &ainfo);
snprintf(name, sizeof(name), "%s::%s:%s",
pinfo->name, info->name, ainfo.name);
+
+ if (!is_libpfm_event_supported(name, cpus, threads))
+ continue;
+
print_cb->print_event(print_state,
pinfo->name,
topic,
@@ -225,6 +263,9 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
/*encoding_desc=*/buf->buf);
}
}
+
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
}
void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state)
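
is_libpfm_event_supported() follows the usual probe pattern: encode the event, try to open it on a throwaway CPU/thread map, and treat open failure as "unsupported". A minimal sketch of the same idea using the raw perf_event_open syscall rather than the evsel wrapper; the helper name is illustrative:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Illustrative: an event is "supported" if the kernel lets us open it. */
	static int attr_is_supported(struct perf_event_attr *attr)
	{
		int fd = syscall(SYS_perf_event_open, attr, 0 /* this thread */,
				 -1 /* any cpu */, -1 /* no group */, 0 /* flags */);

		if (fd < 0)
			return 0;
		close(fd);
		return 1;
	}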
diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c
deleted file mode 100644
index 38628805a952..000000000000
--- a/tools/perf/util/pmu-hybrid.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/list.h>
-#include <linux/compiler.h>
-#include <linux/string.h>
-#include <linux/zalloc.h>
-#include <sys/types.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdbool.h>
-#include <stdarg.h>
-#include <locale.h>
-#include <api/fs/fs.h>
-#include "fncache.h"
-#include "pmu-hybrid.h"
-
-LIST_HEAD(perf_pmu__hybrid_pmus);
-
-bool perf_pmu__hybrid_mounted(const char *name)
-{
- int cpu;
- char pmu_name[PATH_MAX];
- struct perf_pmu pmu = {.name = pmu_name};
-
- if (strncmp(name, "cpu_", 4))
- return false;
-
- strlcpy(pmu_name, name, sizeof(pmu_name));
- return perf_pmu__scan_file(&pmu, "cpus", "%u", &cpu) > 0;
-}
-
-struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name)
-{
- struct perf_pmu *pmu;
-
- if (!name)
- return NULL;
-
- perf_pmu__for_each_hybrid_pmu(pmu) {
- if (!strcmp(name, pmu->name))
- return pmu;
- }
-
- return NULL;
-}
-
-bool perf_pmu__is_hybrid(const char *name)
-{
- return perf_pmu__find_hybrid_pmu(name) != NULL;
-}
-
-char *perf_pmu__hybrid_type_to_pmu(const char *type)
-{
- char *pmu_name = NULL;
-
- if (asprintf(&pmu_name, "cpu_%s", type) < 0)
- return NULL;
-
- if (perf_pmu__is_hybrid(pmu_name))
- return pmu_name;
-
- /*
- * pmu may be not scanned, check the sysfs.
- */
- if (perf_pmu__hybrid_mounted(pmu_name))
- return pmu_name;
-
- free(pmu_name);
- return NULL;
-}
diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h
deleted file mode 100644
index 2b186c26a43e..000000000000
--- a/tools/perf/util/pmu-hybrid.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PMU_HYBRID_H
-#define __PMU_HYBRID_H
-
-#include <linux/perf_event.h>
-#include <linux/compiler.h>
-#include <linux/list.h>
-#include <stdbool.h>
-#include "pmu.h"
-
-extern struct list_head perf_pmu__hybrid_pmus;
-
-#define perf_pmu__for_each_hybrid_pmu(pmu) \
- list_for_each_entry(pmu, &perf_pmu__hybrid_pmus, hybrid_list)
-
-bool perf_pmu__hybrid_mounted(const char *name);
-
-struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name);
-bool perf_pmu__is_hybrid(const char *name);
-char *perf_pmu__hybrid_type_to_pmu(const char *type);
-
-static inline int perf_pmu__hybrid_pmu_num(void)
-{
- struct perf_pmu *pmu;
- int num = 0;
-
- perf_pmu__for_each_hybrid_pmu(pmu)
- num++;
-
- return num;
-}
-
-#endif /* __PMU_HYBRID_H */
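
With pmu-hybrid.c/h deleted, the dedicated hybrid list is gone; hybrid CPU PMUs are now ordinary core PMUs (pmu->is_core) found through the regular scan. A hedged sketch of counting core PMUs with the surviving API, roughly what callers of the removed perf_pmu__hybrid_pmu_num() would switch to:

	#include "pmu.h"
	#include "pmus.h"

	/* Illustrative replacement for the removed perf_pmu__hybrid_pmu_num(). */
	static int num_core_pmus(void)
	{
		struct perf_pmu *pmu = NULL;
		int num = 0;

		while ((pmu = perf_pmus__scan(pmu)) != NULL) {
			if (pmu->is_core)
				num++;
		}
		return num;
	}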
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ad209c88a124..d5406effc169 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -4,35 +4,29 @@
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/ctype.h>
-#include <subcmd/pager.h>
#include <sys/types.h>
-#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>
#include <stdbool.h>
-#include <stdarg.h>
#include <dirent.h>
#include <api/fs/fs.h>
#include <locale.h>
-#include <regex.h>
-#include <perf/cpumap.h>
#include <fnmatch.h>
#include <math.h>
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "pmus.h"
-#include "pmu-bison.h"
-#include "pmu-flex.h"
+#include <util/pmu-bison.h>
+#include <util/pmu-flex.h>
#include "parse-events.h"
#include "print-events.h"
#include "header.h"
#include "string2.h"
#include "strbuf.h"
#include "fncache.h"
-#include "pmu-hybrid.h"
#include "util/evsel_config.h"
struct perf_pmu perf_pmu__fake;
@@ -60,10 +54,6 @@ struct perf_pmu_format {
struct list_head list;
};
-static bool hybrid_scanned;
-
-static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name);
-
/*
* Parse & process all the sysfs attributes located under
* the directory specified in 'dir' parameter.
@@ -557,36 +547,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
return 0;
}
-/* Add all pmus in sysfs to pmu list: */
-static void pmu_read_sysfs(void)
-{
- int fd;
- DIR *dir;
- struct dirent *dent;
-
- fd = perf_pmu__event_source_devices_fd();
- if (fd < 0)
- return;
-
- dir = fdopendir(fd);
- if (!dir)
- return;
-
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
- continue;
- /* add to static LIST_HEAD(pmus): */
- perf_pmu__find2(fd, dent->d_name);
- }
-
- closedir(dir);
-}
-
/*
* Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
* may have a "cpus" file.
*/
-static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name)
+static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core)
{
struct perf_cpu_map *cpus;
const char *templates[] = {
@@ -610,16 +575,14 @@ static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name)
return cpus;
}
- return NULL;
+ /* Nothing found; for core PMUs, assume this means all CPUs. */
+ return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL;
}
static bool pmu_is_uncore(int dirfd, const char *name)
{
int fd;
- if (perf_pmu__hybrid_mounted(name))
- return false;
-
fd = perf_pmu__pathname_fd(dirfd, name, "cpumask", O_PATH);
if (fd < 0)
return false;
@@ -643,12 +606,14 @@ static char *pmu_id(const char *name)
return str;
}
-/*
- * PMU CORE devices have different name other than cpu in sysfs on some
- * platforms.
- * Looking for possible sysfs files to identify the arm core device.
+/**
+ * is_sysfs_pmu_core() - PMU core devices can have a name other than "cpu" in
+ *       sysfs on some platforms, such as ARM or Intel hybrid. Look for a
+ *       possible "cpus" file in sysfs to identify whether this is a core
+ *       device.
+ * @name: The PMU name such as "cpu_atom".
*/
-static int is_arm_pmu_core(const char *name)
+static int is_sysfs_pmu_core(const char *name)
{
char path[PATH_MAX];
@@ -777,9 +742,11 @@ out:
}
struct pmu_add_cpu_aliases_map_data {
+ /* List being added to. */
struct list_head *head;
- const char *name;
- const char *cpu_name;
+ /* If a pmu_event lacks a given PMU, the default is used. */
+ char *default_pmu_name;
+ /* The PMU that we're searching for events for. */
struct perf_pmu *pmu;
};
@@ -788,37 +755,32 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
void *vdata)
{
struct pmu_add_cpu_aliases_map_data *data = vdata;
- const char *pname = pe->pmu ? pe->pmu : data->cpu_name;
-
- if (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->name))
- goto new_alias;
-
- if (strcmp(pname, data->name))
- return 0;
+ const char *pname = pe->pmu ?: data->default_pmu_name;
-new_alias:
- /* need type casts to override 'const' */
- __perf_pmu__new_alias(data->head, -1, (char *)pe->name, (char *)pe->desc,
- (char *)pe->event, pe);
+ if (!strcmp(pname, data->pmu->name) ||
+ (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->pmu->name))) {
+ /* need type casts to override 'const' */
+ __perf_pmu__new_alias(data->head, -1, (char *)pe->name, (char *)pe->desc,
+ (char *)pe->event, pe);
+ }
return 0;
}
/*
- * From the pmu_events_map, find the table of PMU events that corresponds
- * to the current running CPU. Then, add all PMU events from that table
- * as aliases.
+ * From the pmu_events_table, find the events that correspond to the given
+ * PMU and add them to the list 'head'.
*/
void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu,
- const struct pmu_events_table *table)
+ const struct pmu_events_table *table)
{
struct pmu_add_cpu_aliases_map_data data = {
.head = head,
- .name = pmu->name,
- .cpu_name = is_arm_pmu_core(pmu->name) ? pmu->name : "cpu",
+ .default_pmu_name = perf_pmus__default_pmu_name(),
.pmu = pmu,
};
pmu_events_table_for_each_event(table, pmu_add_cpu_aliases_map_callback, &data);
+ free(data.default_pmu_name);
}
static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
@@ -898,23 +860,16 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
return max_precise;
}
-static struct perf_pmu *pmu_lookup(int dirfd, const char *lookup_name)
+struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
{
struct perf_pmu *pmu;
LIST_HEAD(format);
LIST_HEAD(aliases);
__u32 type;
char *name = pmu_find_real_name(lookup_name);
- bool is_hybrid = perf_pmu__hybrid_mounted(name);
char *alias_name;
/*
- * Check pmu name for hybrid and the pmu may be invalid in sysfs
- */
- if (!strncmp(name, "cpu_", 4) && !is_hybrid)
- return NULL;
-
- /*
* The pmu data we store & need consists of the pmu
* type value and format definitions. Load both right
* now.
@@ -932,9 +887,9 @@ static struct perf_pmu *pmu_lookup(int dirfd, const char *lookup_name)
if (!pmu)
return NULL;
- pmu->cpus = pmu_cpumask(dirfd, name);
+ pmu->is_core = is_pmu_core(name);
+ pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
pmu->name = strdup(name);
-
if (!pmu->name)
goto err;
@@ -962,12 +917,7 @@ static struct perf_pmu *pmu_lookup(int dirfd, const char *lookup_name)
INIT_LIST_HEAD(&pmu->caps);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
- list_add_tail(&pmu->list, &pmus);
-
- if (is_hybrid)
- list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus);
- else
- INIT_LIST_HEAD(&pmu->hybrid_list);
+ list_add_tail(&pmu->list, pmus);
pmu->default_config = perf_pmu__get_default_config(pmu);
@@ -978,10 +928,40 @@ err:
return NULL;
}
+/* Creates the PMU when sysfs scanning fails. */
+struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus)
+{
+ struct perf_pmu *pmu = zalloc(sizeof(*pmu));
+
+ if (!pmu)
+ return NULL;
+
+ pmu->name = strdup("cpu");
+ if (!pmu->name) {
+ free(pmu);
+ return NULL;
+ }
+
+ pmu->is_core = true;
+ pmu->type = PERF_TYPE_RAW;
+ pmu->cpus = cpu_map__online();
+
+ INIT_LIST_HEAD(&pmu->format);
+ INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
+ list_add_tail(&pmu->list, core_pmus);
+ return pmu;
+}
+
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
{
struct perf_pmu_format *format;
+ if (pmu->formats_checked)
+ return;
+
+ pmu->formats_checked = true;
+
/* fake pmu doesn't have format list */
if (pmu == &perf_pmu__fake)
return;
@@ -995,61 +975,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
}
}
-static struct perf_pmu *pmu_find(const char *name)
-{
- struct perf_pmu *pmu;
-
- list_for_each_entry(pmu, &pmus, list) {
- if (!strcmp(pmu->name, name) ||
- (pmu->alias_name && !strcmp(pmu->alias_name, name)))
- return pmu;
- }
-
- return NULL;
-}
-
-struct perf_pmu *perf_pmu__find_by_type(unsigned int type)
-{
- struct perf_pmu *pmu;
-
- list_for_each_entry(pmu, &pmus, list)
- if (pmu->type == type)
- return pmu;
-
- return NULL;
-}
-
-struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
-{
- /*
- * pmu iterator: If pmu is NULL, we start at the begin,
- * otherwise return the next pmu. Returns NULL on end.
- */
- if (!pmu) {
- pmu_read_sysfs();
- pmu = list_prepare_entry(pmu, &pmus, list);
- }
- list_for_each_entry_continue(pmu, &pmus, list)
- return pmu;
- return NULL;
-}
-
-struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
-{
- struct perf_pmu *pmu = NULL;
-
- if (evsel->pmu)
- return evsel->pmu;
-
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (pmu->type == evsel->core.attr.type)
- break;
- }
-
- ((struct evsel *)evsel)->pmu = pmu;
- return pmu;
-}
-
bool evsel__is_aux_event(const struct evsel *evsel)
{
struct perf_pmu *pmu = evsel__find_pmu(evsel);
@@ -1086,43 +1011,6 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
evsel->core.attr.config |= field_prep(bits, val);
}
-struct perf_pmu *perf_pmu__find(const char *name)
-{
- struct perf_pmu *pmu;
- int dirfd;
-
- /*
- * Once PMU is loaded it stays in the list,
- * so we keep us from multiple reading/parsing
- * the pmu format definitions.
- */
- pmu = pmu_find(name);
- if (pmu)
- return pmu;
-
- dirfd = perf_pmu__event_source_devices_fd();
- pmu = pmu_lookup(dirfd, name);
- close(dirfd);
-
- return pmu;
-}
-
-static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
-{
- struct perf_pmu *pmu;
-
- /*
- * Once PMU is loaded it stays in the list,
- * so we keep us from multiple reading/parsing
- * the pmu format definitions.
- */
- pmu = pmu_find(name);
- if (pmu)
- return pmu;
-
- return pmu_lookup(dirfd, name);
-}
-
static struct perf_pmu_format *
pmu_find_format(struct list_head *formats, const char *name)
{
@@ -1398,7 +1286,6 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
{
bool zero = !!pmu->default_config;
- attr->type = pmu->type;
return perf_pmu__config_terms(pmu->name, &pmu->format, attr,
head_terms, zero, err);
}
@@ -1553,250 +1440,57 @@ void perf_pmu__del_formats(struct list_head *formats)
}
}
-static int sub_non_neg(int a, int b)
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name)
{
- if (b > a)
- return 0;
- return a - b;
-}
-
-static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
- const struct perf_pmu_alias *alias)
-{
- struct parse_events_term *term;
- int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
-
- list_for_each_entry(term, &alias->terms, list) {
- if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
- used += snprintf(buf + used, sub_non_neg(len, used),
- ",%s=%s", term->config,
- term->val.str);
- }
+ struct perf_pmu_format *format;
- if (sub_non_neg(len, used) > 0) {
- buf[used] = '/';
- used++;
+ list_for_each_entry(format, &pmu->format, list) {
+ if (!strcmp(format->name, name))
+ return true;
}
- if (sub_non_neg(len, used) > 0) {
- buf[used] = '\0';
- used++;
- } else
- buf[len - 1] = '\0';
-
- return buf;
+ return false;
}
-/** Struct for ordering events as output in perf list. */
-struct sevent {
- /** PMU for event. */
- const struct perf_pmu *pmu;
- /**
- * Optional event for name, desc, etc. If not present then this is a
- * selectable PMU and the event name is shown as "//".
- */
- const struct perf_pmu_alias *event;
- /** Is the PMU for the CPU? */
- bool is_cpu;
-};
-
-static int cmp_sevent(const void *a, const void *b)
+bool is_pmu_core(const char *name)
{
- const struct sevent *as = a;
- const struct sevent *bs = b;
- const char *a_pmu_name = NULL, *b_pmu_name = NULL;
- const char *a_name = "//", *a_desc = NULL, *a_topic = "";
- const char *b_name = "//", *b_desc = NULL, *b_topic = "";
- int ret;
-
- if (as->event) {
- a_name = as->event->name;
- a_desc = as->event->desc;
- a_topic = as->event->topic ?: "";
- a_pmu_name = as->event->pmu_name;
- }
- if (bs->event) {
- b_name = bs->event->name;
- b_desc = bs->event->desc;
- b_topic = bs->event->topic ?: "";
- b_pmu_name = bs->event->pmu_name;
- }
- /* Put extra events last. */
- if (!!a_desc != !!b_desc)
- return !!a_desc - !!b_desc;
-
- /* Order by topics. */
- ret = strcmp(a_topic, b_topic);
- if (ret)
- return ret;
-
- /* Order CPU core events to be first */
- if (as->is_cpu != bs->is_cpu)
- return as->is_cpu ? -1 : 1;
-
- /* Order by PMU name. */
- if (as->pmu != bs->pmu) {
- a_pmu_name = a_pmu_name ?: (as->pmu->name ?: "");
- b_pmu_name = b_pmu_name ?: (bs->pmu->name ?: "");
- ret = strcmp(a_pmu_name, b_pmu_name);
- if (ret)
- return ret;
- }
-
- /* Order by event name. */
- return strcmp(a_name, b_name);
+ return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name);
}
-bool is_pmu_core(const char *name)
+bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu)
{
- return !strcmp(name, "cpu") || is_arm_pmu_core(name);
+ return pmu->is_core;
}
-static bool pmu_alias_is_duplicate(struct sevent *alias_a,
- struct sevent *alias_b)
+bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu)
{
- const char *a_pmu_name = NULL, *b_pmu_name = NULL;
- const char *a_name = "//", *b_name = "//";
-
-
- if (alias_a->event) {
- a_name = alias_a->event->name;
- a_pmu_name = alias_a->event->pmu_name;
- }
- if (alias_b->event) {
- b_name = alias_b->event->name;
- b_pmu_name = alias_b->event->pmu_name;
- }
-
- /* Different names -> never duplicates */
- if (strcmp(a_name, b_name))
- return false;
-
- /* Don't remove duplicates for different PMUs */
- a_pmu_name = a_pmu_name ?: (alias_a->pmu->name ?: "");
- b_pmu_name = b_pmu_name ?: (alias_b->pmu->name ?: "");
- return strcmp(a_pmu_name, b_pmu_name) == 0;
+ return !pmu->is_core || perf_pmus__num_core_pmus() == 1;
}
-void print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
+bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name)
{
- struct perf_pmu *pmu;
- struct perf_pmu_alias *event;
- char buf[1024];
- int printed = 0;
- int len, j;
- struct sevent *aliases;
-
- pmu = NULL;
- len = 0;
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- list_for_each_entry(event, &pmu->aliases, list)
- len++;
- if (pmu->selectable)
- len++;
- }
- aliases = zalloc(sizeof(struct sevent) * len);
- if (!aliases) {
- pr_err("FATAL: not enough memory to print PMU events\n");
- return;
- }
- pmu = NULL;
- j = 0;
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- bool is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name);
-
- list_for_each_entry(event, &pmu->aliases, list) {
- aliases[j].event = event;
- aliases[j].pmu = pmu;
- aliases[j].is_cpu = is_cpu;
- j++;
- }
- if (pmu->selectable) {
- aliases[j].event = NULL;
- aliases[j].pmu = pmu;
- aliases[j].is_cpu = is_cpu;
- j++;
- }
- }
- len = j;
- qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
- for (j = 0; j < len; j++) {
- const char *name, *alias = NULL, *scale_unit = NULL,
- *desc = NULL, *long_desc = NULL,
- *encoding_desc = NULL, *topic = NULL,
- *pmu_name = NULL;
- bool deprecated = false;
- size_t buf_used;
-
- /* Skip duplicates */
- if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
- continue;
+ struct perf_pmu_alias *alias;
- if (!aliases[j].event) {
- /* A selectable event. */
- pmu_name = aliases[j].pmu->name;
- buf_used = snprintf(buf, sizeof(buf), "%s//", pmu_name) + 1;
- name = buf;
- } else {
- if (aliases[j].event->desc) {
- name = aliases[j].event->name;
- buf_used = 0;
- } else {
- name = format_alias(buf, sizeof(buf), aliases[j].pmu,
- aliases[j].event);
- if (aliases[j].is_cpu) {
- alias = name;
- name = aliases[j].event->name;
- }
- buf_used = strlen(buf) + 1;
- }
- pmu_name = aliases[j].event->pmu_name ?: (aliases[j].pmu->name ?: "");
- if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) {
- scale_unit = buf + buf_used;
- buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
- "%G%s", aliases[j].event->scale,
- aliases[j].event->unit) + 1;
- }
- desc = aliases[j].event->desc;
- long_desc = aliases[j].event->long_desc;
- topic = aliases[j].event->topic;
- encoding_desc = buf + buf_used;
- buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
- "%s/%s/", pmu_name, aliases[j].event->str) + 1;
- deprecated = aliases[j].event->deprecated;
- }
- print_cb->print_event(print_state,
- pmu_name,
- topic,
- name,
- alias,
- scale_unit,
- deprecated,
- "Kernel PMU event",
- desc,
- long_desc,
- encoding_desc);
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ if (!strcmp(alias->name, name))
+ return true;
}
- if (printed && pager_in_use())
- printf("\n");
-
- zfree(&aliases);
- return;
+ return false;
}
-bool pmu_have_event(const char *pname, const char *name)
+bool perf_pmu__is_software(const struct perf_pmu *pmu)
{
- struct perf_pmu *pmu;
- struct perf_pmu_alias *alias;
-
- pmu = NULL;
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (strcmp(pname, pmu->name))
- continue;
- list_for_each_entry(alias, &pmu->aliases, list)
- if (!strcmp(alias->name, name))
- return true;
+ if (pmu->is_core || pmu->is_uncore || pmu->auxtrace)
+ return false;
+ switch (pmu->type) {
+ case PERF_TYPE_HARDWARE: return false;
+ case PERF_TYPE_SOFTWARE: return true;
+ case PERF_TYPE_TRACEPOINT: return true;
+ case PERF_TYPE_HW_CACHE: return false;
+ case PERF_TYPE_RAW: return false;
+ case PERF_TYPE_BREAKPOINT: return true;
+ default: break;
}
- return false;
+ return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe");
}
FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
@@ -1967,47 +1661,53 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
return pmu->nr_caps;
}
-void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
- const char *name)
+static void perf_pmu__compute_config_masks(struct perf_pmu *pmu)
{
struct perf_pmu_format *format;
- __u64 masks = 0, bits;
- char buf[100];
- unsigned int i;
+
+ if (pmu->config_masks_computed)
+ return;
list_for_each_entry(format, &pmu->format, list) {
- if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG)
+ unsigned int i;
+ __u64 *mask;
+
+ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END)
continue;
+ pmu->config_masks_present = true;
+ mask = &pmu->config_masks[format->value];
+
for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS)
- masks |= 1ULL << i;
+ *mask |= 1ULL << i;
}
+ pmu->config_masks_computed = true;
+}
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ const char *name, int config_num,
+ const char *config_name)
+{
+ __u64 bits;
+ char buf[100];
+
+ perf_pmu__compute_config_masks(pmu);
/*
* Kernel doesn't export any valid format bits.
*/
- if (masks == 0)
+ if (!pmu->config_masks_present)
return;
- bits = config & ~masks;
+ bits = config & ~pmu->config_masks[config_num];
if (bits == 0)
return;
bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf));
- pr_warning("WARNING: event '%s' not valid (bits %s of config "
+ pr_warning("WARNING: event '%s' not valid (bits %s of %s "
"'%llx' not supported by kernel)!\n",
- name ?: "N/A", buf, config);
-}
-
-bool perf_pmu__has_hybrid(void)
-{
- if (!hybrid_scanned) {
- hybrid_scanned = true;
- perf_pmu__scan(NULL);
- }
-
- return !list_empty(&perf_pmu__hybrid_pmus);
+ name ?: "N/A", buf, config_name, config);
}
int perf_pmu__match(char *pattern, char *name, char *tok)
@@ -2024,39 +1724,6 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
return 0;
}
-int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
- struct perf_cpu_map **mcpus_ptr,
- struct perf_cpu_map **ucpus_ptr)
-{
- struct perf_cpu_map *pmu_cpus = pmu->cpus;
- struct perf_cpu_map *matched_cpus, *unmatched_cpus;
- struct perf_cpu cpu;
- int i, matched_nr = 0, unmatched_nr = 0;
-
- matched_cpus = perf_cpu_map__default_new();
- if (!matched_cpus)
- return -1;
-
- unmatched_cpus = perf_cpu_map__default_new();
- if (!unmatched_cpus) {
- perf_cpu_map__put(matched_cpus);
- return -1;
- }
-
- perf_cpu_map__for_each_cpu(cpu, i, cpus) {
- if (!perf_cpu_map__has(pmu_cpus, cpu))
- RC_CHK_ACCESS(unmatched_cpus)->map[unmatched_nr++] = cpu;
- else
- RC_CHK_ACCESS(matched_cpus)->map[matched_nr++] = cpu;
- }
-
- perf_cpu_map__set_nr(unmatched_cpus, unmatched_nr);
- perf_cpu_map__set_nr(matched_cpus, matched_nr);
- *mcpus_ptr = matched_cpus;
- *ucpus_ptr = unmatched_cpus;
- return 0;
-}
-
double __weak perf_pmu__cpu_slots_per_cycle(void)
{
return NAN;
@@ -2110,7 +1777,7 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename,
return openat(dirfd, path, flags);
}
-static void perf_pmu__delete(struct perf_pmu *pmu)
+void perf_pmu__delete(struct perf_pmu *pmu)
{
perf_pmu__del_formats(&pmu->format);
perf_pmu__del_aliases(pmu);
@@ -2123,15 +1790,3 @@ static void perf_pmu__delete(struct perf_pmu *pmu)
zfree(&pmu->alias_name);
free(pmu);
}
-
-void perf_pmu__destroy(void)
-{
- struct perf_pmu *pmu, *tmp;
-
- list_for_each_entry_safe(pmu, tmp, &pmus, list) {
- list_del(&pmu->list);
- list_del(&pmu->hybrid_list);
-
- perf_pmu__delete(pmu);
- }
-}
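
A note on the restructuring above: pmu.c no longer owns a global PMU list. perf_pmu__lookup() links a freshly parsed PMU into whatever list the caller passes, and perf_pmu__delete() is exported so the list owner can free entries. A minimal sketch of the resulting ownership model, assuming perf's internal headers and eliding error handling (the list name is illustrative):

    #include <linux/list.h>
    #include <unistd.h>
    #include "util/pmu.h"

    static LIST_HEAD(my_pmus); /* caller-owned list, as pmus.c now keeps */

    static struct perf_pmu *load_one(const char *name)
    {
        int dirfd = perf_pmu__event_source_devices_fd();
        /* Parses sysfs formats/aliases and links the PMU into my_pmus. */
        struct perf_pmu *pmu = perf_pmu__lookup(&my_pmus, dirfd, name);

        close(dirfd);
        return pmu;
    }

    static void unload_all(void)
    {
        struct perf_pmu *pmu, *tmp;

        list_for_each_entry_safe(pmu, tmp, &my_pmus, list) {
            list_del(&pmu->list);
            perf_pmu__delete(pmu); /* exported above for list owners */
        }
    }
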
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index b9a02dedd473..6b414cecbad2 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -60,6 +60,13 @@ struct perf_pmu {
*/
bool selectable;
/**
+ * @is_core: Is the PMU the core CPU PMU? Determined by the name being
+ * "cpu" or by the presence of
+ * <sysfs>/bus/event_source/devices/<name>/cpus. There may be >1 core
+ * PMU on systems like Intel hybrid.
+ */
+ bool is_core;
+ /**
* @is_uncore: Is the PMU not within the CPU core? Determined by the
* presence of <sysfs>/bus/event_source/devices/<name>/cpumask.
*/
@@ -70,6 +77,15 @@ struct perf_pmu {
*/
bool auxtrace;
/**
+	 * @formats_checked: Only check that the PMU's formats are valid for
+	 * perf_event_attr once.
+ */
+ bool formats_checked;
+ /** @config_masks_present: Are there config format values? */
+ bool config_masks_present;
+ /** @config_masks_computed: Set when masks are lazily computed. */
+ bool config_masks_computed;
+ /**
* @max_precise: Number of levels of :ppp precision supported by the
* PMU, read from
* <sysfs>/bus/event_source/devices/<name>/caps/max_precise.
@@ -112,8 +128,12 @@ struct perf_pmu {
struct list_head caps;
/** @list: Element on pmus list in pmu.c. */
struct list_head list;
- /** @hybrid_list: Element on perf_pmu__hybrid_pmus. */
- struct list_head hybrid_list;
+
+ /**
+ * @config_masks: Derived from the PMU's format data, bits that are
+ * valid within the config value.
+ */
+ __u64 config_masks[PERF_PMU_FORMAT_VALUE_CONFIG_END];
/**
* @missing_features: Features to inhibit when events on this PMU are
@@ -193,8 +213,6 @@ struct perf_pmu_alias {
char *pmu_name;
};
-struct perf_pmu *perf_pmu__find(const char *name);
-struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct list_head *head_terms,
@@ -216,12 +234,17 @@ int perf_pmu__new_format(struct list_head *list, char *name,
void perf_pmu__set_format(unsigned long *bits, long from, long to);
int perf_pmu__format_parse(int dirfd, struct list_head *head);
void perf_pmu__del_formats(struct list_head *formats);
-
-struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
bool is_pmu_core(const char *name);
-void print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
-bool pmu_have_event(const char *pname, const char *name);
+bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
+bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu);
+bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name);
+/**
+ * perf_pmu__is_software - is the PMU a software PMU, as in it uses the
+ * perf_sw_context in the kernel?
+ */
+bool perf_pmu__is_software(const struct perf_pmu *pmu);
FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name);
FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name);
@@ -248,16 +271,12 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
int perf_pmu__caps_parse(struct perf_pmu *pmu);
void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
- const char *name);
+ const char *name, int config_num,
+ const char *config_name);
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
-bool perf_pmu__has_hybrid(void);
int perf_pmu__match(char *pattern, char *name, char *tok);
-int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
- struct perf_cpu_map **mcpus_ptr,
- struct perf_cpu_map **ucpus_ptr);
-
char *pmu_find_real_name(const char *name);
char *pmu_find_alias_name(const char *name);
double perf_pmu__cpu_slots_per_cycle(void);
@@ -267,6 +286,8 @@ int perf_pmu__pathname_scnprintf(char *buf, size_t size,
int perf_pmu__event_source_devices_fd(void);
int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags);
-void perf_pmu__destroy(void);
+struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name);
+struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus);
+void perf_pmu__delete(struct perf_pmu *pmu);
#endif /* __PMU_H */
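
The config_masks array above replaces the mask computation that perf_pmu__warn_invalid_config() used to redo on every call: masks are now built lazily, once per PMU, with one mask per config word. A hedged sketch of a caller checking all three config words after a failed perf_event_open(); the PERF_PMU_FORMAT_VALUE_CONFIG* indices are the format parser's constants, and the wrapper itself is illustrative:

    #include <linux/perf_event.h>
    #include "util/pmu.h"

    static void warn_invalid_configs(struct perf_pmu *pmu,
                                     struct perf_event_attr *attr,
                                     const char *evname)
    {
        /* Each config word is checked against its own lazily built mask. */
        perf_pmu__warn_invalid_config(pmu, attr->config, evname,
                                      PERF_PMU_FORMAT_VALUE_CONFIG, "config");
        perf_pmu__warn_invalid_config(pmu, attr->config1, evname,
                                      PERF_PMU_FORMAT_VALUE_CONFIG1, "config1");
        perf_pmu__warn_invalid_config(pmu, attr->config2, evname,
                                      PERF_PMU_FORMAT_VALUE_CONFIG2, "config2");
    }
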
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 7f3b93c4d229..c58ba9fb6a36 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -1,5 +1,574 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/list.h>
-#include <pmus.h>
+#include <linux/zalloc.h>
+#include <subcmd/pager.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <string.h>
+#include <unistd.h>
+#include "debug.h"
+#include "evsel.h"
+#include "pmus.h"
+#include "pmu.h"
+#include "print-events.h"
-LIST_HEAD(pmus);
+/*
+ * core_pmus: A PMU belongs to core_pmus if its name is "cpu" or its sysfs
+ *            directory contains a "cpus" file. All PMUs belonging to core_pmus
+ *            must have pmu->is_core=1. If there is more than one PMU in
+ *            this list, perf interprets it as a heterogeneous platform.
+ *            (FWIW, certain ARM platforms with heterogeneous cores use a
+ *            homogeneous PMU, and so perf treats them as a homogeneous
+ *            platform because core_pmus will have only one entry.)
+ * other_pmus: All other PMUs, i.e. those not on the core_pmus list. It doesn't
+ *             matter whether the PMU is present per SMT-thread or outside of
+ *             the core in the hw. E.g., an instance of the AMD ibs_fetch// and
+ *             ibs_op// PMUs is present in each hw SMT thread, yet they are
+ *             captured under other_pmus. PMUs belonging to other_pmus
+ *             must have pmu->is_core=0, but pmu->is_uncore may be 0 or 1.
+ */
+static LIST_HEAD(core_pmus);
+static LIST_HEAD(other_pmus);
+static bool read_sysfs_core_pmus;
+static bool read_sysfs_all_pmus;
+
+void perf_pmus__destroy(void)
+{
+ struct perf_pmu *pmu, *tmp;
+
+ list_for_each_entry_safe(pmu, tmp, &core_pmus, list) {
+ list_del(&pmu->list);
+
+ perf_pmu__delete(pmu);
+ }
+ list_for_each_entry_safe(pmu, tmp, &other_pmus, list) {
+ list_del(&pmu->list);
+
+ perf_pmu__delete(pmu);
+ }
+ read_sysfs_core_pmus = false;
+ read_sysfs_all_pmus = false;
+}
+
+static struct perf_pmu *pmu_find(const char *name)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &core_pmus, list) {
+ if (!strcmp(pmu->name, name) ||
+ (pmu->alias_name && !strcmp(pmu->alias_name, name)))
+ return pmu;
+ }
+ list_for_each_entry(pmu, &other_pmus, list) {
+ if (!strcmp(pmu->name, name) ||
+ (pmu->alias_name && !strcmp(pmu->alias_name, name)))
+ return pmu;
+ }
+
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__find(const char *name)
+{
+ struct perf_pmu *pmu;
+ int dirfd;
+ bool core_pmu;
+
+ /*
+	 * Once a PMU is loaded it stays in the list, which
+	 * saves us from repeatedly reading/parsing
+	 * the pmu format definitions.
+ */
+ pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
+
+ if (read_sysfs_all_pmus)
+ return NULL;
+
+ core_pmu = is_pmu_core(name);
+ if (core_pmu && read_sysfs_core_pmus)
+ return NULL;
+
+ dirfd = perf_pmu__event_source_devices_fd();
+ pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
+ close(dirfd);
+
+ return pmu;
+}
+
+static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
+{
+ struct perf_pmu *pmu;
+ bool core_pmu;
+
+ /*
+	 * Once a PMU is loaded it stays in the list, which
+	 * saves us from repeatedly reading/parsing
+	 * the pmu format definitions.
+ */
+ pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
+
+ if (read_sysfs_all_pmus)
+ return NULL;
+
+ core_pmu = is_pmu_core(name);
+ if (core_pmu && read_sysfs_core_pmus)
+ return NULL;
+
+ return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
+}
+
+/* Add all pmus in sysfs to pmu list: */
+static void pmu_read_sysfs(bool core_only)
+{
+ int fd;
+ DIR *dir;
+ struct dirent *dent;
+
+ if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
+ return;
+
+ fd = perf_pmu__event_source_devices_fd();
+ if (fd < 0)
+ return;
+
+ dir = fdopendir(fd);
+ if (!dir) {
+ close(fd);
+ return;
+ }
+
+ while ((dent = readdir(dir))) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (core_only && !is_pmu_core(dent->d_name))
+ continue;
+ /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
+ perf_pmu__find2(fd, dent->d_name);
+ }
+
+ closedir(dir);
+ if (list_empty(&core_pmus)) {
+ if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
+ pr_err("Failure to set up any core PMUs\n");
+ }
+ if (!list_empty(&core_pmus)) {
+ read_sysfs_core_pmus = true;
+ if (!core_only)
+ read_sysfs_all_pmus = true;
+ }
+}
+
+static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &core_pmus, list) {
+ if (pmu->type == type)
+ return pmu;
+ }
+
+ list_for_each_entry(pmu, &other_pmus, list) {
+ if (pmu->type == type)
+ return pmu;
+ }
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
+
+ if (pmu || read_sysfs_all_pmus)
+ return pmu;
+
+ pmu_read_sysfs(/*core_only=*/false);
+ pmu = __perf_pmus__find_by_type(type);
+ return pmu;
+}
+
+/*
+ * pmu iterator: If pmu is NULL, start at the beginning, otherwise return the
+ * next pmu. Returns NULL at the end.
+ */
+struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
+{
+ bool use_core_pmus = !pmu || pmu->is_core;
+
+ if (!pmu) {
+ pmu_read_sysfs(/*core_only=*/false);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ }
+ if (use_core_pmus) {
+ list_for_each_entry_continue(pmu, &core_pmus, list)
+ return pmu;
+
+ pmu = NULL;
+ pmu = list_prepare_entry(pmu, &other_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &other_pmus, list)
+ return pmu;
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
+{
+ if (!pmu) {
+ pmu_read_sysfs(/*core_only=*/true);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &core_pmus, list)
+ return pmu;
+
+ return NULL;
+}
+
+const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ if (!strcmp(pmu->name, str))
+ return pmu;
+ /* Ignore "uncore_" prefix. */
+ if (!strncmp(pmu->name, "uncore_", 7)) {
+ if (!strcmp(pmu->name + 7, str))
+ return pmu;
+ }
+ /* Ignore "cpu_" prefix on Intel hybrid PMUs. */
+ if (!strncmp(pmu->name, "cpu_", 4)) {
+ if (!strcmp(pmu->name + 4, str))
+ return pmu;
+ }
+ }
+ return NULL;
+}
+
+int __weak perf_pmus__num_mem_pmus(void)
+{
+ /* All core PMUs are for mem events. */
+ return perf_pmus__num_core_pmus();
+}
+
+/** Struct for ordering events as output in perf list. */
+struct sevent {
+ /** PMU for event. */
+ const struct perf_pmu *pmu;
+ /**
+ * Optional event for name, desc, etc. If not present then this is a
+ * selectable PMU and the event name is shown as "//".
+ */
+ const struct perf_pmu_alias *event;
+ /** Is the PMU for the CPU? */
+ bool is_cpu;
+};
+
+static int cmp_sevent(const void *a, const void *b)
+{
+ const struct sevent *as = a;
+ const struct sevent *bs = b;
+ const char *a_pmu_name = NULL, *b_pmu_name = NULL;
+ const char *a_name = "//", *a_desc = NULL, *a_topic = "";
+ const char *b_name = "//", *b_desc = NULL, *b_topic = "";
+ int ret;
+
+ if (as->event) {
+ a_name = as->event->name;
+ a_desc = as->event->desc;
+ a_topic = as->event->topic ?: "";
+ a_pmu_name = as->event->pmu_name;
+ }
+ if (bs->event) {
+ b_name = bs->event->name;
+ b_desc = bs->event->desc;
+ b_topic = bs->event->topic ?: "";
+ b_pmu_name = bs->event->pmu_name;
+ }
+ /* Put extra events last. */
+ if (!!a_desc != !!b_desc)
+ return !!a_desc - !!b_desc;
+
+ /* Order by topics. */
+ ret = strcmp(a_topic, b_topic);
+ if (ret)
+ return ret;
+
+ /* Order CPU core events to be first */
+ if (as->is_cpu != bs->is_cpu)
+ return as->is_cpu ? -1 : 1;
+
+ /* Order by PMU name. */
+ if (as->pmu != bs->pmu) {
+ a_pmu_name = a_pmu_name ?: (as->pmu->name ?: "");
+ b_pmu_name = b_pmu_name ?: (bs->pmu->name ?: "");
+ ret = strcmp(a_pmu_name, b_pmu_name);
+ if (ret)
+ return ret;
+ }
+
+ /* Order by event name. */
+ return strcmp(a_name, b_name);
+}
+
+static bool pmu_alias_is_duplicate(struct sevent *alias_a,
+ struct sevent *alias_b)
+{
+ const char *a_pmu_name = NULL, *b_pmu_name = NULL;
+ const char *a_name = "//", *b_name = "//";
+
+
+ if (alias_a->event) {
+ a_name = alias_a->event->name;
+ a_pmu_name = alias_a->event->pmu_name;
+ }
+ if (alias_b->event) {
+ b_name = alias_b->event->name;
+ b_pmu_name = alias_b->event->pmu_name;
+ }
+
+ /* Different names -> never duplicates */
+ if (strcmp(a_name, b_name))
+ return false;
+
+ /* Don't remove duplicates for different PMUs */
+ a_pmu_name = a_pmu_name ?: (alias_a->pmu->name ?: "");
+ b_pmu_name = b_pmu_name ?: (alias_b->pmu->name ?: "");
+ return strcmp(a_pmu_name, b_pmu_name) == 0;
+}
+
+static int sub_non_neg(int a, int b)
+{
+ if (b > a)
+ return 0;
+ return a - b;
+}
+
+static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
+ const struct perf_pmu_alias *alias)
+{
+ struct parse_events_term *term;
+ int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
+
+ list_for_each_entry(term, &alias->terms, list) {
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+ used += snprintf(buf + used, sub_non_neg(len, used),
+ ",%s=%s", term->config,
+ term->val.str);
+ }
+
+ if (sub_non_neg(len, used) > 0) {
+ buf[used] = '/';
+ used++;
+ }
+ if (sub_non_neg(len, used) > 0) {
+ buf[used] = '\0';
+ used++;
+ } else
+ buf[len - 1] = '\0';
+
+ return buf;
+}
+
+void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ struct perf_pmu *pmu;
+ struct perf_pmu_alias *event;
+ char buf[1024];
+ int printed = 0;
+ int len, j;
+ struct sevent *aliases;
+
+ pmu = NULL;
+ len = 0;
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ list_for_each_entry(event, &pmu->aliases, list)
+ len++;
+ if (pmu->selectable)
+ len++;
+ }
+ aliases = zalloc(sizeof(struct sevent) * len);
+ if (!aliases) {
+ pr_err("FATAL: not enough memory to print PMU events\n");
+ return;
+ }
+ pmu = NULL;
+ j = 0;
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ bool is_cpu = pmu->is_core;
+
+ list_for_each_entry(event, &pmu->aliases, list) {
+ aliases[j].event = event;
+ aliases[j].pmu = pmu;
+ aliases[j].is_cpu = is_cpu;
+ j++;
+ }
+ if (pmu->selectable) {
+ aliases[j].event = NULL;
+ aliases[j].pmu = pmu;
+ aliases[j].is_cpu = is_cpu;
+ j++;
+ }
+ }
+ len = j;
+ qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
+ for (j = 0; j < len; j++) {
+ const char *name, *alias = NULL, *scale_unit = NULL,
+ *desc = NULL, *long_desc = NULL,
+ *encoding_desc = NULL, *topic = NULL,
+ *pmu_name = NULL;
+ bool deprecated = false;
+ size_t buf_used;
+
+ /* Skip duplicates */
+ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
+ continue;
+
+ if (!aliases[j].event) {
+ /* A selectable event. */
+ pmu_name = aliases[j].pmu->name;
+ buf_used = snprintf(buf, sizeof(buf), "%s//", pmu_name) + 1;
+ name = buf;
+ } else {
+ if (aliases[j].event->desc) {
+ name = aliases[j].event->name;
+ buf_used = 0;
+ } else {
+ name = format_alias(buf, sizeof(buf), aliases[j].pmu,
+ aliases[j].event);
+ if (aliases[j].is_cpu) {
+ alias = name;
+ name = aliases[j].event->name;
+ }
+ buf_used = strlen(buf) + 1;
+ }
+ pmu_name = aliases[j].event->pmu_name ?: (aliases[j].pmu->name ?: "");
+ if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) {
+ scale_unit = buf + buf_used;
+ buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+ "%G%s", aliases[j].event->scale,
+ aliases[j].event->unit) + 1;
+ }
+ desc = aliases[j].event->desc;
+ long_desc = aliases[j].event->long_desc;
+ topic = aliases[j].event->topic;
+ encoding_desc = buf + buf_used;
+ buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+ "%s/%s/", pmu_name, aliases[j].event->str) + 1;
+ deprecated = aliases[j].event->deprecated;
+ }
+ print_cb->print_event(print_state,
+ pmu_name,
+ topic,
+ name,
+ alias,
+ scale_unit,
+ deprecated,
+ "Kernel PMU event",
+ desc,
+ long_desc,
+ encoding_desc);
+ }
+ if (printed && pager_in_use())
+ printf("\n");
+
+ zfree(&aliases);
+}
+
+bool perf_pmus__have_event(const char *pname, const char *name)
+{
+ struct perf_pmu *pmu = perf_pmus__find(pname);
+
+ return pmu && perf_pmu__have_event(pmu, name);
+}
+
+int perf_pmus__num_core_pmus(void)
+{
+ static int count;
+
+ if (!count) {
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL)
+ count++;
+ }
+ return count;
+}
+
+static bool __perf_pmus__supports_extended_type(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ if (perf_pmus__num_core_pmus() <= 1)
+ return false;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (!is_event_supported(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES | ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)))
+ return false;
+ }
+
+ return true;
+}
+
+static bool perf_pmus__do_support_extended_type;
+
+static void perf_pmus__init_supports_extended_type(void)
+{
+ perf_pmus__do_support_extended_type = __perf_pmus__supports_extended_type();
+}
+
+bool perf_pmus__supports_extended_type(void)
+{
+ static pthread_once_t extended_type_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&extended_type_once, perf_pmus__init_supports_extended_type);
+
+ return perf_pmus__do_support_extended_type;
+}
+
+char *perf_pmus__default_pmu_name(void)
+{
+ int fd;
+ DIR *dir;
+ struct dirent *dent;
+ char *result = NULL;
+
+ if (!list_empty(&core_pmus))
+ return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name);
+
+ fd = perf_pmu__event_source_devices_fd();
+ if (fd < 0)
+ return strdup("cpu");
+
+ dir = fdopendir(fd);
+ if (!dir) {
+ close(fd);
+ return strdup("cpu");
+ }
+
+ while ((dent = readdir(dir))) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ if (is_pmu_core(dent->d_name)) {
+ result = strdup(dent->d_name);
+ break;
+ }
+ }
+
+ closedir(dir);
+ return result ?: strdup("cpu");
+}
+
+struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel->pmu;
+
+ if (!pmu) {
+ pmu = perf_pmus__find_by_type(evsel->core.attr.type);
+ ((struct evsel *)evsel)->pmu = pmu;
+ }
+ return pmu;
+}
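
The new pmus.c centralizes iteration: scanning from NULL triggers the lazy sysfs read, and core PMUs are always visited before other PMUs. A small sketch of both traversal styles, assuming perf's internal headers:

    #include <stdio.h>
    #include "util/pmu.h"
    #include "util/pmus.h"

    static void list_pmu_names(void)
    {
        struct perf_pmu *pmu = NULL;

        /* Walks core_pmus then other_pmus, reading all of sysfs once. */
        while ((pmu = perf_pmus__scan(pmu)) != NULL)
            printf("%s%s\n", pmu->name, pmu->is_core ? " (core)" : "");

        /* Core-only walk; only core sysfs directories are parsed. */
        pmu = NULL;
        while ((pmu = perf_pmus__scan_core(pmu)) != NULL)
            printf("core: %s\n", pmu->name);
    }
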
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index 5ec12007eb5c..a21464432d0f 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -2,8 +2,24 @@
#ifndef __PMUS_H
#define __PMUS_H
-extern struct list_head pmus;
+struct perf_pmu;
+struct print_callbacks;
-#define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list)
+void perf_pmus__destroy(void);
+
+struct perf_pmu *perf_pmus__find(const char *name);
+struct perf_pmu *perf_pmus__find_by_type(unsigned int type);
+
+struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu);
+struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu);
+
+const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
+
+int perf_pmus__num_mem_pmus(void);
+void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
+bool perf_pmus__have_event(const char *pname, const char *name);
+int perf_pmus__num_core_pmus(void);
+bool perf_pmus__supports_extended_type(void);
+char *perf_pmus__default_pmu_name(void);
#endif /* __PMUS_H */
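
One behavior of perf_pmus__pmu_for_pmu_filter() worth noting from the implementation above: the lookup tolerates the "uncore_" prefix and the hybrid "cpu_" prefix, so short and long spellings resolve to the same PMU. A sketch with hypothetical PMU names:

    #include "util/pmus.h"

    static void filter_examples(void)
    {
        /* Both resolve to the same uncore PMU (names are illustrative). */
        const struct perf_pmu *imc  = perf_pmus__pmu_for_pmu_filter("uncore_imc_0");
        const struct perf_pmu *imc2 = perf_pmus__pmu_for_pmu_filter("imc_0");

        /* Both resolve to the same hybrid core PMU. */
        const struct perf_pmu *atom  = perf_pmus__pmu_for_pmu_filter("cpu_atom");
        const struct perf_pmu *atom2 = perf_pmus__pmu_for_pmu_filter("atom");

        (void)imc; (void)imc2; (void)atom; (void)atom2;
    }
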
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index ee145cec42c0..a7566edc86a3 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -20,13 +20,14 @@
#include "metricgroup.h"
#include "parse-events.h"
#include "pmu.h"
+#include "pmus.h"
#include "print-events.h"
#include "probe-file.h"
#include "string2.h"
#include "strlist.h"
#include "tracepoint.h"
#include "pfm.h"
-#include "pmu-hybrid.h"
+#include "thread_map.h"
#define MAX_NAME_LEN 100
@@ -228,60 +229,93 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
strlist__delete(sdtlist);
}
+bool is_event_supported(u8 type, u64 config)
+{
+ bool ret = true;
+ int open_return;
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .disabled = 1,
+ };
+ struct perf_thread_map *tmap = thread_map__new_by_tid(0);
+
+ if (tmap == NULL)
+ return false;
+
+ evsel = evsel__new(&attr);
+ if (evsel) {
+ open_return = evsel__open(evsel, NULL, tmap);
+ ret = open_return >= 0;
+
+ if (open_return == -EACCES) {
+ /*
+			 * This happens if the paranoid value
+			 * /proc/sys/kernel/perf_event_paranoid is set to 2.
+			 * Re-run with exclude_kernel set; we don't do that
+			 * by default as some ARM machines do not support it.
+ */
+ evsel->core.attr.exclude_kernel = 1;
+ ret = evsel__open(evsel, NULL, tmap) >= 0;
+ }
+ evsel__delete(evsel);
+ }
+
+ perf_thread_map__put(tmap);
+ return ret;
+}
+
int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state)
{
- struct strlist *evt_name_list = strlist__new(NULL, NULL);
- struct str_node *nd;
+ struct perf_pmu *pmu = NULL;
+ const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE];
- if (!evt_name_list) {
- pr_debug("Failed to allocate new strlist for hwcache events\n");
- return -ENOMEM;
- }
- for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
- for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
- /* skip invalid cache type */
- if (!evsel__is_cache_op_valid(type, op))
- continue;
-
- for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- struct perf_pmu *pmu = NULL;
- char name[64];
-
- __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
- if (!perf_pmu__has_hybrid()) {
- if (is_event_supported(PERF_TYPE_HW_CACHE,
- type | (op << 8) | (i << 16)))
- strlist__add(evt_name_list, name);
+ /*
+	 * Only print core PMUs, skipping uncore for performance and
+	 * PERF_TYPE_SOFTWARE, which can succeed in opening legacy cache events.
+ */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE)
+ continue;
+
+ for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ /* skip invalid cache type */
+ if (!evsel__is_cache_op_valid(type, op))
continue;
- }
- perf_pmu__for_each_hybrid_pmu(pmu) {
- if (is_event_supported(PERF_TYPE_HW_CACHE,
- type | (op << 8) | (i << 16) |
- ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) {
- char new_name[128];
- snprintf(new_name, sizeof(new_name),
- "%s/%s/", pmu->name, name);
- strlist__add(evt_name_list, new_name);
- }
+
+ for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) {
+ char name[64];
+ char alias_name[128];
+ __u64 config;
+ int ret;
+
+ __evsel__hw_cache_type_op_res_name(type, op, res,
+ name, sizeof(name));
+
+ ret = parse_events__decode_legacy_cache(name, pmu->type,
+ &config);
+ if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config))
+ continue;
+ snprintf(alias_name, sizeof(alias_name), "%s/%s/",
+ pmu->name, name);
+ print_cb->print_event(print_state,
+ "cache",
+ pmu->name,
+ name,
+ alias_name,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptor,
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL);
}
}
}
}
-
- strlist__for_each_entry(nd, evt_name_list) {
- print_cb->print_event(print_state,
- "cache",
- /*pmu_name=*/NULL,
- nd->s,
- /*event_alias=*/NULL,
- /*scale_unit=*/NULL,
- /*deprecated=*/false,
- event_type_descriptors[PERF_TYPE_HW_CACHE],
- /*desc=*/NULL,
- /*long_desc=*/NULL,
- /*encoding_desc=*/NULL);
- }
- strlist__delete(evt_name_list);
return 0;
}
@@ -370,7 +404,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
print_hwcache_events(print_cb, print_state);
- print_pmu_events(print_cb, print_state);
+ perf_pmus__print_pmu_events(print_cb, print_state);
print_cb->print_event(print_state,
/*topic=*/NULL,
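
The hwcache probing above leans on the legacy cache encoding plus the "extended type" bits used when there is more than one core PMU. Per perf_event_open(2), the config packs cache id, op and result into the low bytes, and the PMU type can be carried in the high 32 bits. The helper below is an illustrative sketch of that layout, not the parse_events__decode_legacy_cache() the patch actually calls:

    #include <linux/perf_event.h>

    static __u64 legacy_cache_config(int cache, int op, int result, __u32 pmu_type)
    {
        __u64 config = cache | (op << 8) | (result << 16);

        /* Extended type: target a specific (e.g. hybrid) core PMU. */
        config |= (__u64)pmu_type << PERF_PMU_TYPE_SHIFT;
        return config;
    }
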
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index e75a3d7e3fe3..d7fab411e75c 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -3,6 +3,7 @@
#define __PERF_PRINT_EVENTS_H
#include <linux/perf_event.h>
+#include <linux/types.h>
#include <stdbool.h>
struct event_symbol;
@@ -36,5 +37,6 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta
unsigned int max);
void print_tool_events(const struct print_callbacks *print_cb, void *print_state);
void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state);
+bool is_event_supported(u8 type, u64 config);
#endif /* __PERF_PRINT_EVENTS_H */
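
With the prototype now exported here, other call sites can probe kernel support before offering an event. A minimal usage sketch, assuming perf's internal headers:

    #include <linux/perf_event.h>
    #include "util/print-events.h"

    static bool have_hw_cycles(void)
    {
        /*
         * Opens the event on the current thread; print-events.c retries
         * with exclude_kernel set when the open fails with EACCES.
         */
        return is_event_supported(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
    }
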
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6e2110d605fb..16822a8a540f 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -74,7 +74,6 @@ int init_probe_symbol_maps(bool user_only)
{
int ret;
- symbol_conf.sort_by_name = true;
symbol_conf.allow_aliases = true;
ret = symbol__init(NULL);
if (ret < 0) {
@@ -382,6 +381,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
struct symbol *sym;
u64 address = 0;
int ret = -ENOENT;
+ size_t idx;
/* This can work only for function-name based one */
if (!pp->function || pp->file)
@@ -392,7 +392,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
return -EINVAL;
/* Find the address of given function */
- map__for_each_symbol_by_name(map, pp->function, sym) {
+ map__for_each_symbol_by_name(map, pp->function, sym, idx) {
if (uprobes) {
address = sym->start;
if (sym->type == STT_GNU_IFUNC)
@@ -3738,7 +3738,6 @@ out:
int show_available_funcs(const char *target, struct nsinfo *nsi,
struct strfilter *_filter, bool user)
{
- struct rb_node *nd;
struct map *map;
struct dso *dso;
int ret;
@@ -3767,17 +3766,16 @@ int show_available_funcs(const char *target, struct nsinfo *nsi,
goto end;
}
dso = map__dso(map);
- if (!dso__sorted_by_name(dso))
- dso__sort_by_name(dso);
+ dso__sort_by_name(dso);
/* Show all (filtered) symbols */
setup_pager();
- for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
- struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+ for (size_t i = 0; i < dso->symbol_names_len; i++) {
+ struct symbol *pos = dso->symbol_names[i];
- if (strfilter__compare(_filter, pos->sym.name))
- printf("%s\n", pos->sym.name);
+ if (strfilter__compare(_filter, pos->name))
+ printf("%s\n", pos->name);
}
end:
map__put(map);
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index aa5156c2bcff..d4c9b4cd35ef 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -39,5 +39,4 @@ util/affinity.c
util/rwsem.c
util/hashmap.c
util/perf_regs.c
-util/pmu-hybrid.c
util/fncache.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 0faea4c75eed..4eed8ec23994 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -22,6 +22,7 @@
#include "util/bpf-filter.h"
#include "util/env.h"
#include "util/pmu.h"
+#include "util/pmus.h"
#include <internal/lib.h>
#include "util.h"
@@ -50,6 +51,14 @@
#endif
/*
+ * Avoid bringing in event parsing.
+ */
+int parse_event(struct evlist *evlist __maybe_unused, const char *str __maybe_unused)
+{
+ return 0;
+}
+
+/*
* Provide these two so that we don't have to link against callchain.c and
* start dragging hist.c, etc.
*/
@@ -94,6 +103,11 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
return EOF;
}
+int perf_pmus__num_core_pmus(void)
+{
+ return 1;
+}
+
bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused)
{
return false;
@@ -110,6 +124,14 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
}
/*
+ * Add this one here so as not to drag in util/trace-event-info.c
+ */
+char *tracepoint_id_to_name(u64 config)
+{
+ return NULL;
+}
+
+/*
* XXX: All these evsel destructors need some better mechanism, like a linked
* list of destructors registered when the relevant code indeed is used instead
* of having more and more calls in perf_evsel__delete(). -- acme
@@ -1480,3 +1502,7 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cp
int fd, int group_fd, unsigned long flags)
{
}
+
+void evlist__free_stats(struct evlist *evlist)
+{
+}
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 039d0365ad41..603091317bed 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -67,8 +67,6 @@ INTERP my_perl;
#define TRACE_EVENT_TYPE_MAX \
((1 << (sizeof(unsigned short) * 8)) - 1)
-static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
-
extern struct scripting_context *scripting_context;
static char *cur_field_name;
@@ -262,6 +260,7 @@ static SV *perl_process_callchain(struct perf_sample *sample,
struct evsel *evsel,
struct addr_location *al)
{
+ struct callchain_cursor *cursor;
AV *list;
list = newAV();
@@ -271,18 +270,20 @@ static SV *perl_process_callchain(struct perf_sample *sample,
if (!symbol_conf.use_callchain || !sample->callchain)
goto exit;
- if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ cursor = get_tls_callchain_cursor();
+
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
sample, NULL, NULL, scripting_max_stack) != 0) {
pr_err("Failed to resolve callchain. Skipping\n");
goto exit;
}
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
while (1) {
HV *elem;
struct callchain_cursor_node *node;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(cursor);
if (!node)
break;
@@ -330,7 +331,7 @@ static SV *perl_process_callchain(struct perf_sample *sample,
}
}
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(cursor);
av_push(list, newRV_noinc((SV*)elem));
}
@@ -353,7 +354,9 @@ static void perl_process_tracepoint(struct perf_sample *sample,
void *data = sample->raw_data;
unsigned long long nsecs = sample->time;
const char *comm = thread__comm_str(thread);
+ DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+ bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX);
dSP;
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
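
The bitmap move in the perl engine is subtle: automatic (stack) bitmaps are not zero-initialized, so the explicit bitmap_zero() is required before first use. A minimal sketch of the pattern using the tools bitmap helpers (size and bit number are arbitrary):

    #include <linux/bitmap.h>

    #define EVENT_TYPE_MAX 1024

    static void handle_events(void)
    {
        DECLARE_BITMAP(events_defined, EVENT_TYPE_MAX); /* uninitialized stack storage */

        bitmap_zero(events_defined, EVENT_TYPE_MAX); /* must clear before use */

        /* Define each event type at most once per invocation. */
        if (!test_and_set_bit(42, events_defined))
            ; /* first sighting of event type 42: define its handler */
    }
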
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 41d4f9e6a8b7..94312741443a 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -93,8 +93,6 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void);
#define TRACE_EVENT_TYPE_MAX \
((1 << (sizeof(unsigned short) * 8)) - 1)
-static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
-
#define N_COMMON_FIELDS 7
static char *cur_field_name;
@@ -419,6 +417,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
struct addr_location *al)
{
PyObject *pylist;
+ struct callchain_cursor *cursor;
pylist = PyList_New(0);
if (!pylist)
@@ -427,19 +426,20 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
if (!symbol_conf.use_callchain || !sample->callchain)
goto exit;
- if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
sample, NULL, NULL,
scripting_max_stack) != 0) {
pr_err("Failed to resolve callchain. Skipping\n");
goto exit;
}
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
while (1) {
PyObject *pyelem;
struct callchain_cursor_node *node;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(cursor);
if (!node)
break;
@@ -471,9 +471,11 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
struct addr_location node_al;
unsigned long offset;
+ addr_location__init(&node_al);
node_al.addr = map__map_ip(map, node->ip);
- node_al.map = map;
+ node_al.map = map__get(map);
offset = get_offset(node->ms.sym, &node_al);
+ addr_location__exit(&node_al);
pydict_set_item_string_decref(
pyelem, "sym_off",
@@ -493,7 +495,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
_PyUnicode_FromString(dsoname));
}
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(cursor);
PyList_Append(pylist, pyelem);
Py_DECREF(pyelem);
}
@@ -541,6 +543,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
pydict_set_item_string_decref(pyelem, "cycles",
PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
+ addr_location__init(&al);
thread__find_map_fb(thread, sample->cpumode,
entries[i].from, &al);
dsoname = get_dsoname(al.map);
@@ -553,6 +556,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
pydict_set_item_string_decref(pyelem, "to_dsoname",
_PyUnicode_FromString(dsoname));
+ addr_location__exit(&al);
PyList_Append(pylist, pyelem);
Py_DECREF(pyelem);
}
@@ -596,7 +600,6 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
PyObject *pylist;
u64 i;
char bf[512];
- struct addr_location al;
pylist = PyList_New(0);
if (!pylist)
@@ -607,7 +610,9 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
for (i = 0; i < br->nr; i++) {
PyObject *pyelem;
+ struct addr_location al;
+ addr_location__init(&al);
pyelem = PyDict_New();
if (!pyelem)
Py_FatalError("couldn't create Python dictionary");
@@ -646,6 +651,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
PyList_Append(pylist, pyelem);
Py_DECREF(pyelem);
+ addr_location__exit(&al);
}
exit:
@@ -733,6 +739,9 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch
bf[0] = 0;
+ if (size <= 0)
+ return;
+
if (!regs || !regs->regs)
return;
@@ -760,17 +769,18 @@ static void set_regs_in_dict(PyObject *dict,
* 10 chars is for register name.
*/
int size = __sw_hweight64(attr->sample_regs_intr) * 28;
- char bf[size];
+ char *bf = malloc(size);
- regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf));
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
pydict_set_item_string_decref(dict, "iregs",
_PyUnicode_FromString(bf));
- regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf));
+ regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size);
pydict_set_item_string_decref(dict, "uregs",
_PyUnicode_FromString(bf));
+ free(bf);
}
static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
@@ -934,6 +944,9 @@ static void python_process_tracepoint(struct perf_sample *sample,
unsigned long long nsecs = sample->time;
const char *comm = thread__comm_str(al->thread);
const char *default_handler_name = "trace_unhandled";
+ DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+ bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX);
if (!event) {
snprintf(handler_name, sizeof(handler_name),
@@ -1162,11 +1175,11 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread,
t = tuple_new(5);
- tuple_set_d64(t, 0, thread->db_id);
+ tuple_set_d64(t, 0, thread__db_id(thread));
tuple_set_d64(t, 1, machine->db_id);
tuple_set_d64(t, 2, main_thread_db_id);
- tuple_set_s32(t, 3, thread->pid_);
- tuple_set_s32(t, 4, thread->tid);
+ tuple_set_s32(t, 3, thread__pid(thread));
+ tuple_set_s32(t, 4, thread__tid(thread));
call_object(tables->thread_handler, t, "thread_table");
@@ -1185,7 +1198,7 @@ static int python_export_comm(struct db_export *dbe, struct comm *comm,
tuple_set_d64(t, 0, comm->db_id);
tuple_set_string(t, 1, comm__str(comm));
- tuple_set_d64(t, 2, thread->db_id);
+ tuple_set_d64(t, 2, thread__db_id(thread));
tuple_set_d64(t, 3, comm->start);
tuple_set_s32(t, 4, comm->exec);
@@ -1206,7 +1219,7 @@ static int python_export_comm_thread(struct db_export *dbe, u64 db_id,
tuple_set_d64(t, 0, db_id);
tuple_set_d64(t, 1, comm->db_id);
- tuple_set_d64(t, 2, thread->db_id);
+ tuple_set_d64(t, 2, thread__db_id(thread));
call_object(tables->comm_thread_handler, t, "comm_thread_table");
@@ -1291,7 +1304,7 @@ static void python_export_sample_table(struct db_export *dbe,
tuple_set_d64(t, 0, es->db_id);
tuple_set_d64(t, 1, es->evsel->db_id);
tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id);
- tuple_set_d64(t, 3, es->al->thread->db_id);
+ tuple_set_d64(t, 3, thread__db_id(es->al->thread));
tuple_set_d64(t, 4, es->comm_db_id);
tuple_set_d64(t, 5, es->dso_db_id);
tuple_set_d64(t, 6, es->sym_db_id);
@@ -1381,7 +1394,7 @@ static int python_export_call_return(struct db_export *dbe,
t = tuple_new(14);
tuple_set_d64(t, 0, cr->db_id);
- tuple_set_d64(t, 1, cr->thread->db_id);
+ tuple_set_d64(t, 1, thread__db_id(cr->thread));
tuple_set_d64(t, 2, comm_db_id);
tuple_set_d64(t, 3, cr->cp->db_id);
tuple_set_d64(t, 4, cr->call_time);
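
The python engine edits follow the addr_location lifetime rules adopted with reference-count checking: init before use, exit to drop any references the lookup took, and store maps with their own reference. A condensed sketch of the pattern, assuming perf's internal headers:

    #include "util/addr_location.h"
    #include "util/map.h"

    static void use_map(struct map *map, u64 ip)
    {
        struct addr_location al;

        addr_location__init(&al);  /* zero the fields */
        al.addr = map__map_ip(map, ip);
        al.map = map__get(map);    /* al now owns a map reference */
        /* ... resolve symbols, fill dictionaries ... */
        addr_location__exit(&al);  /* puts al.map and friends */
    }
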
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e2806791c76a..00d18c74c090 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -278,11 +278,6 @@ struct perf_session *__perf_session__new(struct perf_data *data,
return ERR_PTR(ret);
}
-static void perf_session__delete_threads(struct perf_session *session)
-{
- machine__delete_threads(&session->machines.host);
-}
-
static void perf_decomp__release_events(struct decomp *next)
{
struct decomp *decomp;
@@ -305,7 +300,6 @@ void perf_session__delete(struct perf_session *session)
auxtrace__free(session);
auxtrace_index__free(&session->auxtrace_index);
perf_session__destroy_kernel_maps(session);
- perf_session__delete_threads(session);
perf_decomp__release_events(session->decomp_data.decomp);
perf_env__exit(&session->header.env);
machines__exit(&session->machines);
@@ -2807,7 +2801,7 @@ static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid,
if (!thread)
return -ENOMEM;
- thread->guest_cpu = guest_cpu;
+ thread__set_guest_cpu(thread, guest_cpu);
thread__put(thread);
return 0;
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index c294db713677..869738fc06c3 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -36,6 +36,10 @@ if cc_is_clang:
vars[var] = sub("-fno-semantic-interposition", "", vars[var])
if not clang_has_option("-ffat-lto-objects"):
vars[var] = sub("-ffat-lto-objects", "", vars[var])
+ if not clang_has_option("-ftree-loop-distribute-patterns"):
+ vars[var] = sub("-ftree-loop-distribute-patterns", "", vars[var])
+ if not clang_has_option("-gno-variable-location-views"):
+ vars[var] = sub("-gno-variable-location-views", "", vars[var])
from setuptools import setup, Extension
diff --git a/tools/perf/util/sharded_mutex.c b/tools/perf/util/sharded_mutex.c
new file mode 100644
index 000000000000..e11e8d0945a7
--- /dev/null
+++ b/tools/perf/util/sharded_mutex.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "sharded_mutex.h"
+
+#include <stdlib.h>
+
+struct sharded_mutex *sharded_mutex__new(size_t num_shards)
+{
+ struct sharded_mutex *result;
+ size_t size;
+ unsigned int bits;
+
+ for (bits = 0; ((size_t)1 << bits) < num_shards; bits++)
+ ;
+
+ size = sizeof(*result) + sizeof(struct mutex) * (1 << bits);
+ result = malloc(size);
+ if (!result)
+ return NULL;
+
+ result->cap_bits = bits;
+ for (size_t i = 0; i < ((size_t)1 << bits); i++)
+ mutex_init(&result->mutexes[i]);
+
+ return result;
+}
+
+void sharded_mutex__delete(struct sharded_mutex *sm)
+{
+ for (size_t i = 0; i < ((size_t)1 << sm->cap_bits); i++)
+ mutex_destroy(&sm->mutexes[i]);
+
+ free(sm);
+}
diff --git a/tools/perf/util/sharded_mutex.h b/tools/perf/util/sharded_mutex.h
new file mode 100644
index 000000000000..7325e969eee3
--- /dev/null
+++ b/tools/perf/util/sharded_mutex.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_SHARDED_MUTEX_H
+#define PERF_SHARDED_MUTEX_H
+
+#include "mutex.h"
+#include "hashmap.h"
+
+/*
+ * In a situation where a lock is needed per object, having a mutex can be
+ * relatively memory expensive (40 bytes on x86-64). If the object can be
+ * consistently hashed, a sharded mutex is an alternative global pool of mutexes
+ * where the mutex is looked up from a hash value. This can lead to collisions
+ * if the number of shards isn't large enough.
+ */
+struct sharded_mutex {
+ /* mutexes array is 1<<cap_bits in size. */
+ unsigned int cap_bits;
+ struct mutex mutexes[];
+};
+
+struct sharded_mutex *sharded_mutex__new(size_t num_shards);
+void sharded_mutex__delete(struct sharded_mutex *sm);
+
+static inline struct mutex *sharded_mutex__get_mutex(struct sharded_mutex *sm, size_t hash)
+{
+ return &sm->mutexes[hash_bits(hash, sm->cap_bits)];
+}
+
+#endif /* PERF_SHARDED_MUTEX_H */
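
A usage sketch for the new facility: allocate a pool (sharded_mutex__new() rounds the shard count up to a power of two), derive a hash from something stable about the object, and lock the shard it maps to. hash_bits() inside the getter mixes the value, so passing a pointer cast to size_t is reasonable here; the wrapper is illustrative:

    #include "util/sharded_mutex.h"

    static struct sharded_mutex *obj_locks;

    static int init_obj_locks(void)
    {
        obj_locks = sharded_mutex__new(128); /* at least 128 shards */
        return obj_locks ? 0 : -1;
    }

    static void with_object_locked(void *obj)
    {
        struct mutex *lock = sharded_mutex__get_mutex(obj_locks, (size_t)obj);

        mutex_lock(lock);
        /* ... mutate *obj; unrelated objects may share this shard ... */
        mutex_unlock(lock);
    }
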
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 650cd8df4041..6aa1c7f2b444 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -108,7 +108,7 @@ static int64_t cmp_null(const void *l, const void *r)
static int64_t
sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
{
- return right->thread->tid - left->thread->tid;
+ return thread__tid(right->thread) - thread__tid(left->thread);
}
static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
@@ -117,7 +117,7 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
const char *comm = thread__comm_str(he->thread);
width = max(7U, width) - 8;
- return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
+ return repsep_snprintf(bf, size, "%7d:%-*.*s", thread__tid(he->thread),
width, width, comm ?: "");
}
@@ -128,7 +128,7 @@ static int hist_entry__thread_filter(struct hist_entry *he, int type, const void
if (type != HIST_FILTER__THREAD)
return -1;
- return th && he->thread != th;
+ return th && RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(th);
}
struct sort_entry sort_thread = {
@@ -643,7 +643,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e)
sf = __get_srcline(map__dso(map), map__rip_2objdump(map, e->ip),
e->ms.sym, false, true, true, e->ip);
- if (!strcmp(sf, SRCLINE_UNKNOWN))
+ if (sf == SRCLINE_UNKNOWN)
return no_srcfile;
p = strchr(sf, ':');
if (p && *sf) {
@@ -1543,8 +1543,10 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
!l_dso->id.ino && !l_dso->id.ino_generation) {
/* userspace anonymous */
- if (left->thread->pid_ > right->thread->pid_) return -1;
- if (left->thread->pid_ < right->thread->pid_) return 1;
+ if (thread__pid(left->thread) > thread__pid(right->thread))
+ return -1;
+ if (thread__pid(left->thread) < thread__pid(right->thread))
+ return 1;
}
addr:
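
The mechanical sort.c edits reflect struct thread becoming reference-count checked: field access now goes through accessors, and identity comparison has to unwrap the checked pointer rather than compare wrappers. A before/after sketch, assuming perf's thread.h and the RC_CHK macros:

    #include "util/thread.h"

    static int64_t thread_cmp(struct thread *l, struct thread *r)
    {
        /* was: r->tid - l->tid */
        return thread__tid(r) - thread__tid(l);
    }

    static bool same_thread(struct thread *a, struct thread *b)
    {
        /* Pointer equality would compare wrappers; unwrap for identity. */
        return RC_CHK_ACCESS(a) == RC_CHK_ACCESS(b);
    }
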
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index cfca03abd6f8..034b496df297 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -21,8 +21,12 @@
#include "symbol.h"
#include "subcmd/run-command.h"
+/* If addr2line doesn't return data for 1 second then time out. */
+int addr2line_timeout_ms = 1 * 1000;
bool srcline_full_filename;
+char *srcline__unknown = (char *)"??:0";
+
static const char *dso__name(struct dso *dso)
{
const char *dso_name;
@@ -385,7 +389,7 @@ static int filename_split(char *filename, unsigned int *line_nr)
*line_nr = strtoul(sep, NULL, 0);
return 1;
}
-
+ pr_debug("addr2line missing ':' in filename split\n");
return 0;
}
@@ -406,7 +410,7 @@ static struct child_process *addr2line_subprocess_init(const char *addr2line_pat
const char *argv[] = {
addr2line_path ?: "addr2line",
"-e", binary_path,
- "-i", "-f", NULL
+ "-a", "-i", "-f", NULL
};
struct child_process *a2l = zalloc(sizeof(*a2l));
int start_command_status = 0;
@@ -441,7 +445,7 @@ enum a2l_style {
LLVM,
};
-static enum a2l_style addr2line_configure(struct child_process *a2l)
+static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name)
{
static bool cached;
static enum a2l_style style;
@@ -450,6 +454,7 @@ static enum a2l_style addr2line_configure(struct child_process *a2l)
char buf[128];
struct io io;
int ch;
+ int lines;
if (write(a2l->in, ",\n", 2) != 2)
return BROKEN;
@@ -459,19 +464,32 @@ static enum a2l_style addr2line_configure(struct child_process *a2l)
if (ch == ',') {
style = LLVM;
cached = true;
- } else if (ch == '?') {
+ lines = 1;
+ pr_debug("Detected LLVM addr2line style\n");
+ } else if (ch == '0') {
style = GNU_BINUTILS;
cached = true;
+ lines = 3;
+ pr_debug("Detected binutils addr2line style\n");
} else {
- style = BROKEN;
+ if (!symbol_conf.disable_add2line_warn) {
+ char *output = NULL;
+ size_t output_len;
+
+ io__getline(&io, &output, &output_len);
+ pr_warning("%s %s: addr2line configuration failed\n",
+ __func__, dso_name);
+ pr_warning("\t%c%s", ch, output);
+ }
+ pr_debug("Unknown/broken addr2line style\n");
+ return BROKEN;
}
- do {
+ while (lines) {
ch = io__get_char(&io);
- } while (ch > 0 && ch != '\n');
- if (style == GNU_BINUTILS) {
- do {
- ch = io__get_char(&io);
- } while (ch > 0 && ch != '\n');
+ if (ch <= 0)
+ break;
+ if (ch == '\n')
+ lines--;
}
/* Ignore SIGPIPE in the event addr2line exits. */
signal(SIGPIPE, SIG_IGN);
@@ -481,6 +499,9 @@ static enum a2l_style addr2line_configure(struct child_process *a2l)
static int read_addr2line_record(struct io *io,
enum a2l_style style,
+ const char *dso_name,
+ u64 addr,
+ bool first,
char **function,
char **filename,
unsigned int *line_nr)
@@ -505,23 +526,74 @@ static int read_addr2line_record(struct io *io,
if (line_nr != NULL)
*line_nr = 0;
+ /*
+ * Read the first line. Without an error this will be:
+ * - for the first line an address like 0x1234,
+ * - the binutils sentinel 0x0000000000000000,
+ * - the llvm-addr2line sentinel ',' character,
+ * - the function name line for an inlined function.
+ */
if (io__getline(io, &line, &line_len) < 0 || !line_len)
goto error;
+ pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
if (style == LLVM && line_len == 2 && line[0] == ',') {
+ /* Found the llvm-addr2line sentinel character. */
zfree(&line);
return 0;
+ } else if (style == GNU_BINUTILS && (!first || addr != 0)) {
+ int zero_count = 0, non_zero_count = 0;
+ /*
+ * Check for binutils sentinel ignoring it for the case the
+ * requested address is 0.
+ */
+
+ /* A given address should always start with 0x. */
+ if (line_len >= 2 && line[0] == '0' && line[1] == 'x') {
+ for (size_t i = 2; i < line_len; i++) {
+ if (line[i] == '0')
+ zero_count++;
+ else if (line[i] != '\n')
+ non_zero_count++;
+ }
+ if (!non_zero_count) {
+ int ch;
+
+ if (first && !zero_count) {
+ /* Line was just '0x', which is erroneous. */
+ goto error;
+ }
+ /*
+ * Line was 0x0..0, the sentinel for binutils. Remove
+ * the function and filename lines.
+ */
+ zfree(&line);
+ do {
+ ch = io__get_char(io);
+ } while (ch > 0 && ch != '\n');
+ do {
+ ch = io__get_char(io);
+ } while (ch > 0 && ch != '\n');
+ return 0;
+ }
+ }
}
+ /* Read the second line, the function name (for an inlined record this is its first line). */
+ if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
+ goto error;
+ pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line);
if (function != NULL)
*function = strdup(strim(line));
zfree(&line);
line_len = 0;
+ /* Read the third line, the filename and line number. */
if (io__getline(io, &line, &line_len) < 0 || !line_len)
goto error;
+ pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
style == GNU_BINUTILS) {
ret = 0;
@@ -574,15 +646,14 @@ static int addr2line(const char *dso_name, u64 addr,
int len;
char buf[128];
ssize_t written;
- struct io io;
+ struct io io = { .eof = false };
enum a2l_style a2l_style;
if (!a2l) {
if (!filename__has_section(dso_name, ".debug_line"))
goto out;
- dso->a2l = addr2line_subprocess_init(symbol_conf.addr2line_path,
- dso_name);
+ dso->a2l = addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name);
a2l = dso->a2l;
}
@@ -591,22 +662,18 @@ static int addr2line(const char *dso_name, u64 addr,
pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
goto out;
}
- a2l_style = addr2line_configure(a2l);
- if (a2l_style == BROKEN) {
- if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s: addr2line configuration failed\n", __func__);
+ a2l_style = addr2line_configure(a2l, dso_name);
+ if (a2l_style == BROKEN)
goto out;
- }
/*
- * Send our request and then *deliberately* send something that can't be interpreted as
- * a valid address to ask addr2line about (namely, ","). This causes addr2line to first
- * write out the answer to our request, in an unbounded/unknown number of records, and
- * then to write out the lines "??" and "??:0", for GNU binutils, or "," for
- * llvm-addr2line, so that we can detect when it has finished giving us anything
- * useful. With GNU binutils, we have to be careful about the first record, though,
- * because it may be genuinely unknown, in which case we'll get two sets of "??"/"??:0"
- * lines.
+ * Send our request and then *deliberately* send something that can't be
+ * interpreted as a valid address to ask addr2line about (namely,
+ * ","). This causes addr2line to first write out the answer to our
+ * request, in an unbounded/unknown number of records, and then to write
+ * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or ","
+ * for llvm-addr2line, so that we can detect when it has finished giving
+ * us anything useful.
*/
len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr);
written = len > 0 ? write(a2l->in, buf, len) : -1;
@@ -616,8 +683,8 @@ static int addr2line(const char *dso_name, u64 addr,
goto out;
}
io__init(&io, a2l->out, buf, sizeof(buf));
-
- switch (read_addr2line_record(&io, a2l_style,
+ io.timeout_ms = addr2line_timeout_ms;
+ switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true,
&record_function, &record_filename, &record_line_nr)) {
case -1:
if (!symbol_conf.disable_add2line_warn)
@@ -625,17 +692,23 @@ static int addr2line(const char *dso_name, u64 addr,
goto out;
case 0:
/*
- * The first record was invalid, so return failure, but first read another
- * record, since we asked a junk question and have to clear the answer out.
+ * The first record was invalid, so return failure, but first
+ * read another record, since we sent a sentinel ',' for the
+ * sake of detecting the last inlined function. Treat this as
+ * the first record, as the ',' starts a new record with GNU
+ * binutils; also force a non-zero address as we're no longer
+ * reading the first record.
*/
- switch (read_addr2line_record(&io, a2l_style, NULL, NULL, NULL)) {
+ switch (read_addr2line_record(&io, a2l_style, dso_name,
+ /*addr=*/1, /*first=*/true,
+ NULL, NULL, NULL)) {
case -1:
if (!symbol_conf.disable_add2line_warn)
- pr_warning("%s %s: could not read delimiter record\n",
+ pr_warning("%s %s: could not read sentinel record\n",
__func__, dso_name);
break;
case 0:
- /* As expected. */
+ /* The sentinel as expected. */
break;
default:
if (!symbol_conf.disable_add2line_warn)
@@ -645,6 +718,7 @@ static int addr2line(const char *dso_name, u64 addr,
}
goto out;
default:
+ /* First record as expected. */
break;
}
@@ -665,9 +739,16 @@ static int addr2line(const char *dso_name, u64 addr,
}
}
- /* We have to read the records even if we don't care about the inline info. */
+ /*
+ * We have to read the records even if we don't care about the inline
+ * info. This isn't the first record, so force the address to
+ * non-zero as we're reading records beyond the first.
+ */
while ((record_status = read_addr2line_record(&io,
a2l_style,
+ dso_name,
+ /*addr=*/1,
+ /*first=*/false,
&record_function,
&record_filename,
&record_line_nr)) == 1) {
@@ -686,6 +767,10 @@ static int addr2line(const char *dso_name, u64 addr,
out:
free(record_function);
free(record_filename);
+ if (io.eof) {
+ dso->a2l = NULL;
+ addr2line_subprocess_cleanup(a2l);
+ }
return ret;
}
@@ -804,10 +889,15 @@ out:
return NULL;
}
-void free_srcline(char *srcline)
+void zfree_srcline(char **srcline)
{
- if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0)
- free(srcline);
+ if (*srcline == NULL)
+ return;
+
+ if (*srcline != SRCLINE_UNKNOWN)
+ free(*srcline);
+
+ *srcline = NULL;
}
char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
@@ -880,7 +970,7 @@ void srcline__tree_delete(struct rb_root_cached *tree)
pos = rb_entry(next, struct srcline_node, rb_node);
next = rb_next(&pos->rb_node);
rb_erase_cached(&pos->rb_node, tree);
- free_srcline(pos->srcline);
+ zfree_srcline(&pos->srcline);
zfree(&pos);
}
}
@@ -903,7 +993,7 @@ void inline_node__delete(struct inline_node *node)
list_for_each_entry_safe(ilist, tmp, &node->val, list) {
list_del_init(&ilist->list);
- free_srcline(ilist->srcline);
+ zfree_srcline(&ilist->srcline);
/* only the inlined symbols are owned by the list */
if (ilist->symbol && ilist->symbol->inlined)
symbol__delete(ilist->symbol);
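
Putting the srcline.c changes together, one query/reply round trip with a GNU binutils addr2line child (spawned with -a -i -f, as above) has roughly the following shape. This is a sketch with error handling elided; a2l_in stands for the pipe fd connected to the child's stdin:

#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

static void query(int a2l_in, uint64_t addr)
{
	char buf[128];
	/* The request: the address, then the deliberately invalid ",". */
	int len = snprintf(buf, sizeof(buf), "%016" PRIx64 "\n,\n", addr);

	if (len <= 0 || write(a2l_in, buf, len) != len)
		return;
	/*
	 * The reply on the child's stdout is then:
	 *   0x<addr>           - echoed address (from -a)
	 *   <function>         - innermost function name (from -f)
	 *   <file>:<line>
	 *   ...                - more name/file:line pairs when inlined (-i)
	 *   0x0000000000000000 - sentinel record answering the ","
	 *   ??
	 *   ??:0
	 * llvm-addr2line instead ends the stream with a lone ",".
	 */
}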
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index b11a0aaaa676..75010d39ea28 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -9,13 +9,14 @@
struct dso;
struct symbol;
+extern int addr2line_timeout_ms;
extern bool srcline_full_filename;
char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
bool show_sym, bool show_addr, u64 ip);
char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
bool show_sym, bool show_addr, bool unwind_inlines,
u64 ip);
-void free_srcline(char *srcline);
+void zfree_srcline(char **srcline);
char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line);
/* insert the srcline into the DSO, which will take ownership */
@@ -25,7 +26,8 @@ char *srcline__tree_find(struct rb_root_cached *tree, u64 addr);
/* delete all srclines within the tree */
void srcline__tree_delete(struct rb_root_cached *tree);
-#define SRCLINE_UNKNOWN ((char *) "??:0")
+extern char *srcline__unknown;
+#define SRCLINE_UNKNOWN srcline__unknown
struct inline_list {
struct symbol *symbol;
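
Making SRCLINE_UNKNOWN a single unique object (srcline__unknown) rather than a repeated string literal is what lets callers test it with pointer equality, and zfree_srcline() clearing the pointer makes repeated cleanup harmless. A hypothetical caller, assuming dso, addr, sym and ip are in scope:

	char *srcline = get_srcline(dso, addr, sym, /*show_sym=*/false,
				    /*show_addr=*/false, ip);

	if (srcline != SRCLINE_UNKNOWN)
		printf("%s\n", srcline);

	zfree_srcline(&srcline);	/* frees unless SRCLINE_UNKNOWN, then NULLs */
	zfree_srcline(&srcline);	/* harmless: *srcline is already NULL */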
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bf5a6c14dfcd..d45d5dcb0e2b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -19,12 +19,13 @@
#include <api/fs/fs.h>
#include "util.h"
#include "iostat.h"
-#include "pmu-hybrid.h"
-#include "evlist-hybrid.h"
+#include "pmu.h"
+#include "pmus.h"
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
+#define MGROUP_LEN 50
#define METRIC_LEN 38
#define EVNAME_LEN 32
#define COUNTS_LEN 18
@@ -36,6 +37,7 @@
static int aggr_header_lens[] = {
[AGGR_CORE] = 18,
+ [AGGR_CACHE] = 22,
[AGGR_DIE] = 12,
[AGGR_SOCKET] = 6,
[AGGR_NODE] = 6,
@@ -46,6 +48,7 @@ static int aggr_header_lens[] = {
static const char *aggr_header_csv[] = {
[AGGR_CORE] = "core,cpus,",
+ [AGGR_CACHE] = "cache,cpus,",
[AGGR_DIE] = "die,cpus,",
[AGGR_SOCKET] = "socket,cpus,",
[AGGR_NONE] = "cpu,",
@@ -56,6 +59,7 @@ static const char *aggr_header_csv[] = {
static const char *aggr_header_std[] = {
[AGGR_CORE] = "core",
+ [AGGR_CACHE] = "cache",
[AGGR_DIE] = "die",
[AGGR_SOCKET] = "socket",
[AGGR_NONE] = "cpu",
@@ -193,6 +197,10 @@ static void print_aggr_id_std(struct perf_stat_config *config,
case AGGR_CORE:
snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core);
break;
+ case AGGR_CACHE:
+ snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d",
+ id.socket, id.die, id.cache_lvl, id.cache);
+ break;
case AGGR_DIE:
snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
break;
@@ -239,6 +247,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config,
fprintf(output, "S%d-D%d-C%d%s%d%s",
id.socket, id.die, id.core, sep, aggr_nr, sep);
break;
+ case AGGR_CACHE:
+ fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s",
+ id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep);
+ break;
case AGGR_DIE:
fprintf(output, "S%d-D%d%s%d%s",
id.socket, id.die, sep, aggr_nr, sep);
@@ -284,6 +296,10 @@ static void print_aggr_id_json(struct perf_stat_config *config,
fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, id.core, aggr_nr);
break;
+ case AGGR_CACHE:
+ fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
+ id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
+ break;
case AGGR_DIE:
fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, aggr_nr);
@@ -349,16 +365,27 @@ static void new_line_std(struct perf_stat_config *config __maybe_unused,
os->newline = true;
}
-static void do_new_line_std(struct perf_stat_config *config,
- struct outstate *os)
+static inline void __new_line_std_csv(struct perf_stat_config *config,
+ struct outstate *os)
{
fputc('\n', os->fh);
if (os->prefix)
fputs(os->prefix, os->fh);
aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+}
+
+static inline void __new_line_std(struct outstate *os)
+{
+ fprintf(os->fh, " ");
+}
+
+static void do_new_line_std(struct perf_stat_config *config,
+ struct outstate *os)
+{
+ __new_line_std_csv(config, os);
if (config->aggr_mode == AGGR_NONE)
fprintf(os->fh, " ");
- fprintf(os->fh, " ");
+ __new_line_std(os);
}
static void print_metric_std(struct perf_stat_config *config,
@@ -393,10 +420,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx)
struct outstate *os = ctx;
int i;
- fputc('\n', os->fh);
- if (os->prefix)
- fprintf(os->fh, "%s", os->prefix);
- aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+ __new_line_std_csv(config, os);
for (i = 0; i < os->nfields; i++)
fputs(config->csv_sep, os->fh);
}
@@ -447,6 +471,54 @@ static void new_line_json(struct perf_stat_config *config, void *ctx)
aggr_printout(config, os->evsel, os->id, os->aggr_nr);
}
+static void print_metricgroup_header_json(struct perf_stat_config *config,
+ void *ctx,
+ const char *metricgroup_name)
+{
+ if (!metricgroup_name)
+ return;
+
+ fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name);
+ new_line_json(config, ctx);
+}
+
+static void print_metricgroup_header_csv(struct perf_stat_config *config,
+ void *ctx,
+ const char *metricgroup_name)
+{
+ struct outstate *os = ctx;
+ int i;
+
+ if (!metricgroup_name) {
+ /* Leave space for running and enabling */
+ for (i = 0; i < os->nfields - 2; i++)
+ fputs(config->csv_sep, os->fh);
+ return;
+ }
+
+ for (i = 0; i < os->nfields; i++)
+ fputs(config->csv_sep, os->fh);
+ fprintf(config->output, "%s", metricgroup_name);
+ new_line_csv(config, ctx);
+}
+
+static void print_metricgroup_header_std(struct perf_stat_config *config,
+ void *ctx,
+ const char *metricgroup_name)
+{
+ struct outstate *os = ctx;
+ int n;
+
+ if (!metricgroup_name) {
+ __new_line_std(os);
+ return;
+ }
+
+ n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name);
+
+ fprintf(config->output, "%*s", MGROUP_LEN - n - 1, "");
+}
+
/* Filter out some columns that don't work well in metrics only mode */
static bool valid_only_metric(const char *unit)
@@ -677,25 +749,44 @@ static bool is_mixed_hw_group(struct evsel *counter)
return false;
}
+static bool evlist__has_hybrid(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (perf_pmus__num_core_pmus() == 1)
+ return false;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.is_pmu_core)
+ return true;
+ }
+
+ return false;
+}
+
static void printout(struct perf_stat_config *config, struct outstate *os,
double uval, u64 run, u64 ena, double noise, int aggr_idx)
{
struct perf_stat_output_ctx out;
print_metric_t pm;
new_line_t nl;
+ print_metricgroup_header_t pmh;
bool ok = true;
struct evsel *counter = os->evsel;
if (config->csv_output) {
pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
nl = config->metric_only ? new_line_metric : new_line_csv;
+ pmh = print_metricgroup_header_csv;
os->nfields = 4 + (counter->cgrp ? 1 : 0);
} else if (config->json_output) {
pm = config->metric_only ? print_metric_only_json : print_metric_json;
nl = config->metric_only ? new_line_metric : new_line_json;
+ pmh = print_metricgroup_header_json;
} else {
pm = config->metric_only ? print_metric_only : print_metric_std;
nl = config->metric_only ? new_line_metric : new_line_std;
+ pmh = print_metricgroup_header_std;
}
if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
@@ -717,10 +808,11 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
out.print_metric = pm;
out.new_line = nl;
+ out.print_metricgroup_header = pmh;
out.ctx = os;
out.force_header = false;
- if (!config->metric_only) {
+ if (!config->metric_only && !counter->default_metricgroup) {
abs_printout(config, os->id, os->aggr_nr, counter, uval, ok);
print_noise(config, counter, noise, /*before_metric=*/true);
@@ -728,8 +820,31 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
}
if (ok) {
- perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
- &out, &config->metric_events);
+ if (!config->metric_only && counter->default_metricgroup) {
+ void *from = NULL;
+
+ aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+ /* Print out all the metricgroup with the same metric event. */
+ do {
+ int num = 0;
+
+ /* Print out the new line for the next new metricgroup. */
+ if (from) {
+ if (config->json_output)
+ new_line_json(config, (void *)os);
+ else
+ __new_line_std_csv(config, os);
+ }
+
+ print_noise(config, counter, noise, /*before_metric=*/true);
+ print_running(config, run, ena, /*before_metric=*/true);
+ from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx,
+ &num, from, &out,
+ &config->metric_events);
+ } while (from != NULL);
+ } else
+ perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
+ &out, &config->metric_events);
} else {
pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
}
@@ -816,6 +931,11 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
*/
if (config->aggr_mode == AGGR_THREAD && config->system_wide)
return true;
+
+ /* Tool events have the software PMU but are only gathered on one CPU. */
+ if (evsel__is_tool(counter))
+ return true;
+
/*
* Skip value 0 when it's an uncore event and the given aggr id
* does not belong to the PMU cpumask.
@@ -859,6 +979,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
ena = aggr->counts.ena;
run = aggr->counts.run;
+ if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run))
+ return;
+
if (val == 0 && should_skip_zero_counter(config, counter, &id))
return;
@@ -1125,6 +1248,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
case AGGR_NODE:
case AGGR_SOCKET:
case AGGR_DIE:
+ case AGGR_CACHE:
case AGGR_CORE:
fprintf(output, "#%*s %-*s cpus",
INTERVAL_LEN - 1, "time",
@@ -1425,6 +1549,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
switch (config->aggr_mode) {
case AGGR_CORE:
+ case AGGR_CACHE:
case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NODE:
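
The new AGGR_CACHE identifiers encode socket, die, cache level and cache instance id. A standalone illustration of the three encodings added above (std, csv, json), with made-up topology values:

#include <stdio.h>

int main(void)
{
	int socket = 0, die = 0, cache_lvl = 3, cache = 12;
	int aggr_nr = 8;	/* number of CPUs aggregated into this cache */
	const char *sep = ",";

	printf("S%d-D%d-L%d-ID%d\n", socket, die, cache_lvl, cache);
	printf("S%d-D%d-L%d-ID%d%s%d%s\n",
	       socket, die, cache_lvl, cache, sep, aggr_nr, sep);
	printf("\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d\n",
	       socket, die, cache_lvl, cache, aggr_nr);
	return 0;	/* prints S0-D0-L3-ID12 in each of the three styles */
}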
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 1566a206ba42..1c5c3eeba4cf 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -539,6 +539,106 @@ out:
return ratio;
}
+static void perf_stat__print_metricgroup_header(struct perf_stat_config *config,
+ struct evsel *evsel,
+ void *ctxp,
+ const char *name,
+ struct perf_stat_output_ctx *out)
+{
+ bool need_full_name = perf_pmus__num_core_pmus() > 1;
+ static const char *last_name;
+ static const char *last_pmu;
+ char full_name[64];
+
+ /*
+ * A metricgroup may have several metric events,
+ * e.g., TopdownL1 on the e-core of ADL.
+ * The name has been output by the first metric
+ * event. Only align with other metrics from
+ * different metric events.
+ */
+ if (last_name && !strcmp(last_name, name)) {
+ if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) {
+ out->print_metricgroup_header(config, ctxp, NULL);
+ return;
+ }
+ }
+
+ if (need_full_name)
+ scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name);
+ else
+ scnprintf(full_name, sizeof(full_name), "%s", name);
+
+ out->print_metricgroup_header(config, ctxp, full_name);
+
+ last_name = name;
+ last_pmu = evsel->pmu_name;
+}
+
+/**
+ * perf_stat__print_shadow_stats_metricgroup - Print out metrics associated with the evsel
+ * For the non-default mode, all metrics
+ * associated with the evsel are printed.
+ * For the default mode, only the metrics from
+ * the same metricgroup and the name of the
+ * metricgroup are printed. To print the metrics
+ * from the next metricgroup (if available),
+ * invoke the function with the corresponding
+ * metric_expr.
+ */
+void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
+ struct evsel *evsel,
+ int aggr_idx,
+ int *num,
+ void *from,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events)
+{
+ struct metric_event *me;
+ struct metric_expr *mexp = from;
+ void *ctxp = out->ctx;
+ bool header_printed = false;
+ const char *name = NULL;
+
+ me = metricgroup__lookup(metric_events, evsel, false);
+ if (me == NULL)
+ return NULL;
+
+ if (!mexp)
+ mexp = list_first_entry(&me->head, typeof(*mexp), nd);
+
+ list_for_each_entry_from(mexp, &me->head, nd) {
+ /* Print the display name of the Default metricgroup */
+ if (!config->metric_only && me->is_default) {
+ if (!name)
+ name = mexp->default_metricgroup_name;
+ /*
+ * Two or more metricgroups may share the same metric
+ * event, e.g., TopdownL1 and TopdownL2 on SPR.
+ * Return and print the prefix (e.g., noise, running)
+ * for the next metricgroup.
+ */
+ if (strcmp(name, mexp->default_metricgroup_name))
+ return (void *)mexp;
+ /* Only print the name of the metricgroup once */
+ if (!header_printed) {
+ header_printed = true;
+ perf_stat__print_metricgroup_header(config, evsel, ctxp,
+ name, out);
+ }
+ }
+
+ if ((*num)++ > 0)
+ out->new_line(config, ctxp);
+ generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
+ mexp->metric_events, mexp->metric_refs, evsel->name,
+ mexp->metric_name, mexp->metric_unit, mexp->runtime,
+ aggr_idx, out);
+ }
+
+ return NULL;
+}
+
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
double avg, int aggr_idx,
@@ -565,7 +665,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
};
print_metric_t print_metric = out->print_metric;
void *ctxp = out->ctx;
- struct metric_event *me;
int num = 1;
if (config->iostat_run) {
@@ -592,18 +691,26 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
}
}
- if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
- struct metric_expr *mexp;
+ perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx,
+ &num, NULL, out, metric_events);
- list_for_each_entry (mexp, &me->head, nd) {
- if (num++ > 0)
- out->new_line(config, ctxp);
- generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
- mexp->metric_events, mexp->metric_refs, evsel->name,
- mexp->metric_name, mexp->metric_unit, mexp->runtime,
- aggr_idx, out);
- }
- }
if (num == 0)
print_metric(config, ctxp, NULL, NULL, NULL, 0);
}
+
+/**
+ * perf_stat__skip_metric_event - Skip the evsel in the Default metricgroup
+ * if it's not running or is not a metric event.
+ */
+bool perf_stat__skip_metric_event(struct evsel *evsel,
+ struct rblist *metric_events,
+ u64 ena, u64 run)
+{
+ if (!evsel->default_metricgroup)
+ return false;
+
+ if (!ena || !run)
+ return true;
+
+ return !metricgroup__lookup(metric_events, evsel, false);
+}
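
perf_stat__print_shadow_stats_metricgroup() is a resumable iterator: each call prints one metricgroup's metrics and returns an opaque cursor ("from") pointing at the next group, or NULL when done, which lets the caller emit per-group prefixes between calls (as printout() in stat-display.c now does). A standalone sketch of the same idiom over a name-sorted array:

#include <stdio.h>
#include <string.h>

struct entry { const char *group; const char *metric; };

/* Print one group's entries; return a cursor to the next group or NULL. */
static const struct entry *print_group(const struct entry *from,
				       const struct entry *end)
{
	const char *name = from->group;

	printf("%s:\n", name);
	while (from < end && !strcmp(from->group, name)) {
		printf("  %s\n", from->metric);
		from++;
	}
	return from < end ? from : NULL;
}

int main(void)
{
	static const struct entry entries[] = {
		{ "TopdownL1", "tma_backend_bound" },
		{ "TopdownL1", "tma_frontend_bound" },
		{ "TopdownL2", "tma_core_bound" },
	};
	const struct entry *end = entries + sizeof(entries) / sizeof(entries[0]);
	const struct entry *from = entries;

	do {
		/* a real caller prints noise/running prefixes here */
		from = print_group(from, end);
	} while (from != NULL);
	return 0;
}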
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 0f7b8a8cdea6..967e583392c7 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -264,6 +264,28 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
evsel__copy_prev_raw_counts(evsel);
}
+static void evsel__copy_res_stats(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ /*
+ * In GLOBAL aggregation mode, the counts for each run are
+ * accumulated in evsel->stats.res_stats; see perf_stat_process_counter().
+ */
+ *ps->aggr[0].counts.values = avg_stats(&ps->res_stats);
+}
+
+void evlist__copy_res_stats(struct perf_stat_config *config, struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (config->aggr_mode != AGGR_GLOBAL)
+ return;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__copy_res_stats(evsel);
+}
+
static size_t pkg_id_hash(long __key, void *ctx __maybe_unused)
{
uint64_t *key = (uint64_t *) __key;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index e35e188237c8..325d0fad1842 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -48,6 +48,7 @@ enum aggr_mode {
AGGR_GLOBAL,
AGGR_SOCKET,
AGGR_DIE,
+ AGGR_CACHE,
AGGR_CORE,
AGGR_THREAD,
AGGR_UNSET,
@@ -64,6 +65,7 @@ typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, str
struct perf_stat_config {
enum aggr_mode aggr_mode;
+ u32 aggr_level;
bool scale;
bool no_inherit;
bool identifier;
@@ -156,11 +158,16 @@ typedef void (*print_metric_t)(struct perf_stat_config *config,
const char *fmt, double val);
typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
+/* Used to print the display name of the Default metricgroup for now. */
+typedef void (*print_metricgroup_header_t)(struct perf_stat_config *config,
+ void *ctx, const char *metricgroup_name);
+
void perf_stat__reset_shadow_stats(void);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
new_line_t new_line;
+ print_metricgroup_header_t print_metricgroup_header;
bool force_header;
};
@@ -169,6 +176,16 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
double avg, int aggr_idx,
struct perf_stat_output_ctx *out,
struct rblist *metric_events);
+bool perf_stat__skip_metric_event(struct evsel *evsel,
+ struct rblist *metric_events,
+ u64 ena, u64 run);
+void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
+ struct evsel *evsel,
+ int aggr_idx,
+ int *num,
+ void *from,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events);
int evlist__alloc_stats(struct perf_stat_config *config,
struct evlist *evlist, bool alloc_raw);
@@ -180,6 +197,7 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr);
void evlist__reset_aggr_stats(struct evlist *evlist);
+void evlist__copy_res_stats(struct perf_stat_config *config, struct evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter);
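
Wiring an output backend into the extended struct perf_stat_output_ctx then looks like the sketch below; my_header() is a hypothetical fourth implementation alongside the std/csv/json ones in stat-display.c, and pm/nl/os are assumed to be set up as in printout():

static void my_header(struct perf_stat_config *config, void *ctx __maybe_unused,
		      const char *metricgroup_name)
{
	if (metricgroup_name)
		fprintf(config->output, "== %s ==\n", metricgroup_name);
}

	/* ... when building the output context: */
	struct perf_stat_output_ctx out = {
		.ctx = os,
		.print_metric = pm,
		.new_line = nl,
		.print_metricgroup_header = my_header,
		.force_header = false,
	};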
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 63882a4db5c7..8bd466d1c2bd 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -42,6 +42,10 @@
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH 258
+#endif
+
#ifndef ELF32_ST_VISIBILITY
#define ELF32_ST_VISIBILITY(o) ((o) & 0x03)
#endif
@@ -438,6 +442,10 @@ static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt,
*plt_header_size = 32;
*plt_entry_size = 16;
return true;
+ case EM_LOONGARCH:
+ *plt_header_size = 32;
+ *plt_entry_size = 16;
+ return true;
case EM_SPARC:
*plt_header_size = 48;
*plt_entry_size = 12;
@@ -1389,11 +1397,11 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
/* Ensure maps are correctly ordered */
if (kmaps) {
int err;
+ struct map *tmp = map__get(map);
- map__get(map);
maps__remove(kmaps, map);
err = maps__insert(kmaps, map);
- map__put(map);
+ map__put(tmp);
if (err)
return err;
}
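
This hunk and the matching one in symbol.c below follow the same rule: take a local reference before an operation that may drop the container's reference. A toy standalone model of why the temporary reference matters (illustrative refcounting, not the real map API):

#include <stdlib.h>

struct obj { int refcnt; };

static struct obj *get(struct obj *o) { o->refcnt++; return o; }
static void put(struct obj *o) { if (--o->refcnt == 0) free(o); }

int main(void)
{
	struct obj *map = get(calloc(1, sizeof(struct obj)));	/* container's ref */
	struct obj *tmp = get(map);	/* the fix: hold a local reference */

	/*
	 * Without tmp, the "remove" below would free the object and the
	 * "insert" would touch freed memory.
	 */
	put(map);		/* "remove": the container drops its ref */
	map = get(tmp);		/* "insert": the container takes a new ref */
	put(tmp);		/* drop the temporary reference */
	put(map);		/* teardown */
	return 0;
}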
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 6b9c55784b56..f849f9ef68e6 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -440,38 +440,35 @@ static struct symbol *symbols__next(struct symbol *sym)
return NULL;
}
-static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym)
+static int symbols__sort_name_cmp(const void *vlhs, const void *vrhs)
{
- struct rb_node **p = &symbols->rb_root.rb_node;
- struct rb_node *parent = NULL;
- struct symbol_name_rb_node *symn, *s;
- bool leftmost = true;
+ const struct symbol *lhs = *((const struct symbol **)vlhs);
+ const struct symbol *rhs = *((const struct symbol **)vrhs);
- symn = container_of(sym, struct symbol_name_rb_node, sym);
-
- while (*p != NULL) {
- parent = *p;
- s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
- if (strcmp(sym->name, s->sym.name) < 0)
- p = &(*p)->rb_left;
- else {
- p = &(*p)->rb_right;
- leftmost = false;
- }
- }
- rb_link_node(&symn->rb_node, parent, p);
- rb_insert_color_cached(&symn->rb_node, symbols, leftmost);
+ return strcmp(lhs->name, rhs->name);
}
-static void symbols__sort_by_name(struct rb_root_cached *symbols,
- struct rb_root_cached *source)
+static struct symbol **symbols__sort_by_name(struct rb_root_cached *source, size_t *len)
{
struct rb_node *nd;
+ struct symbol **result;
+ size_t i = 0, size = 0;
+
+ for (nd = rb_first_cached(source); nd; nd = rb_next(nd))
+ size++;
+
+ result = malloc(sizeof(*result) * size);
+ if (!result)
+ return NULL;
for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) {
struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
- symbols__insert_by_name(symbols, pos);
+
+ result[i++] = pos;
}
+ qsort(result, size, sizeof(*result), symbols__sort_name_cmp);
+ *len = size;
+ return result;
}
int symbol__match_symbol_name(const char *name, const char *str,
@@ -491,48 +488,53 @@ int symbol__match_symbol_name(const char *name, const char *str,
return arch__compare_symbol_names(name, str);
}
-static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols,
+static struct symbol *symbols__find_by_name(struct symbol *symbols[],
+ size_t symbols_len,
const char *name,
- enum symbol_tag_include includes)
+ enum symbol_tag_include includes,
+ size_t *found_idx)
{
- struct rb_node *n;
- struct symbol_name_rb_node *s = NULL;
+ size_t i, lower = 0, upper = symbols_len;
+ struct symbol *s = NULL;
- if (symbols == NULL)
- return NULL;
+ if (found_idx)
+ *found_idx = SIZE_MAX;
- n = symbols->rb_root.rb_node;
+ if (!symbols_len)
+ return NULL;
- while (n) {
+ while (lower < upper) {
int cmp;
- s = rb_entry(n, struct symbol_name_rb_node, rb_node);
- cmp = symbol__match_symbol_name(s->sym.name, name, includes);
+ i = (lower + upper) / 2;
+ cmp = symbol__match_symbol_name(symbols[i]->name, name, includes);
if (cmp > 0)
- n = n->rb_left;
+ upper = i;
else if (cmp < 0)
- n = n->rb_right;
- else
+ lower = i + 1;
+ else {
+ if (found_idx)
+ *found_idx = i;
+ s = symbols[i];
break;
+ }
}
-
- if (n == NULL)
- return NULL;
-
- if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY)
+ if (s && includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY) {
/* return first symbol that has same name (if any) */
- for (n = rb_prev(n); n; n = rb_prev(n)) {
- struct symbol_name_rb_node *tmp;
-
- tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
- if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
+ for (; i > 0; i--) {
+ struct symbol *tmp = symbols[i - 1];
+
+ if (!arch__compare_symbol_names(tmp->name, s->name)) {
+ if (found_idx)
+ *found_idx = i - 1;
+ s = tmp;
+ } else
break;
-
- s = tmp;
}
-
- return &s->sym;
+ }
+ assert(!found_idx || !s || s == symbols[*found_idx]);
+ return s;
}
void dso__reset_find_symbol_cache(struct dso *dso)
@@ -590,31 +592,41 @@ struct symbol *dso__next_symbol(struct symbol *sym)
return symbols__next(sym);
}
-struct symbol *symbol__next_by_name(struct symbol *sym)
+struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx)
{
- struct symbol_name_rb_node *s = container_of(sym, struct symbol_name_rb_node, sym);
- struct rb_node *n = rb_next(&s->rb_node);
+ if (*idx + 1 >= dso->symbol_names_len)
+ return NULL;
- return n ? &rb_entry(n, struct symbol_name_rb_node, rb_node)->sym : NULL;
+ ++*idx;
+ return dso->symbol_names[*idx];
}
/*
* Returns first symbol that matched with @name.
*/
-struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx)
{
- struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
- SYMBOL_TAG_INCLUDE__NONE);
+ struct symbol *s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len,
+ name, SYMBOL_TAG_INCLUDE__NONE, idx);
if (!s)
- s = symbols__find_by_name(&dso->symbol_names, name,
- SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
+ s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len,
+ name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY, idx);
return s;
}
void dso__sort_by_name(struct dso *dso)
{
- dso__set_sorted_by_name(dso);
- return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
+ mutex_lock(&dso->lock);
+ if (!dso__sorted_by_name(dso)) {
+ size_t len;
+
+ dso->symbol_names = symbols__sort_by_name(&dso->symbols, &len);
+ if (dso->symbol_names) {
+ dso->symbol_names_len = len;
+ dso__set_sorted_by_name(dso);
+ }
+ }
+ mutex_unlock(&dso->lock);
}
/*
@@ -1458,16 +1470,18 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
list_del_init(&new_node->node);
if (RC_CHK_ACCESS(new_map) == RC_CHK_ACCESS(replacement_map)) {
+ struct map *map_ref;
+
map__set_start(map, map__start(new_map));
map__set_end(map, map__end(new_map));
map__set_pgoff(map, map__pgoff(new_map));
map__set_map_ip(map, map__map_ip_ptr(new_map));
map__set_unmap_ip(map, map__unmap_ip_ptr(new_map));
/* Ensure maps are correctly ordered */
- map__get(map);
- maps__remove(kmaps, map);
- err = maps__insert(kmaps, map);
- map__put(map);
+ map_ref = map__get(map);
+ maps__remove(kmaps, map_ref);
+ err = maps__insert(kmaps, map_ref);
+ map__put(map_ref);
map__put(new_map);
if (err)
goto out_err;
@@ -2654,10 +2668,6 @@ int symbol__init(struct perf_env *env)
symbol__elf_init();
- if (symbol_conf.sort_by_name)
- symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) -
- sizeof(struct symbol));
-
if (symbol_conf.try_vmlinux_path && vmlinux_path__init(env) < 0)
return -1;
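
The sorted-array replacement for the name rbtree pairs a binary search with a walk back to the first duplicate, so repeated names always resolve to their first occurrence. A standalone sketch of that lookup (using strcmp where the real code uses symbol__match_symbol_name() and arch__compare_symbol_names()):

#include <stdio.h>
#include <string.h>

static size_t find_first(const char *const names[], size_t len,
			 const char *name)
{
	size_t lower = 0, upper = len, i = len;

	while (lower < upper) {
		size_t mid = (lower + upper) / 2;
		int cmp = strcmp(names[mid], name);

		if (cmp > 0)
			upper = mid;
		else if (cmp < 0)
			lower = mid + 1;
		else {
			i = mid;
			break;
		}
	}
	/* Walk back over duplicates to the first matching entry. */
	while (i != len && i > 0 && !strcmp(names[i - 1], name))
		i--;
	return i;	/* == len when not found */
}

int main(void)
{
	const char *const names[] = { "bar", "foo", "foo", "qux" };

	printf("%zu\n", find_first(names, 4, "foo"));	/* prints 1 */
	return 0;
}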
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 7558735543c2..af87c46b3f89 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/rbtree.h>
#include <stdio.h>
+#include "addr_location.h"
#include "path.h"
#include "symbol_conf.h"
#include "spark.h"
@@ -42,8 +43,7 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
/**
* A symtab entry. When allocated this may be preceded by an annotation (see
- * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node
- * to sort by name (see struct symbol_name_rb_node).
+ * symbol__annotation) and/or a browser_index (see symbol__browser_index).
*/
struct symbol {
struct rb_node rb_node;
@@ -94,11 +94,6 @@ static inline size_t symbol__size(const struct symbol *sym)
struct strlist;
struct intlist;
-struct symbol_name_rb_node {
- struct rb_node rb_node;
- struct symbol sym;
-};
-
static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
{
return path__join(bf, size, symbol_conf.symfs, path);
@@ -120,22 +115,6 @@ struct ref_reloc_sym {
u64 unrelocated_addr;
};
-struct addr_location {
- struct thread *thread;
- struct maps *maps;
- struct map *map;
- struct symbol *sym;
- const char *srcline;
- u64 addr;
- char level;
- u8 filtered;
- u8 cpumode;
- s32 cpu;
- s32 socket;
-};
-
-void addr_location__put(struct addr_location *al);
-
int dso__load(struct dso *dso, struct map *map);
int dso__load_vmlinux(struct dso *dso, struct map *map,
const char *vmlinux, bool vmlinux_allocated);
@@ -151,9 +130,9 @@ void dso__delete_symbol(struct dso *dso,
struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);
-struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
-struct symbol *symbol__next_by_name(struct symbol *sym);
+struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx);
struct symbol *dso__first_symbol(struct dso *dso);
struct symbol *dso__last_symbol(struct dso *dso);
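
With the rbtree gone, the index returned through *idx carries the iteration state between calls. A sketch of walking same-named entries, assuming dso and name are in scope and dso__sort_by_name() has already run:

	size_t idx;
	struct symbol *sym = dso__find_symbol_by_name(dso, name, &idx);

	while (sym != NULL) {
		/*
		 * Entries arrive in name-sorted order; stop once sym->name
		 * no longer matches if only aliases of name are wanted.
		 */
		sym = dso__next_symbol_by_name(dso, &idx);
	}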
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index f26f81eb8252..0b589570d1d0 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -18,7 +18,6 @@ struct symbol_conf {
show_kernel_path,
use_modules,
allow_aliases,
- sort_by_name,
show_nr_samples,
show_total_period,
use_callchain,
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index d9e5ad040b6a..088f4abf230f 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -63,13 +63,11 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso,
FILE *fp)
{
size_t ret = 0;
- struct rb_node *nd;
- struct symbol_name_rb_node *pos;
- for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
- pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
- ret += fprintf(fp, "%s\n", pos->sym.name);
- }
+ for (size_t i = 0; i < dso->symbol_names_len; i++) {
+ struct symbol *pos = dso->symbol_names[i];
+ ret += fprintf(fp, "%s\n", pos->name);
+ }
return ret;
}
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index b2e4afa5efa1..45714a2785fd 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1375,6 +1375,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool,
ADD(AGGR_MODE, config->aggr_mode)
ADD(INTERVAL, config->interval)
ADD(SCALE, config->scale)
+ ADD(AGGR_LEVEL, config->aggr_level)
WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
"stat config terms unbalanced\n");
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 313eccef6cb4..63be7b58761d 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -17,31 +17,31 @@
#if defined(__x86_64__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_x86_64;
+static const char *const *syscalltbl_native = syscalltbl_x86_64;
#elif defined(__s390x__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_s390_64;
+static const char *const *syscalltbl_native = syscalltbl_s390_64;
#elif defined(__powerpc64__)
#include <asm/syscalls_64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_powerpc_64;
+static const char *const *syscalltbl_native = syscalltbl_powerpc_64;
#elif defined(__powerpc__)
#include <asm/syscalls_32.c>
const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_powerpc_32;
+static const char *const *syscalltbl_native = syscalltbl_powerpc_32;
#elif defined(__aarch64__)
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_arm64;
+static const char *const *syscalltbl_native = syscalltbl_arm64;
#elif defined(__mips__)
#include <asm/syscalls_n64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_mips_n64;
+static const char *const *syscalltbl_native = syscalltbl_mips_n64;
#elif defined(__loongarch__)
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
-static const char **syscalltbl_native = syscalltbl_loongarch;
+static const char *const *syscalltbl_native = syscalltbl_loongarch;
#endif
struct syscall {
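
The extra const is what lets syscalltbl_native alias tables declared as arrays of const pointers, allowing those tables to live in .rodata. A minimal demonstration:

static const char *const table[] = { "read", "write" };

static const char *const *ok = table;	/* fine */
/* static const char **bad = table; */	/* warns: discards the const qualifier */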
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 880f1af7f6ad..d582cae8e105 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -17,7 +17,6 @@ struct target {
bool default_per_cpu;
bool per_thread;
bool use_bpf;
- bool hybrid;
int initial_delay;
const char *attr_map;
};
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 4b85c1728012..c6a0a27b12c2 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -112,7 +112,7 @@ struct thread_stack {
*/
static inline bool thread_stack__per_cpu(struct thread *thread)
{
- return !(thread->tid || thread->pid_);
+ return !(thread__tid(thread) || thread__pid(thread));
}
static int thread_stack__grow(struct thread_stack *ts)
@@ -155,8 +155,8 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
ts->br_stack_sz = br_stack_sz;
}
- if (thread->maps && maps__machine(thread->maps)) {
- struct machine *machine = maps__machine(thread->maps);
+ if (thread__maps(thread) && maps__machine(thread__maps(thread))) {
+ struct machine *machine = maps__machine(thread__maps(thread));
const char *arch = perf_env__arch(machine->env);
ts->kernel_start = machine__kernel_start(machine);
@@ -175,7 +175,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
bool callstack,
unsigned int br_stack_sz)
{
- struct thread_stack *ts = thread->ts, *new_ts;
+ struct thread_stack *ts = thread__ts(thread), *new_ts;
unsigned int old_sz = ts ? ts->arr_sz : 0;
unsigned int new_sz = 1;
@@ -189,8 +189,8 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
if (ts)
memcpy(new_ts, ts, old_sz * sizeof(*ts));
new_ts->arr_sz = new_sz;
- zfree(&thread->ts);
- thread->ts = new_ts;
+ free(thread__ts(thread));
+ thread__set_ts(thread, new_ts);
ts = new_ts;
}
@@ -207,7 +207,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
- struct thread_stack *ts = thread->ts;
+ struct thread_stack *ts = thread__ts(thread);
if (cpu < 0)
cpu = 0;
@@ -232,7 +232,7 @@ static inline struct thread_stack *thread__stack(struct thread *thread,
if (thread_stack__per_cpu(thread))
return thread__cpu_stack(thread, cpu);
- return thread->ts;
+ return thread__ts(thread);
}
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
@@ -363,7 +363,7 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
int thread_stack__flush(struct thread *thread)
{
- struct thread_stack *ts = thread->ts;
+ struct thread_stack *ts = thread__ts(thread);
unsigned int pos;
int err = 0;
@@ -502,13 +502,14 @@ static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
void thread_stack__free(struct thread *thread)
{
- struct thread_stack *ts = thread->ts;
+ struct thread_stack *ts = thread__ts(thread);
unsigned int pos;
if (ts) {
for (pos = 0; pos < ts->arr_sz; pos++)
__thread_stack__free(thread, ts + pos);
- zfree(&thread->ts);
+ free(thread__ts(thread));
+ thread__set_ts(thread, NULL);
}
}
@@ -1037,9 +1038,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
static bool is_x86_retpoline(const char *name)
{
- const char *p = strstr(name, "__x86_indirect_thunk_");
-
- return p == name || !strcmp(name, "__indirect_thunk_start");
+ return strstr(name, "__x86_indirect_thunk_") == name;
}
/*
@@ -1127,7 +1126,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
ts->rstate = X86_RETPOLINE_POSSIBLE;
/* Flush stack on exec */
- if (ts->comm != comm && thread->pid_ == thread->tid) {
+ if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) {
err = __thread_stack__flush(thread, ts);
if (err)
return err;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 4b5bdc277baa..0b166404c5c3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -21,38 +21,40 @@
int thread__init_maps(struct thread *thread, struct machine *machine)
{
- pid_t pid = thread->pid_;
+ pid_t pid = thread__pid(thread);
- if (pid == thread->tid || pid == -1) {
- thread->maps = maps__new(machine);
+ if (pid == thread__tid(thread) || pid == -1) {
+ thread__set_maps(thread, maps__new(machine));
} else {
struct thread *leader = __machine__findnew_thread(machine, pid, pid);
+
if (leader) {
- thread->maps = maps__get(leader->maps);
+ thread__set_maps(thread, maps__get(thread__maps(leader)));
thread__put(leader);
}
}
- return thread->maps ? 0 : -1;
+ return thread__maps(thread) ? 0 : -1;
}
struct thread *thread__new(pid_t pid, pid_t tid)
{
char *comm_str;
struct comm *comm;
- struct thread *thread = zalloc(sizeof(*thread));
-
- if (thread != NULL) {
- thread->pid_ = pid;
- thread->tid = tid;
- thread->ppid = -1;
- thread->cpu = -1;
- thread->guest_cpu = -1;
- thread->lbr_stitch_enable = false;
- INIT_LIST_HEAD(&thread->namespaces_list);
- INIT_LIST_HEAD(&thread->comm_list);
- init_rwsem(&thread->namespaces_lock);
- init_rwsem(&thread->comm_lock);
+ RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread));
+ struct thread *thread;
+
+ if (ADD_RC_CHK(thread, _thread) != NULL) {
+ thread__set_pid(thread, pid);
+ thread__set_tid(thread, tid);
+ thread__set_ppid(thread, -1);
+ thread__set_cpu(thread, -1);
+ thread__set_guest_cpu(thread, -1);
+ thread__set_lbr_stitch_enable(thread, false);
+ INIT_LIST_HEAD(thread__namespaces_list(thread));
+ INIT_LIST_HEAD(thread__comm_list(thread));
+ init_rwsem(thread__namespaces_lock(thread));
+ init_rwsem(thread__comm_lock(thread));
comm_str = malloc(32);
if (!comm_str)
@@ -64,12 +66,11 @@ struct thread *thread__new(pid_t pid, pid_t tid)
if (!comm)
goto err_thread;
- list_add(&comm->list, &thread->comm_list);
- refcount_set(&thread->refcnt, 1);
- RB_CLEAR_NODE(&thread->rb_node);
+ list_add(&comm->list, thread__comm_list(thread));
+ refcount_set(thread__refcnt(thread), 1);
/* Thread holds first ref to nsdata. */
- thread->nsinfo = nsinfo__new(pid);
- srccode_state_init(&thread->srccode_state);
+ RC_CHK_ACCESS(thread)->nsinfo = nsinfo__new(pid);
+ srccode_state_init(thread__srccode_state(thread));
}
return thread;
@@ -84,89 +85,69 @@ void thread__delete(struct thread *thread)
struct namespaces *namespaces, *tmp_namespaces;
struct comm *comm, *tmp_comm;
- BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
-
thread_stack__free(thread);
- if (thread->maps) {
- maps__put(thread->maps);
- thread->maps = NULL;
+ if (thread__maps(thread)) {
+ maps__put(thread__maps(thread));
+ thread__set_maps(thread, NULL);
}
- down_write(&thread->namespaces_lock);
+ down_write(thread__namespaces_lock(thread));
list_for_each_entry_safe(namespaces, tmp_namespaces,
- &thread->namespaces_list, list) {
+ thread__namespaces_list(thread), list) {
list_del_init(&namespaces->list);
namespaces__free(namespaces);
}
- up_write(&thread->namespaces_lock);
+ up_write(thread__namespaces_lock(thread));
- down_write(&thread->comm_lock);
- list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
+ down_write(thread__comm_lock(thread));
+ list_for_each_entry_safe(comm, tmp_comm, thread__comm_list(thread), list) {
list_del_init(&comm->list);
comm__free(comm);
}
- up_write(&thread->comm_lock);
+ up_write(thread__comm_lock(thread));
- nsinfo__zput(thread->nsinfo);
- srccode_state_free(&thread->srccode_state);
+ nsinfo__zput(RC_CHK_ACCESS(thread)->nsinfo);
+ srccode_state_free(thread__srccode_state(thread));
- exit_rwsem(&thread->namespaces_lock);
- exit_rwsem(&thread->comm_lock);
+ exit_rwsem(thread__namespaces_lock(thread));
+ exit_rwsem(thread__comm_lock(thread));
thread__free_stitch_list(thread);
- free(thread);
+ RC_CHK_FREE(thread);
}
struct thread *thread__get(struct thread *thread)
{
- if (thread)
- refcount_inc(&thread->refcnt);
- return thread;
+ struct thread *result;
+
+ if (RC_CHK_GET(result, thread))
+ refcount_inc(thread__refcnt(thread));
+
+ return result;
}
void thread__put(struct thread *thread)
{
- if (thread && refcount_dec_and_test(&thread->refcnt)) {
- /*
- * Remove it from the dead threads list, as last reference is
- * gone, if it is in a dead threads list.
- *
- * We may not be there anymore if say, the machine where it was
- * stored was already deleted, so we already removed it from
- * the dead threads and some other piece of code still keeps a
- * reference.
- *
- * This is what 'perf sched' does and finally drops it in
- * perf_sched__lat(), where it calls perf_sched__read_events(),
- * that processes the events by creating a session and deleting
- * it, which ends up destroying the list heads for the dead
- * threads, but before it does that it removes all threads from
- * it using list_del_init().
- *
- * So we need to check here if it is in a dead threads list and
- * if so, remove it before finally deleting the thread, to avoid
- * an use after free situation.
- */
- if (!list_empty(&thread->node))
- list_del_init(&thread->node);
+ if (thread && refcount_dec_and_test(thread__refcnt(thread)))
thread__delete(thread);
- }
+ else
+ RC_CHK_PUT(thread);
}
-static struct namespaces *__thread__namespaces(const struct thread *thread)
+static struct namespaces *__thread__namespaces(struct thread *thread)
{
- if (list_empty(&thread->namespaces_list))
+ if (list_empty(thread__namespaces_list(thread)))
return NULL;
- return list_first_entry(&thread->namespaces_list, struct namespaces, list);
+ return list_first_entry(thread__namespaces_list(thread), struct namespaces, list);
}
struct namespaces *thread__namespaces(struct thread *thread)
{
struct namespaces *ns;
- down_read(&thread->namespaces_lock);
+ down_read(thread__namespaces_lock(thread));
ns = __thread__namespaces(thread);
- up_read(&thread->namespaces_lock);
+ up_read(thread__namespaces_lock(thread));
return ns;
}
@@ -180,7 +161,7 @@ static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
if (!new)
return -ENOMEM;
- list_add(&new->list, &thread->namespaces_list);
+ list_add(&new->list, thread__namespaces_list(thread));
if (timestamp && curr) {
/*
@@ -200,25 +181,25 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp,
{
int ret;
- down_write(&thread->namespaces_lock);
+ down_write(thread__namespaces_lock(thread));
ret = __thread__set_namespaces(thread, timestamp, event);
- up_write(&thread->namespaces_lock);
+ up_write(thread__namespaces_lock(thread));
return ret;
}
-struct comm *thread__comm(const struct thread *thread)
+struct comm *thread__comm(struct thread *thread)
{
- if (list_empty(&thread->comm_list))
+ if (list_empty(thread__comm_list(thread)))
return NULL;
- return list_first_entry(&thread->comm_list, struct comm, list);
+ return list_first_entry(thread__comm_list(thread), struct comm, list);
}
-struct comm *thread__exec_comm(const struct thread *thread)
+struct comm *thread__exec_comm(struct thread *thread)
{
struct comm *comm, *last = NULL, *second_last = NULL;
- list_for_each_entry(comm, &thread->comm_list, list) {
+ list_for_each_entry(comm, thread__comm_list(thread), list) {
if (comm->exec)
return comm;
second_last = last;
@@ -231,7 +212,7 @@ struct comm *thread__exec_comm(const struct thread *thread)
* thread, that is very probably wrong. Prefer a later comm to avoid
* that case.
*/
- if (second_last && !last->start && thread->pid_ == thread->tid)
+ if (second_last && !last->start && thread__pid(thread) == thread__tid(thread))
return second_last;
return last;
@@ -243,7 +224,7 @@ static int ____thread__set_comm(struct thread *thread, const char *str,
struct comm *new, *curr = thread__comm(thread);
/* Override the default :tid entry */
- if (!thread->comm_set) {
+ if (!thread__comm_set(thread)) {
int err = comm__override(curr, str, timestamp, exec);
if (err)
return err;
@@ -251,13 +232,13 @@ static int ____thread__set_comm(struct thread *thread, const char *str,
new = comm__new(str, timestamp, exec);
if (!new)
return -ENOMEM;
- list_add(&new->list, &thread->comm_list);
+ list_add(&new->list, thread__comm_list(thread));
if (exec)
- unwind__flush_access(thread->maps);
+ unwind__flush_access(thread__maps(thread));
}
- thread->comm_set = true;
+ thread__set_comm_set(thread, true);
return 0;
}
@@ -267,9 +248,9 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
{
int ret;
- down_write(&thread->comm_lock);
+ down_write(thread__comm_lock(thread));
ret = ____thread__set_comm(thread, str, timestamp, exec);
- up_write(&thread->comm_lock);
+ up_write(thread__comm_lock(thread));
return ret;
}
@@ -281,7 +262,7 @@ int thread__set_comm_from_proc(struct thread *thread)
int err = -1;
if (!(snprintf(path, sizeof(path), "%d/task/%d/comm",
- thread->pid_, thread->tid) >= (int)sizeof(path)) &&
+ thread__pid(thread), thread__tid(thread)) >= (int)sizeof(path)) &&
procfs__read_str(path, &comm, &sz) == 0) {
comm[sz - 1] = '\0';
err = thread__set_comm(thread, comm, 0);
@@ -290,7 +271,7 @@ int thread__set_comm_from_proc(struct thread *thread)
return err;
}
-static const char *__thread__comm_str(const struct thread *thread)
+static const char *__thread__comm_str(struct thread *thread)
{
const struct comm *comm = thread__comm(thread);
@@ -304,9 +285,9 @@ const char *thread__comm_str(struct thread *thread)
{
const char *str;
- down_read(&thread->comm_lock);
+ down_read(thread__comm_lock(thread));
str = __thread__comm_str(thread);
- up_read(&thread->comm_lock);
+ up_read(thread__comm_lock(thread));
return str;
}
@@ -315,23 +296,23 @@ static int __thread__comm_len(struct thread *thread, const char *comm)
{
if (!comm)
return 0;
- thread->comm_len = strlen(comm);
+ thread__set_comm_len(thread, strlen(comm));
- return thread->comm_len;
+ return thread__var_comm_len(thread);
}
/* CHECKME: it should probably better return the max comm len from its comm list */
int thread__comm_len(struct thread *thread)
{
- int comm_len = thread->comm_len;
+ int comm_len = thread__var_comm_len(thread);
if (!comm_len) {
const char *comm;
- down_read(&thread->comm_lock);
+ down_read(thread__comm_lock(thread));
comm = __thread__comm_str(thread);
comm_len = __thread__comm_len(thread, comm);
- up_read(&thread->comm_lock);
+ up_read(thread__comm_lock(thread));
}
return comm_len;
@@ -339,33 +320,33 @@ int thread__comm_len(struct thread *thread)
size_t thread__fprintf(struct thread *thread, FILE *fp)
{
- return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) +
- maps__fprintf(thread->maps, fp);
+ return fprintf(fp, "Thread %d %s\n", thread__tid(thread), thread__comm_str(thread)) +
+ maps__fprintf(thread__maps(thread), fp);
}
int thread__insert_map(struct thread *thread, struct map *map)
{
int ret;
- ret = unwind__prepare_access(thread->maps, map, NULL);
+ ret = unwind__prepare_access(thread__maps(thread), map, NULL);
if (ret)
return ret;
- maps__fixup_overlappings(thread->maps, map, stderr);
- return maps__insert(thread->maps, map);
+ maps__fixup_overlappings(thread__maps(thread), map, stderr);
+ return maps__insert(thread__maps(thread), map);
}
static int __thread__prepare_access(struct thread *thread)
{
bool initialized = false;
int err = 0;
- struct maps *maps = thread->maps;
+ struct maps *maps = thread__maps(thread);
struct map_rb_node *rb_node;
down_read(maps__lock(maps));
maps__for_each_entry(maps, rb_node) {
- err = unwind__prepare_access(thread->maps, rb_node->map, &initialized);
+ err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized);
if (err || initialized)
break;
}
@@ -388,21 +369,22 @@ static int thread__prepare_access(struct thread *thread)
static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone)
{
/* This is new thread, we share map groups for process. */
- if (thread->pid_ == parent->pid_)
+ if (thread__pid(thread) == thread__pid(parent))
return thread__prepare_access(thread);
- if (thread->maps == parent->maps) {
+ if (thread__maps(thread) == thread__maps(parent)) {
pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
- thread->pid_, thread->tid, parent->pid_, parent->tid);
+ thread__pid(thread), thread__tid(thread),
+ thread__pid(parent), thread__tid(parent));
return 0;
}
/* But this one is new process, copy maps. */
- return do_maps_clone ? maps__clone(thread, parent->maps) : 0;
+ return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0;
}
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
{
- if (parent->comm_set) {
+ if (thread__comm_set(parent)) {
const char *comm = thread__comm_str(parent);
int err;
if (!comm)
@@ -412,7 +394,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo
return err;
}
- thread->ppid = parent->tid;
+ thread__set_ppid(thread, thread__tid(parent));
return thread__clone_maps(thread, parent, do_maps_clone);
}
@@ -436,13 +418,13 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
{
- if (thread->pid_ == thread->tid)
+ if (thread__pid(thread) == thread__tid(thread))
return thread__get(thread);
- if (thread->pid_ == -1)
+ if (thread__pid(thread) == -1)
return NULL;
- return machine__find_thread(machine, thread->pid_, thread->pid_);
+ return machine__find_thread(machine, thread__pid(thread), thread__pid(thread));
}
int thread__memcpy(struct thread *thread, struct machine *machine,
@@ -456,24 +438,31 @@ int thread__memcpy(struct thread *thread, struct machine *machine,
if (machine__kernel_ip(machine, ip))
cpumode = PERF_RECORD_MISC_KERNEL;
- if (!thread__find_map(thread, cpumode, ip, &al))
- return -1;
+ addr_location__init(&al);
+ if (!thread__find_map(thread, cpumode, ip, &al)) {
+ addr_location__exit(&al);
+ return -1;
+ }
dso = map__dso(al.map);
- if( !dso || dso->data.status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0)
+ if (!dso || dso->data.status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0) {
+ addr_location__exit(&al);
return -1;
+ }
offset = map__map_ip(al.map, ip);
if (is64bit)
*is64bit = dso->is_64_bit;
+ addr_location__exit(&al);
+
return dso__data_read_offset(dso, machine, offset, buf, len);
}
void thread__free_stitch_list(struct thread *thread)
{
- struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread);
struct stitch_list *pos, *tmp;
if (!lbr_stitch)
@@ -490,5 +479,6 @@ void thread__free_stitch_list(struct thread *thread)
}
zfree(&lbr_stitch->prev_lbr_cursor);
- zfree(&thread->lbr_stitch);
+ free(thread__lbr_stitch(thread));
+ thread__set_lbr_stitch(thread, NULL);
}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 395c626699a9..9068a21ce0fa 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -15,6 +15,7 @@
#include "rwsem.h"
#include "event.h"
#include "callchain.h"
+#include <internal/rc_check.h>
struct addr_location;
struct map;
@@ -29,11 +30,12 @@ struct lbr_stitch {
struct callchain_cursor_node *prev_lbr_cursor;
};
-struct thread {
- union {
- struct rb_node rb_node;
- struct list_head node;
- };
+struct thread_rb_node {
+ struct rb_node rb_node;
+ struct thread *thread;
+};
+
+DECLARE_RC_STRUCT(thread) {
struct maps *maps;
pid_t pid_; /* Not all tools update this */
pid_t tid;
@@ -43,7 +45,6 @@ struct thread {
refcount_t refcnt;
bool comm_set;
int comm_len;
- bool dead; /* if set thread has exited */
struct list_head namespaces_list;
struct rw_semaphore namespaces_lock;
struct list_head comm_list;
@@ -81,11 +82,6 @@ static inline void __thread__zput(struct thread **thread)
#define thread__zput(thread) __thread__zput(&thread)
-static inline void thread__exited(struct thread *thread)
-{
- thread->dead = true;
-}
-
struct namespaces *thread__namespaces(struct thread *thread);
int thread__set_namespaces(struct thread *thread, u64 timestamp,
struct perf_record_namespaces *event);
@@ -101,8 +97,8 @@ static inline int thread__set_comm(struct thread *thread, const char *comm,
int thread__set_comm_from_proc(struct thread *thread);
int thread__comm_len(struct thread *thread);
-struct comm *thread__comm(const struct thread *thread);
-struct comm *thread__exec_comm(const struct thread *thread);
+struct comm *thread__comm(struct thread *thread);
+struct comm *thread__exec_comm(struct thread *thread);
const char *thread__comm_str(struct thread *thread);
int thread__insert_map(struct thread *thread, struct map *map);
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
@@ -126,14 +122,194 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
int thread__memcpy(struct thread *thread, struct machine *machine,
void *buf, u64 ip, int len, bool *is64bit);
+static inline struct maps *thread__maps(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->maps;
+}
+
+static inline void thread__set_maps(struct thread *thread, struct maps *maps)
+{
+ RC_CHK_ACCESS(thread)->maps = maps;
+}
+
+static inline pid_t thread__pid(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->pid_;
+}
+
+static inline void thread__set_pid(struct thread *thread, pid_t pid_)
+{
+ RC_CHK_ACCESS(thread)->pid_ = pid_;
+}
+
+static inline pid_t thread__tid(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->tid;
+}
+
+static inline void thread__set_tid(struct thread *thread, pid_t tid)
+{
+ RC_CHK_ACCESS(thread)->tid = tid;
+}
+
+static inline pid_t thread__ppid(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->ppid;
+}
+
+static inline void thread__set_ppid(struct thread *thread, pid_t ppid)
+{
+ RC_CHK_ACCESS(thread)->ppid = ppid;
+}
+
+static inline int thread__cpu(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->cpu;
+}
+
+static inline void thread__set_cpu(struct thread *thread, int cpu)
+{
+ RC_CHK_ACCESS(thread)->cpu = cpu;
+}
+
+static inline int thread__guest_cpu(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->guest_cpu;
+}
+
+static inline void thread__set_guest_cpu(struct thread *thread, int guest_cpu)
+{
+ RC_CHK_ACCESS(thread)->guest_cpu = guest_cpu;
+}
+
+static inline refcount_t *thread__refcnt(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->refcnt;
+}
+
+static inline bool thread__comm_set(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->comm_set;
+}
+
+static inline void thread__set_comm_set(struct thread *thread, bool set)
+{
+ RC_CHK_ACCESS(thread)->comm_set = set;
+}
+
+static inline int thread__var_comm_len(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->comm_len;
+}
+
+static inline void thread__set_comm_len(struct thread *thread, int len)
+{
+ RC_CHK_ACCESS(thread)->comm_len = len;
+}
+
+static inline struct list_head *thread__namespaces_list(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->namespaces_list;
+}
+
+static inline int thread__namespaces_list_empty(const struct thread *thread)
+{
+ return list_empty(&RC_CHK_ACCESS(thread)->namespaces_list);
+}
+
+static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->namespaces_lock;
+}
+
+static inline struct list_head *thread__comm_list(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->comm_list;
+}
+
+static inline struct rw_semaphore *thread__comm_lock(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->comm_lock;
+}
+
+static inline u64 thread__db_id(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->db_id;
+}
+
+static inline void thread__set_db_id(struct thread *thread, u64 db_id)
+{
+ RC_CHK_ACCESS(thread)->db_id = db_id;
+}
+
static inline void *thread__priv(struct thread *thread)
{
- return thread->priv;
+ return RC_CHK_ACCESS(thread)->priv;
}
static inline void thread__set_priv(struct thread *thread, void *p)
{
- thread->priv = p;
+ RC_CHK_ACCESS(thread)->priv = p;
+}
+
+static inline struct thread_stack *thread__ts(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->ts;
+}
+
+static inline void thread__set_ts(struct thread *thread, struct thread_stack *ts)
+{
+ RC_CHK_ACCESS(thread)->ts = ts;
+}
+
+static inline struct nsinfo *thread__nsinfo(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->nsinfo;
+}
+
+static inline struct srccode_state *thread__srccode_state(struct thread *thread)
+{
+ return &RC_CHK_ACCESS(thread)->srccode_state;
+}
+
+static inline bool thread__filter(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->filter;
+}
+
+static inline void thread__set_filter(struct thread *thread, bool filter)
+{
+ RC_CHK_ACCESS(thread)->filter = filter;
+}
+
+static inline int thread__filter_entry_depth(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->filter_entry_depth;
+}
+
+static inline void thread__set_filter_entry_depth(struct thread *thread, int depth)
+{
+ RC_CHK_ACCESS(thread)->filter_entry_depth = depth;
+}
+
+static inline bool thread__lbr_stitch_enable(const struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->lbr_stitch_enable;
+}
+
+static inline void thread__set_lbr_stitch_enable(struct thread *thread, bool en)
+{
+ RC_CHK_ACCESS(thread)->lbr_stitch_enable = en;
+}
+
+static inline struct lbr_stitch *thread__lbr_stitch(struct thread *thread)
+{
+ return RC_CHK_ACCESS(thread)->lbr_stitch;
+}
+
+static inline void thread__set_lbr_stitch(struct thread *thread, struct lbr_stitch *lbrs)
+{
+ RC_CHK_ACCESS(thread)->lbr_stitch = lbrs;
}
static inline bool thread__is_filtered(struct thread *thread)
@@ -144,12 +320,12 @@ static inline bool thread__is_filtered(struct thread *thread)
}
if (symbol_conf.pid_list &&
- !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) {
+ !intlist__has_entry(symbol_conf.pid_list, thread__pid(thread))) {
return true;
}
if (symbol_conf.tid_list &&
- !intlist__has_entry(symbol_conf.tid_list, thread->tid)) {
+ !intlist__has_entry(symbol_conf.tid_list, thread__tid(thread))) {
return true;
}
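
[Editor's note] The thread.h hunk replaces direct field access with inline getters and setters built on RC_CHK_ACCESS(). A sketch of the general shape such wrappers take is below; this is an assumption about the pattern, not perf's exact macros. In an unchecked build, DECLARE_RC_STRUCT() can declare the plain struct and RC_CHK_ACCESS() can be a no-op, while a checked build adds an indirection so use-after-put of the reference-counted object can be detected.

/* Illustrative-only, unchecked-build shape of the wrappers. */
#define DECLARE_RC_STRUCT(name) struct name
#define RC_CHK_ACCESS(x) (x)

DECLARE_RC_STRUCT(point) {
	int x;
};

static inline int point__x(const struct point *p)
{
	return RC_CHK_ACCESS(p)->x;	/* getter hides the representation */
}

int main(void)
{
	struct point p = { .x = 3 };

	return point__x(&p) == 3 ? 0 : 1;
}

Routing every access through an accessor is what lets the checked build swap in an indirected representation without touching the many call sites converted in this patch.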
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index c24b3a15e319..319ccf09a435 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -466,6 +466,18 @@ next:
return NULL;
}
+char *tracepoint_id_to_name(u64 config)
+{
+ struct tracepoint_path *path = tracepoint_id_to_path(config);
+ char *buf = NULL;
+
+ if (path && asprintf(&buf, "%s:%s", path->system, path->name) < 0)
+ buf = NULL;
+
+ put_tracepoints_path(path);
+ return buf;
+}
+
static struct tracepoint_path *tracepoint_name_to_path(const char *name)
{
struct tracepoint_path *path = zalloc(sizeof(*path));
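
[Editor's note] The new tracepoint_id_to_name() helper returns a heap-allocated "subsystem:event_name" string that the caller owns. A hedged usage sketch is below; the helper body here is a stand-in that only mimics the real lookup's contract (NULL on failure, caller frees on success), and the "sched:sched_switch" result is invented for illustration.

#define _GNU_SOURCE		/* for asprintf(), as the helper uses */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef uint64_t u64;

static char *tracepoint_id_to_name(u64 config)
{
	char *buf = NULL;

	(void)config;	/* the real lookup resolves config to system:name */
	if (asprintf(&buf, "%s:%s", "sched", "sched_switch") < 0)
		buf = NULL;
	return buf;
}

int main(void)
{
	char *name = tracepoint_id_to_name(0);

	if (name) {
		printf("%s\n", name);
		free(name);	/* caller owns the string */
	}
	return 0;
}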
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index a0cff184b1cd..a69ee29419f3 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -62,6 +62,12 @@ unsigned long long eval_flag(const char *flag);
int read_tracing_data(int fd, struct list_head *pattrs);
+/*
+ * Return the tracepoint name in the format "subsystem:event_name";
+ * the caller must free the returned string.
+ */
+char *tracepoint_id_to_name(u64 config);
+
struct tracing_data {
/* size is only valid if temp is 'true' */
ssize_t size;
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index bdccfc511b7e..2a96df4c8d42 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -66,9 +66,13 @@ static int __report_module(struct addr_location *al, u64 ip,
mod = 0;
}
- if (!mod)
- mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1,
+ if (!mod) {
+ char filename[PATH_MAX];
+
+ __symbol__join_symfs(filename, sizeof(filename), dso->long_name);
+ mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
map__start(al->map) - map__pgoff(al->map), false);
+ }
if (!mod) {
char filename[PATH_MAX];
@@ -90,8 +94,12 @@ static int __report_module(struct addr_location *al, u64 ip,
static int report_module(u64 ip, struct unwind_info *ui)
{
struct addr_location al;
+ int res;
- return __report_module(&al, ip, ui);
+ addr_location__init(&al);
+ res = __report_module(&al, ip, ui);
+ addr_location__exit(&al);
+ return res;
}
/*
@@ -104,8 +112,11 @@ static int entry(u64 ip, struct unwind_info *ui)
struct unwind_entry *e = &ui->entries[ui->idx++];
struct addr_location al;
- if (__report_module(&al, ip, ui))
+ addr_location__init(&al);
+ if (__report_module(&al, ip, ui)) {
+ addr_location__exit(&al);
return -1;
+ }
e->ip = ip;
e->ms.maps = al.maps;
@@ -116,6 +127,7 @@ static int entry(u64 ip, struct unwind_info *ui)
al.sym ? al.sym->name : "''",
ip,
al.map ? map__map_ip(al.map, ip) : (u64) 0);
+ addr_location__exit(&al);
return 0;
}
@@ -136,17 +148,22 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
ssize_t size;
struct dso *dso;
+ addr_location__init(&al);
if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
- return -1;
+ goto out_fail;
}
dso = map__dso(al.map);
if (!dso)
- return -1;
+ goto out_fail;
size = dso__data_read_addr(dso, al.map, ui->machine, addr, (u8 *) data, sizeof(*data));
+ addr_location__exit(&al);
return !(size == sizeof(*data));
+out_fail:
+ addr_location__exit(&al);
+ return -1;
}
static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result,
@@ -230,7 +247,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct unwind_info *ui, ui_buf = {
.sample = data,
.thread = thread,
- .machine = RC_CHK_ACCESS(thread->maps)->machine,
+ .machine = RC_CHK_ACCESS(thread__maps(thread))->machine,
.cb = cb,
.arg = arg,
.max_stack = max_stack,
@@ -260,11 +277,11 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
if (err)
goto out;
- err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui);
+ err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread__tid(thread), &callbacks, ui);
if (err)
goto out;
- err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
+ err = dwfl_getthread_frames(ui->dwfl, thread__tid(thread), frame_callback, ui);
if (err && ui->max_stack != max_stack)
err = 0;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 83dd79dcd597..ebfde537b99b 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -325,7 +325,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
return -EINVAL;
}
- maps__for_each_entry(ui->thread->maps, map_node) {
+ maps__for_each_entry(thread__maps(ui->thread), map_node) {
struct map *map = map_node->map;
u64 start = map__start(map);
@@ -416,7 +416,13 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
{
struct addr_location al;
- return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
+ struct map *ret;
+
+ addr_location__init(&al);
+ thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
+ ret = map__get(al.map);
+ addr_location__exit(&al);
+ return ret;
}
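
[Editor's note] find_map() now returns an owned reference via map__get(), so callers such as find_proc_info() and access_dso_mem() must map__put() on every exit path, including the new early-error returns. A simplified sketch of that discipline, with the refcount plumbing assumed rather than copied from perf:

#include <stdlib.h>

struct map { int refcnt; };

static struct map *map__get(struct map *m)
{
	if (m)
		m->refcnt++;
	return m;
}

static void map__put(struct map *m)
{
	if (m && --m->refcnt == 0)
		free(m);
}

/* A lookup that hands the caller its own reference, as find_map() now
 * does with map__get(al.map). */
static struct map *lookup(struct map *table_entry)
{
	return map__get(table_entry);
}

int main(void)
{
	struct map *entry = calloc(1, sizeof(*entry));
	struct map *m;

	if (!entry)
		return 1;
	entry->refcnt = 1;		/* the table's reference */

	m = lookup(entry);
	if (!m) {
		map__put(entry);	/* error path drops what it holds */
		return 1;
	}
	/* ... use m ... */
	map__put(m);			/* caller drops its reference */
	map__put(entry);		/* table drops its reference */
	return 0;
}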
static int
@@ -435,8 +441,10 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
return -EINVAL;
dso = map__dso(map);
- if (!dso)
+ if (!dso) {
+ map__put(map);
return -EINVAL;
+ }
pr_debug("unwind: find_proc_info dso %s\n", dso->name);
@@ -471,11 +479,11 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
memset(&di, 0, sizeof(di));
if (dwarf_find_debug_frame(0, &di, ip, base, symfile, start, map__end(map)))
- return dwarf_search_unwind_table(as, ip, &di, pi,
- need_unwind_info, arg);
+ ret = dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
}
#endif
-
+ map__put(map);
return ret;
}
@@ -529,12 +537,14 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
dso = map__dso(map);
- if (!dso)
+ if (!dso) {
+ map__put(map);
return -1;
+ }
size = dso__data_read_addr(dso, map, ui->machine,
addr, (u8 *) data, sizeof(*data));
-
+ map__put(map);
return !(size == sizeof(*data));
}
@@ -631,7 +641,9 @@ static int entry(u64 ip, struct thread *thread,
{
struct unwind_entry e;
struct addr_location al;
+ int ret;
+ addr_location__init(&al);
e.ms.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
e.ip = ip;
e.ms.map = al.map;
@@ -642,7 +654,9 @@ static int entry(u64 ip, struct thread *thread,
ip,
al.map ? map__map_ip(al.map, ip) : (u64) 0);
- return cb(&e, arg);
+ ret = cb(&e, arg);
+ addr_location__exit(&al);
+ return ret;
}
static void display_error(int err)
@@ -719,7 +733,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
*/
if (max_stack - 1 > 0) {
WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
- addr_space = maps__addr_space(ui->thread->maps);
+ addr_space = maps__addr_space(thread__maps(ui->thread));
if (addr_space == NULL)
return -1;
@@ -769,7 +783,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct unwind_info ui = {
.sample = data,
.thread = thread,
- .machine = maps__machine(thread->maps),
+ .machine = maps__machine(thread__maps(thread)),
.best_effort = best_effort
};
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 375d23d9a590..76cd63de80a8 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -89,7 +89,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct perf_sample *data, int max_stack,
bool best_effort)
{
- const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(thread->maps);
+ const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(thread__maps(thread));
if (ops)
return ops->get_entries(cb, arg, thread, data, max_stack, best_effort);
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index ec777ee11493..ae3eee69b659 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -146,7 +146,7 @@ static enum dso_type machine__thread_dso_type(struct machine *machine,
enum dso_type dso_type = DSO__TYPE_UNKNOWN;
struct map_rb_node *rb_node;
- maps__for_each_entry(thread->maps, rb_node) {
+ maps__for_each_entry(thread__maps(thread), rb_node) {
struct dso *dso = map__dso(rb_node->map);
if (!dso || dso->long_name[0] != '/')
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 2ca0cedd418f..a73346e854b8 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.15";
+static const char *version_str = "v1.16";
static const int supported_api_ver = 2;
static struct isst_if_platform_info isst_platform_info;
@@ -2113,7 +2113,6 @@ static void set_fact_enable(int arg)
else
for_each_online_power_domain_in_set(set_fact_for_cpu, NULL, NULL,
NULL, &enable);
- isst_ctdp_display_information_end(outf);
if (!fact_enable_fail && enable && auto_mode) {
/*
@@ -2192,10 +2191,13 @@ static void set_fact_enable(int arg)
isst_display_result(&id, outf, "turbo-freq --auto", "enable", 0);
}
+ isst_ctdp_display_information_end(outf);
+
return;
error_disp:
isst_display_result(&id, outf, "turbo-freq --auto", "enable", ret);
+ isst_ctdp_display_information_end(outf);
}
@@ -2261,9 +2263,6 @@ static void dump_clos_config_for_cpu(struct isst_id *id, void *arg1, void *arg2,
struct isst_clos_config clos_config;
int ret;
- if (id->cpu < 0)
- return;
-
ret = isst_pm_get_clos(id, current_clos, &clos_config);
if (ret)
isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0);
@@ -2437,12 +2436,16 @@ static void set_clos_assoc(int arg)
isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
exit(0);
}
+
+ isst_ctdp_display_information_start(outf);
+
if (max_target_cpus)
for_each_online_target_cpu_in_set(set_clos_assoc_for_cpu, NULL,
NULL, NULL, NULL);
else {
isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0);
}
+ isst_ctdp_display_information_end(outf);
}
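
[Editor's note] The isst-config fixes above all enforce one invariant: isst_ctdp_display_information_end() must run after every result has been emitted, on success and error paths alike. A minimal sketch of that begin/end pairing, with simplified stand-in names rather than the real isst display API:

#include <stdio.h>

static void display_begin(FILE *f) { fprintf(f, "{\n"); }
static void display_end(FILE *f)   { fprintf(f, "}\n"); }

static void handle_command(FILE *outf, int fail)
{
	display_begin(outf);
	if (fail) {
		fprintf(outf, "  \"result\": \"error\"\n");
		goto done;			/* still reaches _end() */
	}
	fprintf(outf, "  \"result\": \"ok\"\n");
done:
	display_end(outf);
}

int main(void)
{
	handle_command(stdout, 0);
	handle_command(stdout, 1);
	return 0;
}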
static void get_clos_assoc_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3,
diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
index 19caa9c78d41..3458768562e5 100644
--- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c
+++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
@@ -641,16 +641,30 @@ static int tpmi_pm_qos_config(struct isst_id *id, int enable_clos,
int priority_type)
{
struct isst_core_power info;
- int ret;
+ int i, ret, saved_punit;
info.get_set = 1;
info.socket_id = id->pkg;
info.power_domain_id = id->punit;
info.enable = enable_clos;
info.priority_type = priority_type;
- ret = tpmi_process_ioctl(ISST_IF_CORE_POWER_STATE, &info);
- if (ret == -1)
- return ret;
+
+ saved_punit = id->punit;
+
+ /* Set for all other dies too; this is a per-package setting */
+ for (i = 0; i < MAX_PUNIT_PER_DIE; i++) {
+ id->punit = i;
+ if (isst_is_punit_valid(id)) {
+ info.power_domain_id = i;
+ ret = tpmi_process_ioctl(ISST_IF_CORE_POWER_STATE, &info);
+ if (ret == -1) {
+ id->punit = saved_punit;
+ return ret;
+ }
+ }
+ }
+
+ id->punit = saved_punit;
return 0;
}
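
[Editor's note] Both tpmi hunks use the same save/iterate/restore pattern so that a per-package setting reaches every valid power domain, while id->punit is always restored for the caller. A standalone sketch of the control flow; the constant, validity check, and ioctl stand-ins below are assumptions for illustration.

#define MAX_PUNIT_PER_DIE 8	/* stand-in for the isst constant */

struct isst_id { int punit; };

static int punit_valid(struct isst_id *id)
{
	return id->punit < 2;	/* pretend only punits 0 and 1 exist */
}

static int apply_setting(struct isst_id *id)
{
	(void)id;
	return 0;		/* pretend the ioctl succeeded */
}

static int apply_to_all_punits(struct isst_id *id)
{
	int i, ret, saved_punit = id->punit;

	for (i = 0; i < MAX_PUNIT_PER_DIE; i++) {
		id->punit = i;
		if (!punit_valid(id))
			continue;
		ret = apply_setting(id);
		if (ret) {
			id->punit = saved_punit;	/* restore on error */
			return ret;
		}
	}
	id->punit = saved_punit;	/* always restore the caller's punit */
	return 0;
}

int main(void)
{
	struct isst_id id = { .punit = 0 };

	return apply_to_all_punits(&id);
}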
@@ -686,7 +700,7 @@ int tpmi_set_clos(struct isst_id *id, int clos,
struct isst_clos_config *clos_config)
{
struct isst_clos_param info;
- int ret;
+ int i, ret, saved_punit;
info.get_set = 1;
info.socket_id = id->pkg;
@@ -702,9 +716,22 @@ int tpmi_set_clos(struct isst_id *id, int clos,
if (info.max_freq_mhz <= 0xff)
info.max_freq_mhz *= 100;
- ret = tpmi_process_ioctl(ISST_IF_CLOS_PARAM, &info);
- if (ret == -1)
- return ret;
+ saved_punit = id->punit;
+
+ /* Set for all other dies too; this is a per-package setting */
+ for (i = 0; i < MAX_PUNIT_PER_DIE; i++) {
+ id->punit = i;
+ if (isst_is_punit_valid(id)) {
+ info.power_domain_id = i;
+ ret = tpmi_process_ioctl(ISST_IF_CLOS_PARAM, &info);
+ if (ret == -1) {
+ id->punit = saved_punit;
+ return ret;
+ }
+ }
+ }
+
+ id->punit = saved_punit;
debug_printf("set cpu:%d clos:%d min:%d max:%d\n", id->cpu, clos,
clos_config->clos_min, clos_config->clos_max);
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index b0ca44c70e83..9179942d7f15 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -172,28 +172,37 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
static void print_usage(const char *prog)
{
- printf("Usage: %s [-DsbdlHOLC3vpNR24SI]\n", prog);
- puts(" -D --device device to use (default /dev/spidev1.1)\n"
- " -s --speed max speed (Hz)\n"
- " -d --delay delay (usec)\n"
- " -b --bpw bits per word\n"
- " -i --input input data from a file (e.g. \"test.bin\")\n"
- " -o --output output data to a file (e.g. \"results.bin\")\n"
- " -l --loop loopback\n"
- " -H --cpha clock phase\n"
- " -O --cpol clock polarity\n"
- " -L --lsb least significant bit first\n"
- " -C --cs-high chip select active high\n"
- " -3 --3wire SI/SO signals shared\n"
- " -v --verbose Verbose (show tx buffer)\n"
- " -p Send data (e.g. \"1234\\xde\\xad\")\n"
- " -N --no-cs no chip select\n"
- " -R --ready slave pulls low to pause\n"
- " -2 --dual dual transfer\n"
- " -4 --quad quad transfer\n"
- " -8 --octal octal transfer\n"
- " -S --size transfer size\n"
- " -I --iter iterations\n");
+ printf("Usage: %s [-2348CDFHILMNORSZbdilopsv]\n", prog);
+ puts("general device settings:\n"
+ " -D --device device to use (default /dev/spidev1.1)\n"
+ " -s --speed max speed (Hz)\n"
+ " -d --delay delay (usec)\n"
+ " -l --loop loopback\n"
+ "spi mode:\n"
+ " -H --cpha clock phase\n"
+ " -O --cpol clock polarity\n"
+ " -F --rx-cpha-flip flip CPHA on Rx only xfer\n"
+ "number of wires for transmission:\n"
+ " -2 --dual dual transfer\n"
+ " -4 --quad quad transfer\n"
+ " -8 --octal octal transfer\n"
+ " -3 --3wire SI/SO signals shared\n"
+ " -Z --3wire-hiz high impedance turnaround\n"
+ "data:\n"
+ " -i --input input data from a file (e.g. \"test.bin\")\n"
+ " -o --output output data to a file (e.g. \"results.bin\")\n"
+ " -p Send data (e.g. \"1234\\xde\\xad\")\n"
+ " -S --size transfer size\n"
+ " -I --iter iterations\n"
+ "additional parameters:\n"
+ " -b --bpw bits per word\n"
+ " -L --lsb least significant bit first\n"
+ " -C --cs-high chip select active high\n"
+ " -N --no-cs no chip select\n"
+ " -R --ready slave pulls low to pause\n"
+ " -M --mosi-idle-low leave mosi line low when idle\n"
+ "misc:\n"
+ " -v --verbose Verbose (show tx buffer)\n");
exit(1);
}
@@ -201,31 +210,34 @@ static void parse_opts(int argc, char *argv[])
{
while (1) {
static const struct option lopts[] = {
- { "device", 1, 0, 'D' },
- { "speed", 1, 0, 's' },
- { "delay", 1, 0, 'd' },
- { "bpw", 1, 0, 'b' },
- { "input", 1, 0, 'i' },
- { "output", 1, 0, 'o' },
- { "loop", 0, 0, 'l' },
- { "cpha", 0, 0, 'H' },
- { "cpol", 0, 0, 'O' },
- { "lsb", 0, 0, 'L' },
- { "cs-high", 0, 0, 'C' },
- { "3wire", 0, 0, '3' },
- { "no-cs", 0, 0, 'N' },
- { "ready", 0, 0, 'R' },
- { "dual", 0, 0, '2' },
- { "verbose", 0, 0, 'v' },
- { "quad", 0, 0, '4' },
- { "octal", 0, 0, '8' },
- { "size", 1, 0, 'S' },
- { "iter", 1, 0, 'I' },
+ { "device", 1, 0, 'D' },
+ { "speed", 1, 0, 's' },
+ { "delay", 1, 0, 'd' },
+ { "loop", 0, 0, 'l' },
+ { "cpha", 0, 0, 'H' },
+ { "cpol", 0, 0, 'O' },
+ { "rx-cpha-flip", 0, 0, 'F' },
+ { "dual", 0, 0, '2' },
+ { "quad", 0, 0, '4' },
+ { "octal", 0, 0, '8' },
+ { "3wire", 0, 0, '3' },
+ { "3wire-hiz", 0, 0, 'Z' },
+ { "input", 1, 0, 'i' },
+ { "output", 1, 0, 'o' },
+ { "size", 1, 0, 'S' },
+ { "iter", 1, 0, 'I' },
+ { "bpw", 1, 0, 'b' },
+ { "lsb", 0, 0, 'L' },
+ { "cs-high", 0, 0, 'C' },
+ { "no-cs", 0, 0, 'N' },
+ { "ready", 0, 0, 'R' },
+ { "mosi-idle-low", 0, 0, 'M' },
+ { "verbose", 0, 0, 'v' },
{ NULL, 0, 0, 0 },
};
int c;
- c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR248p:vS:I:",
+ c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3ZFMNR248p:vS:I:",
lopts, NULL);
if (c == -1)
@@ -268,6 +280,15 @@ static void parse_opts(int argc, char *argv[])
case '3':
mode |= SPI_3WIRE;
break;
+ case 'Z':
+ mode |= SPI_3WIRE_HIZ;
+ break;
+ case 'F':
+ mode |= SPI_RX_CPHA_FLIP;
+ break;
+ case 'M':
+ mode |= SPI_MOSI_IDLE_LOW;
+ break;
case 'N':
mode |= SPI_NO_CS;
break;
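
[Editor's note] Each new spidev_test flag (-Z, -F, -M) touches three places, as the hunks above show: the long-option table, the getopt optstring, and the switch that sets the mode bit. A compact sketch of that wiring; SPI_EXAMPLE_FLAG and the -X option are hypothetical, not real SPI mode bits.

#include <getopt.h>
#include <stdio.h>

#define SPI_EXAMPLE_FLAG (1u << 30)	/* hypothetical mode bit */

static unsigned int mode;

static void parse_opts(int argc, char *argv[])
{
	static const struct option lopts[] = {
		{ "example", 0, 0, 'X' },	/* 1. long-option table */
		{ NULL, 0, 0, 0 },
	};
	int c;

	/* 2. the short option letter must also appear in the optstring */
	while ((c = getopt_long(argc, argv, "X", lopts, NULL)) != -1) {
		switch (c) {
		case 'X':			/* 3. switch arm sets the bit */
			mode |= SPI_EXAMPLE_FLAG;
			break;
		}
	}
}

int main(int argc, char *argv[])
{
	parse_opts(argc, argv);
	printf("mode = 0x%x\n", mode);
	return 0;
}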
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 6f9347ade82c..90f3c9802ffb 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -6,13 +6,13 @@ ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
ldflags-y += --wrap=devm_cxl_port_enumerate_dports
ldflags-y += --wrap=devm_cxl_setup_hdm
-ldflags-y += --wrap=devm_cxl_enable_hdm
ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
ldflags-y += --wrap=devm_cxl_enumerate_decoders
ldflags-y += --wrap=cxl_await_media_ready
ldflags-y += --wrap=cxl_hdm_decode_init
ldflags-y += --wrap=cxl_dvsec_rr_decode
-ldflags-y += --wrap=cxl_rcrb_to_component
+ldflags-y += --wrap=devm_cxl_add_rch_dport
+ldflags-y += --wrap=cxl_rcd_component_reg_phys
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
@@ -57,6 +57,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o
cxl_core-y += $(CXL_CORE_SRC)/mbox.o
cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
+cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
cxl_core-y += config_check.o
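
[Editor's note] The `ldflags-y += --wrap=<symbol>` lines drive the mock framework used throughout tools/testing/cxl: the linker rewrites every call to the symbol into `__wrap_<symbol>()`, which can either mock the behavior or fall through to the original via `__real_<symbol>()`. A self-contained sketch with a hypothetical foo(); build it with `cc demo.c -Wl,--wrap=foo` to mirror the Kbuild flags.

#include <stdio.h>

int foo(int arg)				/* the "real" implementation */
{
	return arg + 1;
}

int __real_foo(int arg);			/* resolved to foo() by --wrap */

int __wrap_foo(int arg)				/* intercepts all calls to foo() */
{
	if (arg < 0)
		return 0;			/* mocked result */
	return __real_foo(arg);			/* defer to the real symbol */
}

int main(void)
{
	printf("%d %d\n", foo(-5), foo(5));	/* prints "0 6" under --wrap */
	return 0;
}

This is why the test/mock.c hunks below dispatch on is_mock_port(): the wrapper decides per call whether to answer from the mock ops or pass through to the real CXL core.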
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index bf00dc52fe96..fb6ab9cef84f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -713,7 +713,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld)
cxld->interleave_ways = 1;
cxld->interleave_granularity = 256;
- cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->commit = mock_decoder_commit;
cxld->reset = mock_decoder_reset;
}
@@ -754,7 +754,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
/* check if the endpoint is attached to host-bridge0 */
port = cxled_to_port(cxled);
do {
- if (port->uport == &cxl_host_bridge[0]->dev) {
+ if (port->uport_dev == &cxl_host_bridge[0]->dev) {
hb0 = true;
break;
}
@@ -787,7 +787,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->interleave_ways = 2;
eig_to_granularity(window->granularity, &cxld->interleave_granularity);
- cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->flags = CXL_DECODER_F_ENABLE;
cxled->state = CXL_DECODER_STATE_AUTO;
port->commit_end = cxld->id;
@@ -820,7 +820,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
} else
cxlsd->target[0] = dport;
cxld = &cxlsd->cxld;
- cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->flags = CXL_DECODER_F_ENABLE;
iter->commit_end = 0;
/*
@@ -889,7 +889,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
mock_init_hdm_decoder(cxld);
if (target_count) {
- rc = device_for_each_child(port->uport, &ctx,
+ rc = device_for_each_child(port->uport_dev, &ctx,
map_targets);
if (rc) {
put_device(&cxld->dev);
@@ -919,29 +919,29 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
int i, array_size;
if (port->depth == 1) {
- if (is_multi_bridge(port->uport)) {
+ if (is_multi_bridge(port->uport_dev)) {
array_size = ARRAY_SIZE(cxl_root_port);
array = cxl_root_port;
- } else if (is_single_bridge(port->uport)) {
+ } else if (is_single_bridge(port->uport_dev)) {
array_size = ARRAY_SIZE(cxl_root_single);
array = cxl_root_single;
} else {
dev_dbg(&port->dev, "%s: unknown bridge type\n",
- dev_name(port->uport));
+ dev_name(port->uport_dev));
return -ENXIO;
}
} else if (port->depth == 2) {
struct cxl_port *parent = to_cxl_port(port->dev.parent);
- if (is_multi_bridge(parent->uport)) {
+ if (is_multi_bridge(parent->uport_dev)) {
array_size = ARRAY_SIZE(cxl_switch_dport);
array = cxl_switch_dport;
- } else if (is_single_bridge(parent->uport)) {
+ } else if (is_single_bridge(parent->uport_dev)) {
array_size = ARRAY_SIZE(cxl_swd_single);
array = cxl_swd_single;
} else {
dev_dbg(&port->dev, "%s: unknown bridge type\n",
- dev_name(port->uport));
+ dev_name(port->uport_dev));
return -ENXIO;
}
} else {
@@ -954,9 +954,9 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
struct platform_device *pdev = array[i];
struct cxl_dport *dport;
- if (pdev->dev.parent != port->uport) {
+ if (pdev->dev.parent != port->uport_dev) {
dev_dbg(&port->dev, "%s: mismatch parent %s\n",
- dev_name(port->uport),
+ dev_name(port->uport_dev),
dev_name(pdev->dev.parent));
continue;
}
@@ -971,15 +971,6 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return 0;
}
-resource_size_t mock_cxl_rcrb_to_component(struct device *dev,
- resource_size_t rcrb,
- enum cxl_rcrb which)
-{
- dev_dbg(dev, "rcrb: %pa which: %d\n", &rcrb, which);
-
- return (resource_size_t) which + 1;
-}
-
static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_adev = is_mock_adev,
.is_mock_bridge = is_mock_bridge,
@@ -988,7 +979,6 @@ static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_dev = is_mock_dev,
.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
.acpi_evaluate_integer = mock_acpi_evaluate_integer,
- .cxl_rcrb_to_component = mock_cxl_rcrb_to_component,
.acpi_pci_find_root = mock_acpi_pci_find_root,
.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
.devm_cxl_setup_hdm = mock_cxl_setup_hdm,
@@ -1009,10 +999,6 @@ static void mock_companion(struct acpi_device *adev, struct device *dev)
#define SZ_64G (SZ_32G * 2)
#endif
-#ifndef SZ_512G
-#define SZ_512G (SZ_64G * 8)
-#endif
-
static __init int cxl_rch_init(void)
{
int rc, i;
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 34b48027b3de..464fc39ed277 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -8,11 +8,14 @@
#include <linux/sizes.h>
#include <linux/bits.h>
#include <asm/unaligned.h>
+#include <crypto/sha2.h>
#include <cxlmem.h>
#include "trace.h"
#define LSA_SIZE SZ_128K
+#define FW_SIZE SZ_64M
+#define FW_SLOTS 3
#define DEV_SIZE SZ_2G
#define EFFECT(x) (1U << x)
@@ -21,42 +24,70 @@
static unsigned int poison_inject_dev_max = MOCK_INJECT_DEV_MAX;
+enum cxl_command_effects {
+ CONF_CHANGE_COLD_RESET = 0,
+ CONF_CHANGE_IMMEDIATE,
+ DATA_CHANGE_IMMEDIATE,
+ POLICY_CHANGE_IMMEDIATE,
+ LOG_CHANGE_IMMEDIATE,
+ SECURITY_CHANGE_IMMEDIATE,
+ BACKGROUND_OP,
+ SECONDARY_MBOX_SUPPORTED,
+};
+
+#define CXL_CMD_EFFECT_NONE cpu_to_le16(0)
+
static struct cxl_cel_entry mock_cel[] = {
{
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS),
- .effect = cpu_to_le16(0),
+ .effect = CXL_CMD_EFFECT_NONE,
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY),
- .effect = cpu_to_le16(0),
+ .effect = CXL_CMD_EFFECT_NONE,
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_LSA),
- .effect = cpu_to_le16(0),
+ .effect = CXL_CMD_EFFECT_NONE,
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_PARTITION_INFO),
- .effect = cpu_to_le16(0),
+ .effect = CXL_CMD_EFFECT_NONE,
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA),
- .effect = cpu_to_le16(EFFECT(1) | EFFECT(2)),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE) |
+ EFFECT(DATA_CHANGE_IMMEDIATE)),
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO),
- .effect = cpu_to_le16(0),
+ .effect = CXL_CMD_EFFECT_NONE,
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON),
- .effect = cpu_to_le16(0),
+ .effect = CXL_CMD_EFFECT_NONE,
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_INJECT_POISON),
- .effect = cpu_to_le16(0),
+ .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE)),
},
{
.opcode = cpu_to_le16(CXL_MBOX_OP_CLEAR_POISON),
- .effect = cpu_to_le16(0),
+ .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FW_INFO),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_TRANSFER_FW),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) |
+ EFFECT(BACKGROUND_OP)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_ACTIVATE_FW),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) |
+ EFFECT(CONF_CHANGE_IMMEDIATE)),
},
};
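
[Editor's note] Replacing raw EFFECT(1) | EFFECT(2) masks with named enum positions makes the command-effects log self-documenting. A small sketch of building and testing such a mask with the named bits introduced above; le16 conversion is dropped here for a host-endian illustration.

#include <stdio.h>

enum cxl_command_effects {
	CONF_CHANGE_COLD_RESET = 0,
	CONF_CHANGE_IMMEDIATE,
	DATA_CHANGE_IMMEDIATE,
};

#define EFFECT(x) (1U << (x))

int main(void)
{
	unsigned int effect = EFFECT(CONF_CHANGE_IMMEDIATE) |
			      EFFECT(DATA_CHANGE_IMMEDIATE);

	if (effect & EFFECT(DATA_CHANGE_IMMEDIATE))
		printf("command changes data immediately\n");
	return 0;
}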
@@ -102,13 +133,17 @@ struct mock_event_log {
};
struct mock_event_store {
- struct cxl_dev_state *cxlds;
+ struct cxl_memdev_state *mds;
struct mock_event_log mock_logs[CXL_EVENT_TYPE_MAX];
u32 ev_status;
};
struct cxl_mockmem_data {
void *lsa;
+ void *fw;
+ int fw_slot;
+ int fw_staged;
+ size_t fw_size;
u32 security_state;
u8 user_pass[NVDIMM_PASSPHRASE_LEN];
u8 master_pass[NVDIMM_PASSPHRASE_LEN];
@@ -180,8 +215,7 @@ static void mes_add_event(struct mock_event_store *mes,
log->nr_events++;
}
-static int mock_get_event(struct cxl_dev_state *cxlds,
- struct cxl_mbox_cmd *cmd)
+static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
struct mock_event_log *log;
@@ -201,7 +235,7 @@ static int mock_get_event(struct cxl_dev_state *cxlds,
memset(cmd->payload_out, 0, cmd->size_out);
- log = event_find_log(cxlds->dev, log_type);
+ log = event_find_log(dev, log_type);
if (!log || event_log_empty(log))
return 0;
@@ -234,8 +268,7 @@ static int mock_get_event(struct cxl_dev_state *cxlds,
return 0;
}
-static int mock_clear_event(struct cxl_dev_state *cxlds,
- struct cxl_mbox_cmd *cmd)
+static int mock_clear_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_clear_event_payload *pl = cmd->payload_in;
struct mock_event_log *log;
@@ -246,7 +279,7 @@ static int mock_clear_event(struct cxl_dev_state *cxlds,
if (log_type >= CXL_EVENT_TYPE_MAX)
return -EINVAL;
- log = event_find_log(cxlds->dev, log_type);
+ log = event_find_log(dev, log_type);
if (!log)
return 0; /* No mock data in this log */
@@ -256,7 +289,7 @@ static int mock_clear_event(struct cxl_dev_state *cxlds,
* However, this is not good behavior for the host so test it.
*/
if (log->clear_idx + pl->nr_recs > log->cur_idx) {
- dev_err(cxlds->dev,
+ dev_err(dev,
"Attempting to clear more events than returned!\n");
return -EINVAL;
}
@@ -266,7 +299,7 @@ static int mock_clear_event(struct cxl_dev_state *cxlds,
nr < pl->nr_recs;
nr++, handle++) {
if (handle != le16_to_cpu(pl->handles[nr])) {
- dev_err(cxlds->dev, "Clearing events out of order\n");
+ dev_err(dev, "Clearing events out of order\n");
return -EINVAL;
}
}
@@ -293,7 +326,7 @@ static void cxl_mock_event_trigger(struct device *dev)
event_reset_log(log);
}
- cxl_mem_get_event_records(mes->cxlds, mes->ev_status);
+ cxl_mem_get_event_records(mes->mds, mes->ev_status);
}
struct cxl_event_record_raw maint_needed = {
@@ -453,7 +486,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_log *gl = cmd->payload_in;
u32 offset = le32_to_cpu(gl->offset);
@@ -463,7 +496,7 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
if (cmd->size_in < sizeof(*gl))
return -EINVAL;
- if (length > cxlds->payload_size)
+ if (length > mds->payload_size)
return -EINVAL;
if (offset + length > sizeof(mock_cel))
return -EINVAL;
@@ -477,7 +510,7 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_rcd_id(struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_identify id = {
.fw_revision = { "mock fw v1 " },
@@ -495,7 +528,7 @@ static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_id(struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_identify id = {
.fw_revision = { "mock fw v1 " },
@@ -517,8 +550,7 @@ static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_partition_info(struct cxl_dev_state *cxlds,
- struct cxl_mbox_cmd *cmd)
+static int mock_partition_info(struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_partition_info pi = {
.active_volatile_cap =
@@ -535,11 +567,52 @@ static int mock_partition_info(struct cxl_dev_state *cxlds,
return 0;
}
-static int mock_get_security_state(struct cxl_dev_state *cxlds,
- struct cxl_mbox_cmd *cmd)
+static int mock_sanitize(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
+ if (cmd->size_in != 0)
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+	return 0; /* assume completion in under 2 seconds, no background op */
+}
+
+static int mock_secure_erase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ if (cmd->size_in != 0)
+ return -EINVAL;
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int mock_get_security_state(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
if (cmd->size_in)
return -EINVAL;
@@ -569,9 +642,9 @@ static void user_plimit_check(struct cxl_mockmem_data *mdata)
mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT;
}
-static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_set_passphrase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
struct cxl_set_pass *set_pass;
if (cmd->size_in != sizeof(*set_pass))
@@ -629,9 +702,9 @@ static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd
return -EINVAL;
}
-static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_disable_passphrase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
struct cxl_disable_pass *dis_pass;
if (cmd->size_in != sizeof(*dis_pass))
@@ -700,10 +773,9 @@ static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_
return 0;
}
-static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_freeze_security(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
-
if (cmd->size_in != 0)
return -EINVAL;
@@ -717,10 +789,9 @@ static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd
return 0;
}
-static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_unlock_security(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
-
if (cmd->size_in != NVDIMM_PASSPHRASE_LEN)
return -EINVAL;
@@ -759,10 +830,9 @@ static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd
return 0;
}
-static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds,
+static int mock_passphrase_secure_erase(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
struct cxl_pass_erase *erase;
if (cmd->size_in != sizeof(*erase))
@@ -858,10 +928,10 @@ static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds,
return 0;
}
-static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_get_lsa(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in;
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
void *lsa = mdata->lsa;
u32 offset, length;
@@ -878,10 +948,10 @@ static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_set_lsa(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in;
- struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
void *lsa = mdata->lsa;
u32 offset, length;
@@ -896,8 +966,7 @@ static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
return 0;
}
-static int mock_health_info(struct cxl_dev_state *cxlds,
- struct cxl_mbox_cmd *cmd)
+static int mock_health_info(struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_health_info health_info = {
/* set flags for maint needed, perf degraded, hw replacement */
@@ -1114,9 +1183,90 @@ static struct attribute *cxl_mock_mem_core_attrs[] = {
};
ATTRIBUTE_GROUPS(cxl_mock_mem_core);
-static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int mock_fw_info(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_fw_info fw_info = {
+ .num_slots = FW_SLOTS,
+ .slot_info = (mdata->fw_slot & 0x7) |
+ ((mdata->fw_staged & 0x7) << 3),
+ .activation_cap = 0,
+ };
+
+ strcpy(fw_info.slot_1_revision, "cxl_test_fw_001");
+ strcpy(fw_info.slot_2_revision, "cxl_test_fw_002");
+ strcpy(fw_info.slot_3_revision, "cxl_test_fw_003");
+ strcpy(fw_info.slot_4_revision, "");
+
+ if (cmd->size_out < sizeof(fw_info))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &fw_info, sizeof(fw_info));
+ return 0;
+}
+
+static int mock_transfer_fw(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
{
+ struct cxl_mbox_transfer_fw *transfer = cmd->payload_in;
+ void *fw = mdata->fw;
+ size_t offset, length;
+
+ offset = le32_to_cpu(transfer->offset) * CXL_FW_TRANSFER_ALIGNMENT;
+ length = cmd->size_in - sizeof(*transfer);
+ if (offset + length > FW_SIZE)
+ return -EINVAL;
+
+ switch (transfer->action) {
+ case CXL_FW_TRANSFER_ACTION_FULL:
+ if (offset != 0)
+ return -EINVAL;
+ fallthrough;
+ case CXL_FW_TRANSFER_ACTION_END:
+ if (transfer->slot == 0 || transfer->slot > FW_SLOTS)
+ return -EINVAL;
+ mdata->fw_size = offset + length;
+ break;
+ case CXL_FW_TRANSFER_ACTION_INITIATE:
+ case CXL_FW_TRANSFER_ACTION_CONTINUE:
+ break;
+ case CXL_FW_TRANSFER_ACTION_ABORT:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(fw + offset, transfer->data, length);
+ return 0;
+}
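
[Editor's note] mock_transfer_fw() shows the chunked-transfer arithmetic: the wire offset is expressed in CXL_FW_TRANSFER_ALIGNMENT units, so the byte offset is a multiply, and the running image size is offset + payload length, bounded by the buffer. A standalone sketch of that bookkeeping; the alignment and sizes below are assumed values for illustration only.

#include <stdio.h>
#include <stddef.h>

#define CXL_FW_TRANSFER_ALIGNMENT 128	/* assumed unit size */
#define FW_SIZE (64 * 1024 * 1024)	/* mirrors the SZ_64M buffer */

int main(void)
{
	size_t wire_offset = 4;		/* as carried in the command */
	size_t length = 4096;		/* payload bytes in this chunk */
	size_t offset = wire_offset * CXL_FW_TRANSFER_ALIGNMENT;

	if (offset + length > FW_SIZE) {
		fprintf(stderr, "chunk past end of firmware buffer\n");
		return 1;
	}
	printf("write %zu bytes at byte offset %zu\n", length, offset);
	return 0;
}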
+
+static int mock_activate_fw(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_activate_fw *activate = cmd->payload_in;
+
+ if (activate->slot == 0 || activate->slot > FW_SLOTS)
+ return -EINVAL;
+
+ switch (activate->action) {
+ case CXL_FW_ACTIVATE_ONLINE:
+ mdata->fw_slot = activate->slot;
+ mdata->fw_staged = 0;
+ return 0;
+ case CXL_FW_ACTIVATE_OFFLINE:
+ mdata->fw_staged = activate->slot;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int cxl_mock_mbox_send(struct cxl_memdev_state *mds,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_dev_state *cxlds = &mds->cxlds;
struct device *dev = cxlds->dev;
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
int rc = -EIO;
switch (cmd->opcode) {
@@ -1127,49 +1277,55 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
rc = mock_gsl(cmd);
break;
case CXL_MBOX_OP_GET_LOG:
- rc = mock_get_log(cxlds, cmd);
+ rc = mock_get_log(mds, cmd);
break;
case CXL_MBOX_OP_IDENTIFY:
if (cxlds->rcd)
- rc = mock_rcd_id(cxlds, cmd);
+ rc = mock_rcd_id(cmd);
else
- rc = mock_id(cxlds, cmd);
+ rc = mock_id(cmd);
break;
case CXL_MBOX_OP_GET_LSA:
- rc = mock_get_lsa(cxlds, cmd);
+ rc = mock_get_lsa(mdata, cmd);
break;
case CXL_MBOX_OP_GET_PARTITION_INFO:
- rc = mock_partition_info(cxlds, cmd);
+ rc = mock_partition_info(cmd);
break;
case CXL_MBOX_OP_GET_EVENT_RECORD:
- rc = mock_get_event(cxlds, cmd);
+ rc = mock_get_event(dev, cmd);
break;
case CXL_MBOX_OP_CLEAR_EVENT_RECORD:
- rc = mock_clear_event(cxlds, cmd);
+ rc = mock_clear_event(dev, cmd);
break;
case CXL_MBOX_OP_SET_LSA:
- rc = mock_set_lsa(cxlds, cmd);
+ rc = mock_set_lsa(mdata, cmd);
break;
case CXL_MBOX_OP_GET_HEALTH_INFO:
- rc = mock_health_info(cxlds, cmd);
+ rc = mock_health_info(cmd);
+ break;
+ case CXL_MBOX_OP_SANITIZE:
+ rc = mock_sanitize(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SECURE_ERASE:
+ rc = mock_secure_erase(mdata, cmd);
break;
case CXL_MBOX_OP_GET_SECURITY_STATE:
- rc = mock_get_security_state(cxlds, cmd);
+ rc = mock_get_security_state(mdata, cmd);
break;
case CXL_MBOX_OP_SET_PASSPHRASE:
- rc = mock_set_passphrase(cxlds, cmd);
+ rc = mock_set_passphrase(mdata, cmd);
break;
case CXL_MBOX_OP_DISABLE_PASSPHRASE:
- rc = mock_disable_passphrase(cxlds, cmd);
+ rc = mock_disable_passphrase(mdata, cmd);
break;
case CXL_MBOX_OP_FREEZE_SECURITY:
- rc = mock_freeze_security(cxlds, cmd);
+ rc = mock_freeze_security(mdata, cmd);
break;
case CXL_MBOX_OP_UNLOCK:
- rc = mock_unlock_security(cxlds, cmd);
+ rc = mock_unlock_security(mdata, cmd);
break;
case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
- rc = mock_passphrase_secure_erase(cxlds, cmd);
+ rc = mock_passphrase_secure_erase(mdata, cmd);
break;
case CXL_MBOX_OP_GET_POISON:
rc = mock_get_poison(cxlds, cmd);
@@ -1180,6 +1336,15 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
case CXL_MBOX_OP_CLEAR_POISON:
rc = mock_clear_poison(cxlds, cmd);
break;
+ case CXL_MBOX_OP_GET_FW_INFO:
+ rc = mock_fw_info(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_TRANSFER_FW:
+ rc = mock_transfer_fw(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_ACTIVATE_FW:
+ rc = mock_activate_fw(mdata, cmd);
+ break;
default:
break;
}
@@ -1195,6 +1360,11 @@ static void label_area_release(void *lsa)
vfree(lsa);
}
+static void fw_buf_release(void *buf)
+{
+ vfree(buf);
+}
+
static bool is_rcd(struct platform_device *pdev)
{
const struct platform_device_id *id = platform_get_device_id(pdev);
@@ -1215,6 +1385,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct cxl_memdev *cxlmd;
+ struct cxl_memdev_state *mds;
struct cxl_dev_state *cxlds;
struct cxl_mockmem_data *mdata;
int rc;
@@ -1227,52 +1398,67 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
mdata->lsa = vmalloc(LSA_SIZE);
if (!mdata->lsa)
return -ENOMEM;
+ mdata->fw = vmalloc(FW_SIZE);
+ if (!mdata->fw)
+ return -ENOMEM;
+ mdata->fw_slot = 2;
+
rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa);
if (rc)
return rc;
- cxlds = cxl_dev_state_create(dev);
- if (IS_ERR(cxlds))
- return PTR_ERR(cxlds);
+ rc = devm_add_action_or_reset(dev, fw_buf_release, mdata->fw);
+ if (rc)
+ return rc;
+
+ mds = cxl_memdev_state_create(dev);
+ if (IS_ERR(mds))
+ return PTR_ERR(mds);
+ mds->mbox_send = cxl_mock_mbox_send;
+ mds->payload_size = SZ_4K;
+ mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
+
+ cxlds = &mds->cxlds;
cxlds->serial = pdev->id;
- cxlds->mbox_send = cxl_mock_mbox_send;
- cxlds->payload_size = SZ_4K;
- cxlds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
if (is_rcd(pdev)) {
cxlds->rcd = true;
cxlds->component_reg_phys = CXL_RESOURCE_NONE;
}
- rc = cxl_enumerate_cmds(cxlds);
+ rc = cxl_enumerate_cmds(mds);
if (rc)
return rc;
- rc = cxl_poison_state_init(cxlds);
+ rc = cxl_poison_state_init(mds);
if (rc)
return rc;
- rc = cxl_set_timestamp(cxlds);
+ rc = cxl_set_timestamp(mds);
if (rc)
return rc;
cxlds->media_ready = true;
- rc = cxl_dev_state_identify(cxlds);
+ rc = cxl_dev_state_identify(mds);
if (rc)
return rc;
- rc = cxl_mem_create_range_info(cxlds);
+ rc = cxl_mem_create_range_info(mds);
if (rc)
return rc;
- mdata->mes.cxlds = cxlds;
+ mdata->mes.mds = mds;
cxl_mock_add_event_logs(&mdata->mes);
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
+ rc = cxl_memdev_setup_fw_upload(mds);
+ if (rc)
+ return rc;
+
+ cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
return 0;
}
@@ -1310,9 +1496,40 @@ static ssize_t security_lock_store(struct device *dev, struct device_attribute *
static DEVICE_ATTR_RW(security_lock);
+static ssize_t fw_buf_checksum_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ u8 hash[SHA256_DIGEST_SIZE];
+ unsigned char *hstr, *hptr;
+ struct sha256_state sctx;
+ ssize_t written = 0;
+ int i;
+
+ sha256_init(&sctx);
+ sha256_update(&sctx, mdata->fw, mdata->fw_size);
+ sha256_final(&sctx, hash);
+
+ hstr = kzalloc((SHA256_DIGEST_SIZE * 2) + 1, GFP_KERNEL);
+ if (!hstr)
+ return -ENOMEM;
+
+ hptr = hstr;
+ for (i = 0; i < SHA256_DIGEST_SIZE; i++)
+ hptr += sprintf(hptr, "%02x", hash[i]);
+
+ written = sysfs_emit(buf, "%s\n", hstr);
+
+ kfree(hstr);
+ return written;
+}
+
+static DEVICE_ATTR_RO(fw_buf_checksum);
+
static struct attribute *cxl_mock_mem_attrs[] = {
&dev_attr_security_lock.attr,
&dev_attr_event_trigger.attr,
+ &dev_attr_fw_buf_checksum.attr,
NULL
};
ATTRIBUTE_GROUPS(cxl_mock_mem);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 284416527644..1a61e68e3095 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -139,7 +139,7 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
struct cxl_hdm *cxlhdm;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
- if (ops && ops->is_mock_port(port->uport))
+ if (ops && ops->is_mock_port(port->uport_dev))
cxlhdm = ops->devm_cxl_setup_hdm(port, info);
else
cxlhdm = devm_cxl_setup_hdm(port, info);
@@ -149,27 +149,12 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL);
-int __wrap_devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm)
-{
- int index, rc;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport))
- rc = 0;
- else
- rc = devm_cxl_enable_hdm(port, cxlhdm);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enable_hdm, CXL);
-
int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
{
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
- if (ops && ops->is_mock_port(port->uport))
+ if (ops && ops->is_mock_port(port->uport_dev))
rc = ops->devm_cxl_add_passthrough_decoder(port);
else
rc = devm_cxl_add_passthrough_decoder(port);
@@ -186,7 +171,7 @@ int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
struct cxl_port *port = cxlhdm->port;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
- if (ops && ops->is_mock_port(port->uport))
+ if (ops && ops->is_mock_port(port->uport_dev))
rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info);
else
rc = devm_cxl_enumerate_decoders(cxlhdm, info);
@@ -201,7 +186,7 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
- if (ops && ops->is_mock_port(port->uport))
+ if (ops && ops->is_mock_port(port->uport_dev))
rc = ops->devm_cxl_port_enumerate_dports(port);
else
rc = devm_cxl_port_enumerate_dports(port);
@@ -259,24 +244,46 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec,
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, CXL);
-resource_size_t __wrap_cxl_rcrb_to_component(struct device *dev,
- resource_size_t rcrb,
- enum cxl_rcrb which)
+struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
+ struct device *dport_dev,
+ int port_id,
+ resource_size_t rcrb)
+{
+ int index;
+ struct cxl_dport *dport;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_port(dport_dev)) {
+ dport = devm_cxl_add_dport(port, dport_dev, port_id,
+ CXL_RESOURCE_NONE);
+ if (!IS_ERR(dport)) {
+ dport->rcrb.base = rcrb;
+ dport->rch = true;
+ }
+ } else
+ dport = devm_cxl_add_rch_dport(port, dport_dev, port_id, rcrb);
+ put_cxl_mock_ops(index);
+
+ return dport;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, CXL);
+
+resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
+ struct cxl_dport *dport)
{
int index;
resource_size_t component_reg_phys;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(dev))
- component_reg_phys =
- ops->cxl_rcrb_to_component(dev, rcrb, which);
+ component_reg_phys = CXL_RESOURCE_NONE;
else
- component_reg_phys = cxl_rcrb_to_component(dev, rcrb, which);
+ component_reg_phys = cxl_rcd_component_reg_phys(dev, dport);
put_cxl_mock_ops(index);
return component_reg_phys;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcrb_to_component, CXL);
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index bef8817b01f2..a94223750346 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -15,9 +15,6 @@ struct cxl_mock_ops {
acpi_string pathname,
struct acpi_object_list *arguments,
unsigned long long *data);
- resource_size_t (*cxl_rcrb_to_component)(struct device *dev,
- resource_size_t rcrb,
- enum cxl_rcrb which);
struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle);
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index f990cbb73250..0393940c706a 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -9,6 +9,8 @@ CONFIG_KUNIT=y
CONFIG_KUNIT_EXAMPLE_TEST=y
CONFIG_KUNIT_ALL_TESTS=y
+CONFIG_FORTIFY_SOURCE=y
+
CONFIG_IIO=y
CONFIG_EXT4_FS=y
diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config
index e824ce43b05a..54ad8972681a 100644
--- a/tools/testing/kunit/configs/arch_uml.config
+++ b/tools/testing/kunit/configs/arch_uml.config
@@ -3,3 +3,6 @@
# Enable virtio/pci, as a lot of tests require it.
CONFIG_VIRTIO_UML=y
CONFIG_UML_PCI_OVER_VIRTIO=y
+
+# Enable FORTIFY_SOURCE for wider checking.
+CONFIG_FORTIFY_SOURCE=y
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index f01f94106129..7f648802caf6 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -92,7 +92,7 @@ class LinuxSourceTreeOperations:
if stderr: # likely only due to build warnings
print(stderr.decode())
- def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
raise RuntimeError('not implemented!')
@@ -113,7 +113,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
kconfig.merge_in_entries(base_kunitconfig)
return kconfig
- def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
kernel_path = os.path.join(build_dir, self._kernel_path)
qemu_command = ['qemu-system-' + self._qemu_arch,
'-nodefaults',
@@ -142,7 +142,7 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
kconfig.merge_in_entries(base_kunitconfig)
return kconfig
- def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
"""Runs the Linux UML binary. Must be named 'linux'."""
linux_bin = os.path.join(build_dir, 'linux')
params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt'])
diff --git a/tools/testing/kunit/mypy.ini b/tools/testing/kunit/mypy.ini
new file mode 100644
index 000000000000..ddd288309efa
--- /dev/null
+++ b/tools/testing/kunit/mypy.ini
@@ -0,0 +1,6 @@
+[mypy]
+strict = True
+
+# E.g. we can't write subprocess.Popen[str] until Python 3.9+.
+# But kunit.py tries to support Python 3.7+, so let's disable it.
+disable_error_code = type-arg
diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py
index 8208c3b3135e..c6d494ea3373 100755
--- a/tools/testing/kunit/run_checks.py
+++ b/tools/testing/kunit/run_checks.py
@@ -23,7 +23,7 @@ commands: Dict[str, Sequence[str]] = {
'kunit_tool_test.py': ['./kunit_tool_test.py'],
'kunit smoke test': ['./kunit.py', 'run', '--kunitconfig=lib/kunit', '--build_dir=kunit_run_checks'],
'pytype': ['/bin/sh', '-c', 'pytype *.py'],
- 'mypy': ['mypy', '--strict', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'],
+ 'mypy': ['mypy', '--config-file', 'mypy.ini', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'],
}
# The user might not have mypy or pytype installed, skip them if so.
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
index 49ef68cccd6f..49bb416d34ff 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -2494,6 +2494,35 @@ static int alloc_nid_numa_split_all_reserved_generic_check(void)
return 0;
}
+/*
+ * A simple test that tries to allocate a memory region via
+ * memblock_alloc_node() on the NUMA node with id `nid`. The new region is
+ * expected to have the correct NUMA node set.
+ */
+static int alloc_node_on_correct_nid(void)
+{
+ int nid_req = 2;
+ void *allocated_ptr = NULL;
+#ifdef CONFIG_NUMA
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+#endif
+ phys_addr_t size = SZ_512;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ allocated_ptr = memblock_alloc_node(size, SMP_CACHE_BYTES, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+#ifdef CONFIG_NUMA
+ ASSERT_EQ(nid_req, req_node->nid);
+#endif
+
+ test_pass_pop();
+
+ return 0;
+}
+
/* Test case wrappers for NUMA tests */
static int alloc_nid_numa_simple_check(void)
{
@@ -2632,6 +2661,15 @@ static int alloc_nid_numa_split_all_reserved_check(void)
return 0;
}
+static int alloc_node_numa_on_correct_nid(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_node_on_correct_nid);
+ run_bottom_up(alloc_node_on_correct_nid);
+
+ return 0;
+}
+
int __memblock_alloc_nid_numa_checks(void)
{
test_print("Running %s NUMA tests...\n",
@@ -2652,6 +2690,8 @@ int __memblock_alloc_nid_numa_checks(void)
alloc_nid_numa_reserved_full_merge_check();
alloc_nid_numa_split_all_reserved_check();
+ alloc_node_numa_on_correct_nid();
+
return 0;
}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index e4e2d1650dd5..005043bd9623 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -3240,11 +3240,6 @@ static int nfit_test_probe(struct platform_device *pdev)
return 0;
}
-static int nfit_test_remove(struct platform_device *pdev)
-{
- return 0;
-}
-
static void nfit_test_release(struct device *dev)
{
struct nfit_test *nfit_test = to_nfit_test(dev);
@@ -3259,7 +3254,6 @@ static const struct platform_device_id nfit_test_id[] = {
static struct platform_driver nfit_test_driver = {
.probe = nfit_test_probe,
- .remove = nfit_test_remove,
.driver = {
.name = KBUILD_MODNAME,
},
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index b5f7a996c4d0..b00583d1eace 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -207,7 +207,36 @@ typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
const guid_t *guid, u64 rev, u64 func,
union acpi_object *argv4);
+void __iomem *__wrap_devm_ioremap(struct device *dev,
+ resource_size_t offset, unsigned long size);
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags);
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
+void *__wrap_memremap(resource_size_t offset, size_t size,
+ unsigned long flags);
+void __wrap_devm_memunmap(struct device *dev, void *addr);
+void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size);
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size);
void __wrap_iounmap(volatile void __iomem *addr);
+void __wrap_memunmap(void *addr);
+struct resource *__wrap___request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n, const char *name,
+ int flags);
+int __wrap_insert_resource(struct resource *parent, struct resource *res);
+int __wrap_remove_resource(struct resource *res);
+struct resource *__wrap___devm_request_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n, const char *name);
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+ resource_size_t n);
+void __wrap___devm_release_region(struct device *dev, struct resource *parent,
+ resource_size_t start, resource_size_t n);
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+ struct acpi_object_list *p, struct acpi_buffer *buf);
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
+ u64 rev, u64 func, union acpi_object *argv4);
+
void nfit_test_setup(nfit_test_lookup_fn lookup,
nfit_test_evaluate_dsm_fn evaluate);
void nfit_test_teardown(void);
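
These __wrap_* prototypes only work together with the linker's symbol
interposition: when an object is linked with --wrap=SYMBOL (the nvdimm test
build passes these through its linker flags), calls to SYMBOL are redirected
to __wrap_SYMBOL while the original stays reachable as __real_SYMBOL. A
minimal sketch of the pattern, where nfit_test_owns() and
nfit_test_fake_mapping() are hypothetical stand-ins for the real lookup
logic, not nfit_test code:

	/* Linked with: ld --wrap=ioremap */
	void __iomem *__real_ioremap(resource_size_t offset, unsigned long size);

	void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size)
	{
		if (nfit_test_owns(offset))	/* hypothetical helper */
			return nfit_test_fake_mapping(offset, size);

		return __real_ioremap(offset, size);	/* fall back to the kernel's */
	}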
diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h
index 1bb0afc21309..81563c3dfce7 100644
--- a/tools/testing/radix-tree/linux/init.h
+++ b/tools/testing/radix-tree/linux/init.h
@@ -1 +1,2 @@
#define __init
+#define __exit
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 9286d3baa12d..75ea2081a317 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -14,6 +14,7 @@
#include "test.h"
#include <stdlib.h>
#include <time.h>
+#include "linux/init.h"
#define module_init(x)
#define module_exit(x)
@@ -22,7 +23,6 @@
#define dump_stack() assert(0)
#include "../../../lib/maple_tree.c"
-#undef CONFIG_DEBUG_MAPLE_TREE
#include "../../../lib/test_maple_tree.c"
#define RCU_RANGE_COUNT 1000
@@ -81,7 +81,7 @@ static void check_mas_alloc_node_count(struct ma_state *mas)
* check_new_node() - Check the creation of new nodes and error path
* verification.
*/
-static noinline void check_new_node(struct maple_tree *mt)
+static noinline void __init check_new_node(struct maple_tree *mt)
{
struct maple_node *mn, *mn2, *mn3;
@@ -206,9 +206,9 @@ static noinline void check_new_node(struct maple_tree *mt)
e = i - 1;
} else {
if (i >= 4)
- e = i - 4;
- else if (i == 3)
- e = i - 2;
+ e = i - 3;
+ else if (i >= 1)
+ e = i - 1;
else
e = 0;
}
@@ -455,7 +455,7 @@ static noinline void check_new_node(struct maple_tree *mt)
/*
* Check erasing including RCU.
*/
-static noinline void check_erase(struct maple_tree *mt, unsigned long index,
+static noinline void __init check_erase(struct maple_tree *mt, unsigned long index,
void *ptr)
{
MT_BUG_ON(mt, mtree_test_erase(mt, index) != ptr);
@@ -465,24 +465,24 @@ static noinline void check_erase(struct maple_tree *mt, unsigned long index,
#define erase_check_insert(mt, i) check_insert(mt, set[i], entry[i%2])
#define erase_check_erase(mt, i) check_erase(mt, set[i], entry[i%2])
-static noinline void check_erase_testset(struct maple_tree *mt)
+static noinline void __init check_erase_testset(struct maple_tree *mt)
{
- unsigned long set[] = { 5015, 5014, 5017, 25, 1000,
- 1001, 1002, 1003, 1005, 0,
- 6003, 6002, 6008, 6012, 6015,
- 7003, 7002, 7008, 7012, 7015,
- 8003, 8002, 8008, 8012, 8015,
- 9003, 9002, 9008, 9012, 9015,
- 10003, 10002, 10008, 10012, 10015,
- 11003, 11002, 11008, 11012, 11015,
- 12003, 12002, 12008, 12012, 12015,
- 13003, 13002, 13008, 13012, 13015,
- 14003, 14002, 14008, 14012, 14015,
- 15003, 15002, 15008, 15012, 15015,
- };
-
-
- void *ptr = &set;
+ static const unsigned long set[] = { 5015, 5014, 5017, 25, 1000,
+ 1001, 1002, 1003, 1005, 0,
+ 6003, 6002, 6008, 6012, 6015,
+ 7003, 7002, 7008, 7012, 7015,
+ 8003, 8002, 8008, 8012, 8015,
+ 9003, 9002, 9008, 9012, 9015,
+ 10003, 10002, 10008, 10012, 10015,
+ 11003, 11002, 11008, 11012, 11015,
+ 12003, 12002, 12008, 12012, 12015,
+ 13003, 13002, 13008, 13012, 13015,
+ 14003, 14002, 14008, 14012, 14015,
+ 15003, 15002, 15008, 15012, 15015,
+ };
+
+
+ void *ptr = &check_erase_testset;
void *entry[2] = { ptr, mt };
void *root_node;
@@ -739,7 +739,7 @@ static noinline void check_erase_testset(struct maple_tree *mt)
int mas_ce2_over_count(struct ma_state *mas_start, struct ma_state *mas_end,
void *s_entry, unsigned long s_min,
void *e_entry, unsigned long e_max,
- unsigned long *set, int i, bool null_entry)
+ const unsigned long *set, int i, bool null_entry)
{
int count = 0, span = 0;
unsigned long retry = 0;
@@ -969,8 +969,8 @@ retry:
}
#if defined(CONFIG_64BIT)
-static noinline void check_erase2_testset(struct maple_tree *mt,
- unsigned long *set, unsigned long size)
+static noinline void __init check_erase2_testset(struct maple_tree *mt,
+ const unsigned long *set, unsigned long size)
{
int entry_count = 0;
int check = 0;
@@ -1054,7 +1054,7 @@ static noinline void check_erase2_testset(struct maple_tree *mt,
if (entry_count)
MT_BUG_ON(mt, !mt_height(mt));
#if check_erase2_debug > 1
- mt_dump(mt);
+ mt_dump(mt, mt_dump_hex);
#endif
#if check_erase2_debug
pr_err("Done\n");
@@ -1085,7 +1085,7 @@ static noinline void check_erase2_testset(struct maple_tree *mt,
mas_for_each(&mas, foo, ULONG_MAX) {
if (xa_is_zero(foo)) {
if (addr == mas.index) {
- mt_dump(mas.tree);
+ mt_dump(mas.tree, mt_dump_hex);
pr_err("retry failed %lu - %lu\n",
mas.index, mas.last);
MT_BUG_ON(mt, 1);
@@ -1114,11 +1114,11 @@ static noinline void check_erase2_testset(struct maple_tree *mt,
/* These tests were pulled from KVM tree modifications which failed. */
-static noinline void check_erase2_sets(struct maple_tree *mt)
+static noinline void __init check_erase2_sets(struct maple_tree *mt)
{
void *entry;
unsigned long start = 0;
- unsigned long set[] = {
+ static const unsigned long set[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140721266458624, 140737488351231,
ERASE, 140721266458624, 140737488351231,
@@ -1136,7 +1136,7 @@ ERASE, 140253902692352, 140253902864383,
STORE, 140253902692352, 140253902696447,
STORE, 140253902696448, 140253902864383,
};
- unsigned long set2[] = {
+ static const unsigned long set2[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140735933583360, 140737488351231,
ERASE, 140735933583360, 140737488351231,
@@ -1160,7 +1160,7 @@ STORE, 140277094813696, 140277094821887,
STORE, 140277094821888, 140277094825983,
STORE, 140735933906944, 140735933911039,
};
- unsigned long set3[] = {
+ static const unsigned long set3[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140735790264320, 140737488351231,
ERASE, 140735790264320, 140737488351231,
@@ -1203,7 +1203,7 @@ STORE, 47135835840512, 47135835885567,
STORE, 47135835885568, 47135835893759,
};
- unsigned long set4[] = {
+ static const unsigned long set4[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140728251703296, 140737488351231,
ERASE, 140728251703296, 140737488351231,
@@ -1224,7 +1224,7 @@ ERASE, 47646523277312, 47646523445247,
STORE, 47646523277312, 47646523400191,
};
- unsigned long set5[] = {
+ static const unsigned long set5[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140726874062848, 140737488351231,
ERASE, 140726874062848, 140737488351231,
@@ -1357,7 +1357,7 @@ STORE, 47884791619584, 47884791623679,
STORE, 47884791623680, 47884791627775,
};
- unsigned long set6[] = {
+ static const unsigned long set6[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140722999021568, 140737488351231,
ERASE, 140722999021568, 140737488351231,
@@ -1489,7 +1489,7 @@ ERASE, 47430432014336, 47430432022527,
STORE, 47430432014336, 47430432018431,
STORE, 47430432018432, 47430432022527,
};
- unsigned long set7[] = {
+ static const unsigned long set7[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140729808330752, 140737488351231,
ERASE, 140729808330752, 140737488351231,
@@ -1621,7 +1621,7 @@ ERASE, 47439987130368, 47439987138559,
STORE, 47439987130368, 47439987134463,
STORE, 47439987134464, 47439987138559,
};
- unsigned long set8[] = {
+ static const unsigned long set8[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140722482974720, 140737488351231,
ERASE, 140722482974720, 140737488351231,
@@ -1754,7 +1754,7 @@ STORE, 47708488638464, 47708488642559,
STORE, 47708488642560, 47708488646655,
};
- unsigned long set9[] = {
+ static const unsigned long set9[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140736427839488, 140737488351231,
ERASE, 140736427839488, 140736427839488,
@@ -5620,7 +5620,7 @@ ERASE, 47906195480576, 47906195480576,
STORE, 94641242615808, 94641242750975,
};
- unsigned long set10[] = {
+ static const unsigned long set10[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140736427839488, 140737488351231,
ERASE, 140736427839488, 140736427839488,
@@ -9484,7 +9484,7 @@ STORE, 139726599680000, 139726599684095,
ERASE, 47906195480576, 47906195480576,
STORE, 94641242615808, 94641242750975,
};
- unsigned long set11[] = {
+ static const unsigned long set11[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140732658499584, 140737488351231,
ERASE, 140732658499584, 140732658499584,
@@ -9510,7 +9510,7 @@ STORE, 140732658565120, 140732658569215,
STORE, 140732658552832, 140732658565119,
};
- unsigned long set12[] = { /* contains 12 values. */
+ static const unsigned long set12[] = { /* contains 12 values. */
STORE, 140737488347136, 140737488351231,
STORE, 140732658499584, 140737488351231,
ERASE, 140732658499584, 140732658499584,
@@ -9537,7 +9537,7 @@ STORE, 140732658552832, 140732658565119,
STORE, 140014592741375, 140014592741375, /* contrived */
STORE, 140014592733184, 140014592741376, /* creates first entry retry. */
};
- unsigned long set13[] = {
+ static const unsigned long set13[] = {
STORE, 140373516247040, 140373516251135,/*: ffffa2e7b0e10d80 */
STORE, 140373516251136, 140373516255231,/*: ffffa2e7b1195d80 */
STORE, 140373516255232, 140373516443647,/*: ffffa2e7b0e109c0 */
@@ -9550,7 +9550,7 @@ STORE, 140373518684160, 140373518688254,/*: ffffa2e7b05fec00 */
STORE, 140373518688256, 140373518692351,/*: ffffa2e7bfbdcd80 */
STORE, 140373518692352, 140373518696447,/*: ffffa2e7b0749e40 */
};
- unsigned long set14[] = {
+ static const unsigned long set14[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140731667996672, 140737488351231,
SNULL, 140731668000767, 140737488351231,
@@ -9834,7 +9834,7 @@ SNULL, 139826136543232, 139826136809471,
STORE, 139826136809472, 139826136842239,
STORE, 139826136543232, 139826136809471,
};
- unsigned long set15[] = {
+ static const unsigned long set15[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140722061451264, 140737488351231,
SNULL, 140722061455359, 140737488351231,
@@ -10119,7 +10119,7 @@ STORE, 139906808958976, 139906808991743,
STORE, 139906808692736, 139906808958975,
};
- unsigned long set16[] = {
+ static const unsigned long set16[] = {
STORE, 94174808662016, 94174809321471,
STORE, 94174811414528, 94174811426815,
STORE, 94174811426816, 94174811430911,
@@ -10330,7 +10330,7 @@ STORE, 139921865613312, 139921865617407,
STORE, 139921865547776, 139921865564159,
};
- unsigned long set17[] = {
+ static const unsigned long set17[] = {
STORE, 94397057224704, 94397057646591,
STORE, 94397057650688, 94397057691647,
STORE, 94397057691648, 94397057695743,
@@ -10392,7 +10392,7 @@ STORE, 140720477511680, 140720477646847,
STORE, 140720478302208, 140720478314495,
STORE, 140720478314496, 140720478318591,
};
- unsigned long set18[] = {
+ static const unsigned long set18[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140724953673728, 140737488351231,
SNULL, 140724953677823, 140737488351231,
@@ -10425,7 +10425,7 @@ STORE, 140222970597376, 140222970605567,
ERASE, 140222970597376, 140222970605567,
STORE, 140222970597376, 140222970605567,
};
- unsigned long set19[] = {
+ static const unsigned long set19[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140725182459904, 140737488351231,
SNULL, 140725182463999, 140737488351231,
@@ -10694,7 +10694,7 @@ STORE, 140656836775936, 140656836780031,
STORE, 140656787476480, 140656791920639,
ERASE, 140656774639616, 140656779083775,
};
- unsigned long set20[] = {
+ static const unsigned long set20[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140735952392192, 140737488351231,
SNULL, 140735952396287, 140737488351231,
@@ -10850,7 +10850,7 @@ STORE, 140590386819072, 140590386823167,
STORE, 140590386823168, 140590386827263,
SNULL, 140590376591359, 140590376595455,
};
- unsigned long set21[] = {
+ static const unsigned long set21[] = {
STORE, 93874710941696, 93874711363583,
STORE, 93874711367680, 93874711408639,
STORE, 93874711408640, 93874711412735,
@@ -10920,7 +10920,7 @@ ERASE, 140708393312256, 140708393316351,
ERASE, 140708393308160, 140708393312255,
ERASE, 140708393291776, 140708393308159,
};
- unsigned long set22[] = {
+ static const unsigned long set22[] = {
STORE, 93951397134336, 93951397183487,
STORE, 93951397183488, 93951397728255,
STORE, 93951397728256, 93951397826559,
@@ -11047,7 +11047,7 @@ STORE, 140551361253376, 140551361519615,
ERASE, 140551361253376, 140551361519615,
};
- unsigned long set23[] = {
+ static const unsigned long set23[] = {
STORE, 94014447943680, 94014448156671,
STORE, 94014450253824, 94014450257919,
STORE, 94014450257920, 94014450266111,
@@ -14371,7 +14371,7 @@ SNULL, 140175956627455, 140175985139711,
STORE, 140175927242752, 140175956627455,
STORE, 140175956627456, 140175985139711,
};
- unsigned long set24[] = {
+ static const unsigned long set24[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140735281639424, 140737488351231,
SNULL, 140735281643519, 140737488351231,
@@ -15533,7 +15533,7 @@ ERASE, 139635393024000, 139635401412607,
ERASE, 139635384627200, 139635384631295,
ERASE, 139635384631296, 139635393019903,
};
- unsigned long set25[] = {
+ static const unsigned long set25[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140737488343040, 140737488351231,
STORE, 140722547441664, 140737488351231,
@@ -22321,7 +22321,7 @@ STORE, 140249652703232, 140249682087935,
STORE, 140249682087936, 140249710600191,
};
- unsigned long set26[] = {
+ static const unsigned long set26[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140729464770560, 140737488351231,
SNULL, 140729464774655, 140737488351231,
@@ -22345,7 +22345,7 @@ ERASE, 140109040951296, 140109040959487,
STORE, 140109040955392, 140109040959487,
ERASE, 140109040955392, 140109040959487,
};
- unsigned long set27[] = {
+ static const unsigned long set27[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140726128070656, 140737488351231,
SNULL, 140726128074751, 140737488351231,
@@ -22741,7 +22741,7 @@ STORE, 140415509696512, 140415535910911,
ERASE, 140415537422336, 140415562588159,
STORE, 140415482433536, 140415509696511,
};
- unsigned long set28[] = {
+ static const unsigned long set28[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140722475622400, 140737488351231,
SNULL, 140722475626495, 140737488351231,
@@ -22809,7 +22809,7 @@ STORE, 139918413348864, 139918413352959,
ERASE, 139918413316096, 139918413344767,
STORE, 93865848528896, 93865848664063,
};
- unsigned long set29[] = {
+ static const unsigned long set29[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140734467944448, 140737488351231,
SNULL, 140734467948543, 140737488351231,
@@ -23684,7 +23684,7 @@ ERASE, 140143079972864, 140143088361471,
ERASE, 140143205793792, 140143205797887,
ERASE, 140143205797888, 140143214186495,
};
- unsigned long set30[] = {
+ static const unsigned long set30[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140733436743680, 140737488351231,
SNULL, 140733436747775, 140737488351231,
@@ -24566,7 +24566,7 @@ ERASE, 140165225893888, 140165225897983,
ERASE, 140165225897984, 140165234286591,
ERASE, 140165058105344, 140165058109439,
};
- unsigned long set31[] = {
+ static const unsigned long set31[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140730890784768, 140737488351231,
SNULL, 140730890788863, 140737488351231,
@@ -25379,7 +25379,7 @@ ERASE, 140623906590720, 140623914979327,
ERASE, 140622950277120, 140622950281215,
ERASE, 140622950281216, 140622958669823,
};
- unsigned long set32[] = {
+ static const unsigned long set32[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140731244212224, 140737488351231,
SNULL, 140731244216319, 140737488351231,
@@ -26175,7 +26175,7 @@ ERASE, 140400417288192, 140400425676799,
ERASE, 140400283066368, 140400283070463,
ERASE, 140400283070464, 140400291459071,
};
- unsigned long set33[] = {
+ static const unsigned long set33[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140734562918400, 140737488351231,
SNULL, 140734562922495, 140737488351231,
@@ -26317,7 +26317,7 @@ STORE, 140582961786880, 140583003750399,
ERASE, 140582961786880, 140583003750399,
};
- unsigned long set34[] = {
+ static const unsigned long set34[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140731327180800, 140737488351231,
SNULL, 140731327184895, 140737488351231,
@@ -27198,7 +27198,7 @@ ERASE, 140012522094592, 140012530483199,
ERASE, 140012033142784, 140012033146879,
ERASE, 140012033146880, 140012041535487,
};
- unsigned long set35[] = {
+ static const unsigned long set35[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140730536939520, 140737488351231,
SNULL, 140730536943615, 140737488351231,
@@ -27955,7 +27955,7 @@ ERASE, 140474471936000, 140474480324607,
ERASE, 140474396430336, 140474396434431,
ERASE, 140474396434432, 140474404823039,
};
- unsigned long set36[] = {
+ static const unsigned long set36[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140723893125120, 140737488351231,
SNULL, 140723893129215, 140737488351231,
@@ -28816,7 +28816,7 @@ ERASE, 140121890357248, 140121898745855,
ERASE, 140121269587968, 140121269592063,
ERASE, 140121269592064, 140121277980671,
};
- unsigned long set37[] = {
+ static const unsigned long set37[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140722404016128, 140737488351231,
SNULL, 140722404020223, 140737488351231,
@@ -28942,7 +28942,7 @@ STORE, 139759821246464, 139759888355327,
ERASE, 139759821246464, 139759888355327,
ERASE, 139759888355328, 139759955464191,
};
- unsigned long set38[] = {
+ static const unsigned long set38[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140730666221568, 140737488351231,
SNULL, 140730666225663, 140737488351231,
@@ -29752,7 +29752,7 @@ ERASE, 140613504712704, 140613504716799,
ERASE, 140613504716800, 140613513105407,
};
- unsigned long set39[] = {
+ static const unsigned long set39[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140736271417344, 140737488351231,
SNULL, 140736271421439, 140737488351231,
@@ -30124,7 +30124,7 @@ STORE, 140325364428800, 140325372821503,
STORE, 140325356036096, 140325364428799,
SNULL, 140325364432895, 140325372821503,
};
- unsigned long set40[] = {
+ static const unsigned long set40[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140734309167104, 140737488351231,
SNULL, 140734309171199, 140737488351231,
@@ -30875,7 +30875,7 @@ ERASE, 140320289300480, 140320289304575,
ERASE, 140320289304576, 140320297693183,
ERASE, 140320163409920, 140320163414015,
};
- unsigned long set41[] = {
+ static const unsigned long set41[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140728157171712, 140737488351231,
SNULL, 140728157175807, 140737488351231,
@@ -31185,7 +31185,7 @@ STORE, 94376135090176, 94376135094271,
STORE, 94376135094272, 94376135098367,
SNULL, 94376135094272, 94377208836095,
};
- unsigned long set42[] = {
+ static const unsigned long set42[] = {
STORE, 314572800, 1388314623,
STORE, 1462157312, 1462169599,
STORE, 1462169600, 1462185983,
@@ -33862,7 +33862,7 @@ SNULL, 3798999040, 3799101439,
*/
};
- unsigned long set43[] = {
+ static const unsigned long set43[] = {
STORE, 140737488347136, 140737488351231,
STORE, 140734187720704, 140737488351231,
SNULL, 140734187724800, 140737488351231,
@@ -34513,7 +34513,7 @@ static void *rcu_reader_rev(void *ptr)
if (mas.index != r_start) {
alt = xa_mk_value(index + i * 2 + 1 +
RCU_RANGE_COUNT);
- mt_dump(test->mt);
+ mt_dump(test->mt, mt_dump_dec);
printk("Error: %lu-%lu %p != %lu-%lu %p %p line %d i %d\n",
mas.index, mas.last, entry,
r_start, r_end, expected, alt,
@@ -34996,7 +34996,7 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
MT_BUG_ON(mt, !vals->seen_entry3);
MT_BUG_ON(mt, !vals->seen_both);
}
-static noinline void check_rcu_simulated(struct maple_tree *mt)
+static noinline void __init check_rcu_simulated(struct maple_tree *mt)
{
unsigned long i, nr_entries = 1000;
unsigned long target = 4320;
@@ -35157,7 +35157,7 @@ static noinline void check_rcu_simulated(struct maple_tree *mt)
rcu_unregister_thread();
}
-static noinline void check_rcu_threaded(struct maple_tree *mt)
+static noinline void __init check_rcu_threaded(struct maple_tree *mt)
{
unsigned long i, nr_entries = 1000;
struct rcu_test_struct vals;
@@ -35259,6 +35259,7 @@ static void mas_dfs_preorder(struct ma_state *mas)
struct maple_enode *prev;
unsigned char end, slot = 0;
+ unsigned long *pivots;
if (mas->node == MAS_START) {
mas_start(mas);
@@ -35291,6 +35292,9 @@ walk_up:
mas_ascend(mas);
goto walk_up;
}
+ pivots = ma_pivots(mte_to_node(prev), mte_node_type(prev));
+ mas->max = mas_safe_pivot(mas, pivots, slot, mte_node_type(prev));
+ mas->min = mas_safe_min(mas, pivots, slot);
return;
done:
@@ -35366,7 +35370,7 @@ static void check_dfs_preorder(struct maple_tree *mt)
/* End of depth first search tests */
/* Preallocation testing */
-static noinline void check_prealloc(struct maple_tree *mt)
+static noinline void __init check_prealloc(struct maple_tree *mt)
{
unsigned long i, max = 100;
unsigned long allocated;
@@ -35494,7 +35498,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
/* End of preallocation testing */
/* Spanning writes, writes that span nodes and layers of the tree */
-static noinline void check_spanning_write(struct maple_tree *mt)
+static noinline void __init check_spanning_write(struct maple_tree *mt)
{
unsigned long i, max = 5000;
MA_STATE(mas, mt, 1200, 2380);
@@ -35662,7 +35666,7 @@ static noinline void check_spanning_write(struct maple_tree *mt)
/* End of spanning write testing */
/* Writes to a NULL area that are adjacent to other NULLs */
-static noinline void check_null_expand(struct maple_tree *mt)
+static noinline void __init check_null_expand(struct maple_tree *mt)
{
unsigned long i, max = 100;
unsigned char data_end;
@@ -35723,7 +35727,7 @@ static noinline void check_null_expand(struct maple_tree *mt)
/* End of NULL area expansions */
/* Checking for no memory is best done outside the kernel */
-static noinline void check_nomem(struct maple_tree *mt)
+static noinline void __init check_nomem(struct maple_tree *mt)
{
MA_STATE(ms, mt, 1, 1);
@@ -35758,7 +35762,7 @@ static noinline void check_nomem(struct maple_tree *mt)
mtree_destroy(mt);
}
-static noinline void check_locky(struct maple_tree *mt)
+static noinline void __init check_locky(struct maple_tree *mt)
{
MA_STATE(ms, mt, 2, 2);
MA_STATE(reader, mt, 2, 2);
@@ -35780,10 +35784,10 @@ void farmer_tests(void)
struct maple_node *node;
DEFINE_MTREE(tree);
- mt_dump(&tree);
+ mt_dump(&tree, mt_dump_dec);
tree.ma_root = xa_mk_value(0);
- mt_dump(&tree);
+ mt_dump(&tree, mt_dump_dec);
node = mt_alloc_one(GFP_KERNEL);
node->parent = (void *)((unsigned long)(&tree) | 1);
@@ -35793,7 +35797,7 @@ void farmer_tests(void)
node->mr64.pivot[1] = 1;
node->mr64.pivot[2] = 0;
tree.ma_root = mt_mk_node(node, maple_leaf_64);
- mt_dump(&tree);
+ mt_dump(&tree, mt_dump_dec);
node->parent = ma_parent_ptr(node);
ma_free_rcu(node);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index e00520cc6349..cffaf2245d4f 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -159,7 +159,7 @@ void multiorder_tagged_iteration(struct xarray *xa)
item_kill_tree(xa);
}
-bool stop_iteration = false;
+bool stop_iteration;
static void *creator_func(void *ptr)
{
@@ -201,6 +201,7 @@ static void multiorder_iteration_race(struct xarray *xa)
pthread_t worker_thread[num_threads];
int i;
+ stop_iteration = false;
pthread_create(&worker_thread[0], NULL, &creator_func, xa);
for (i = 1; i < num_threads; i++)
pthread_create(&worker_thread[i], NULL, &iterator_func, xa);
@@ -211,6 +212,61 @@ static void multiorder_iteration_race(struct xarray *xa)
item_kill_tree(xa);
}
+static void *load_creator(void *ptr)
+{
+ /* 'order' is set up to ensure we have sibling entries */
+ unsigned int order;
+ struct radix_tree_root *tree = ptr;
+ int i;
+
+ rcu_register_thread();
+ item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0);
+ item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0);
+ for (i = 0; i < 10000; i++) {
+ for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) {
+ unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
+ (1 << order);
+ item_insert_order(tree, index, order);
+ item_delete_rcu(tree, index);
+ }
+ }
+ rcu_unregister_thread();
+
+ stop_iteration = true;
+ return NULL;
+}
+
+static void *load_worker(void *ptr)
+{
+ unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1;
+
+ rcu_register_thread();
+ while (!stop_iteration) {
+ struct item *item = xa_load(ptr, index);
+ assert(!xa_is_internal(item));
+ }
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static void load_race(struct xarray *xa)
+{
+ const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+ pthread_t worker_thread[num_threads];
+ int i;
+
+ stop_iteration = false;
+ pthread_create(&worker_thread[0], NULL, &load_creator, xa);
+ for (i = 1; i < num_threads; i++)
+ pthread_create(&worker_thread[i], NULL, &load_worker, xa);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(worker_thread[i], NULL);
+
+ item_kill_tree(xa);
+}
+
static DEFINE_XARRAY(array);
void multiorder_checks(void)
@@ -218,12 +274,20 @@ void multiorder_checks(void)
multiorder_iteration(&array);
multiorder_tagged_iteration(&array);
multiorder_iteration_race(&array);
+ load_race(&array);
radix_tree_cpu_dead(0);
}
-int __weak main(void)
+int __weak main(int argc, char **argv)
{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+ if (opt == 'v')
+ test_verbose++;
+ }
+
rcu_register_thread();
radix_tree_init();
multiorder_checks();
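
The interesting part of the new load_race() above is the index arithmetic:
the workers poll the last index of a multi-order region while the creator
repeatedly inserts and RCU-deletes entries that all end exactly there, so
every lookup races against sibling-entry teardown. A standalone sketch of
that arithmetic, assuming RADIX_TREE_MAP_SHIFT is 6 (64 slots per node); the
shift varies between configurations, so the concrete numbers are only
illustrative:

	#include <stdio.h>

	int main(void)
	{
		const unsigned int shift = 6;	/* assumed RADIX_TREE_MAP_SHIFT */
		unsigned long probe = (3UL << shift) - 1;	/* 191, the xa_load() target */
		unsigned int order;

		for (order = 1; order < shift; order++) {
			unsigned long index = (3UL << shift) - (1UL << order);

			printf("order %u spans %lu-%lu, covering index %lu\n",
			       order, index, index + (1UL << order) - 1, probe);
		}
		return 0;
	}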
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index a61c7bcbc72d..63f468bf8245 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -177,7 +177,7 @@ void regression1_test(void)
nr_threads = 2;
pthread_barrier_init(&worker_barrier, NULL, nr_threads);
- threads = malloc(nr_threads * sizeof(pthread_t *));
+ threads = malloc(nr_threads * sizeof(*threads));
for (i = 0; i < nr_threads; i++) {
arg = i;
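
The malloc() change applies the usual sizing idiom: deriving the element size
from the pointer being assigned keeps the allocation correct even if the
element type changes later, whereas sizeof(pthread_t *) only worked because
pthread_t happens to be pointer-sized on common ABIs. A generic sketch
(struct item is illustrative, not from this file):

	#include <stdlib.h>

	struct item { char payload[64]; };

	int main(void)
	{
		size_t nr = 16;
		struct item *good = malloc(nr * sizeof(*good));	/* always sized right */
		struct item *bad = malloc(nr * sizeof(struct item *));	/* under-sized once
									   the type outgrows
									   a pointer */
		free(good);
		free(bad);
		return 0;
	}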
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 90a62cf75008..666b56f22a41 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -4,6 +4,7 @@ TARGETS += amd-pstate
TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
+TARGETS += cachestat
TARGETS += capabilities
TARGETS += cgroup
TARGETS += clone3
@@ -86,6 +87,7 @@ TARGETS += timers
endif
TARGETS += tmpfs
TARGETS += tpm2
+TARGETS += tty
TARGETS += user
TARGETS += vDSO
TARGETS += mm
@@ -144,10 +146,12 @@ ifneq ($(KBUILD_OUTPUT),)
abs_objtree := $(realpath $(abs_objtree))
BUILD := $(abs_objtree)/kselftest
KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include
+ KHDR_DIR := ${abs_objtree}/usr/include
else
BUILD := $(CURDIR)
abs_srctree := $(shell cd $(top_srcdir) && pwd)
KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include
+ KHDR_DIR := ${abs_srctree}/usr/include
DEFAULT_INSTALL_HDR_PATH := 1
endif
@@ -161,7 +165,7 @@ export KHDR_INCLUDES
# all isn't the first target in the file.
.DEFAULT_GOAL := all
-all:
+all: kernel_header_files
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
@@ -172,6 +176,23 @@ all:
ret=$$((ret * $$?)); \
done; exit $$ret;
+kernel_header_files:
+ @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \
+ if [ $$? -ne 0 ]; then \
+ RED='\033[1;31m'; \
+ NOCOLOR='\033[0m'; \
+ echo; \
+ echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \
+ echo "Please run this and try again:"; \
+ echo; \
+ echo " cd $(top_srcdir)"; \
+ echo " make headers"; \
+ echo; \
+ exit 1; \
+ fi
+
+.PHONY: kernel_header_files
+
run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore
index 2b0d11797f25..12dc3fcd3456 100644
--- a/tools/testing/selftests/alsa/.gitignore
+++ b/tools/testing/selftests/alsa/.gitignore
@@ -1,2 +1,3 @@
mixer-test
pcm-test
+test-pcmtest-driver
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index 901949db80ad..5af9ba8a4645 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -12,7 +12,7 @@ LDLIBS+=-lpthread
OVERRIDE_TARGETS = 1
-TEST_GEN_PROGS := mixer-test pcm-test
+TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver
TEST_GEN_PROGS_EXTENDED := libatest.so
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
new file mode 100644
index 000000000000..357adc722cba
--- /dev/null
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is the test which covers PCM middle layer data transferring using
+ * the virtual pcm test driver (snd-pcmtest).
+ *
+ * Copyright 2023 Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include <string.h>
+#include <alsa/asoundlib.h>
+#include "../kselftest_harness.h"
+
+#define CH_NUM 4
+
+struct pattern_buf {
+ char buf[1024];
+ int len;
+};
+
+struct pattern_buf patterns[CH_NUM];
+
+struct pcmtest_test_params {
+ unsigned long buffer_size;
+ unsigned long period_size;
+ unsigned long channels;
+ unsigned int rate;
+ snd_pcm_access_t access;
+ size_t sec_buf_len;
+ size_t sample_size;
+ int time;
+ snd_pcm_format_t format;
+};
+
+static int read_patterns(void)
+{
+ FILE *fp, *fpl;
+ int i;
+ char pf[64];
+ char plf[64];
+
+ for (i = 0; i < CH_NUM; i++) {
+ sprintf(plf, "/sys/kernel/debug/pcmtest/fill_pattern%d_len", i);
+ fpl = fopen(plf, "r");
+ if (!fpl)
+ return -1;
+ fscanf(fpl, "%d", &patterns[i].len);
+ fclose(fpl);
+
+ sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i);
+ fp = fopen(pf, "r");
+ if (!fp)
+ return -1;
+ fread(patterns[i].buf, 1, patterns[i].len, fp);
+ fclose(fp);
+ }
+
+ return 0;
+}
+
+static int get_test_results(char *debug_name)
+{
+ int result;
+ FILE *f;
+ char fname[128];
+
+ sprintf(fname, "/sys/kernel/debug/pcmtest/%s", debug_name);
+
+ f = fopen(fname, "r");
+ if (!f) {
+ printf("Failed to open file\n");
+ return -1;
+ }
+ fscanf(f, "%d", &result);
+ fclose(f);
+
+ return result;
+}
+
+static size_t get_sec_buf_len(unsigned int rate, unsigned long channels, snd_pcm_format_t format)
+{
+ return rate * channels * snd_pcm_format_physical_width(format) / 8;
+}
+
+static int setup_handle(snd_pcm_t **handle, snd_pcm_sw_params_t *swparams,
+ snd_pcm_hw_params_t *hwparams, struct pcmtest_test_params *params,
+ int card, snd_pcm_stream_t stream)
+{
+ char pcm_name[32];
+ int err;
+
+ sprintf(pcm_name, "hw:%d,0,0", card);
+ err = snd_pcm_open(handle, pcm_name, stream, 0);
+ if (err < 0)
+ return err;
+ snd_pcm_hw_params_any(*handle, hwparams);
+ snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0);
+ snd_pcm_hw_params_set_access(*handle, hwparams, params->access);
+ snd_pcm_hw_params_set_format(*handle, hwparams, params->format);
+ snd_pcm_hw_params_set_channels(*handle, hwparams, params->channels);
+ snd_pcm_hw_params_set_rate_near(*handle, hwparams, &params->rate, 0);
+ snd_pcm_hw_params_set_period_size_near(*handle, hwparams, &params->period_size, 0);
+ snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, &params->buffer_size);
+ snd_pcm_hw_params(*handle, hwparams);
+ snd_pcm_sw_params_current(*handle, swparams);
+
+ snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0);
+ snd_pcm_sw_params_set_avail_min(*handle, swparams, params->period_size);
+ snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, &params->buffer_size);
+ snd_pcm_hw_params_set_period_size_near(*handle, hwparams, &params->period_size, 0);
+ snd_pcm_sw_params(*handle, swparams);
+ snd_pcm_hw_params(*handle, hwparams);
+
+ return 0;
+}
+
+FIXTURE(pcmtest) {
+ int card;
+ snd_pcm_sw_params_t *swparams;
+ snd_pcm_hw_params_t *hwparams;
+ struct pcmtest_test_params params;
+};
+
+FIXTURE_TEARDOWN(pcmtest) {
+}
+
+FIXTURE_SETUP(pcmtest) {
+ char *card_name;
+ int err;
+
+ if (geteuid())
+ SKIP(exit(-1), "This test needs root to run!");
+
+ err = read_patterns();
+ if (err)
+ SKIP(exit(-1), "Can't read patterns. Probably, module isn't loaded");
+
+ card_name = malloc(127);
+ ASSERT_NE(card_name, NULL);
+ self->params.buffer_size = 16384;
+ self->params.period_size = 4096;
+ self->params.channels = CH_NUM;
+ self->params.rate = 8000;
+ self->params.access = SND_PCM_ACCESS_RW_INTERLEAVED;
+ self->params.format = SND_PCM_FORMAT_S16_LE;
+ self->card = -1;
+ self->params.sample_size = snd_pcm_format_physical_width(self->params.format) / 8;
+
+ self->params.sec_buf_len = get_sec_buf_len(self->params.rate, self->params.channels,
+ self->params.format);
+ self->params.time = 4;
+
+ while (snd_card_next(&self->card) >= 0) {
+ if (self->card == -1)
+ break;
+ snd_card_get_name(self->card, &card_name);
+ if (!strcmp(card_name, "PCM-Test"))
+ break;
+ }
+ free(card_name);
+ ASSERT_NE(self->card, -1);
+}
+
+/*
+ * Try to send a looped, monotonically increasing sequence of bytes to the
+ * driver. If the data arrives uncorrupted, the driver will set the content
+ * of the 'pc_test' debugfs file to '1'.
+ */
+TEST_F(pcmtest, playback) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ snd_pcm_sframes_t write_res;
+ int test_results;
+ int i, cur_ch, pos_in_ch;
+ void *samples;
+ struct pcmtest_test_params *params = &self->params;
+
+ samples = calloc(self->params.sec_buf_len * self->params.time, 1);
+ ASSERT_NE(samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params,
+ self->card, SND_PCM_STREAM_PLAYBACK), 0);
+ snd_pcm_format_set_silence(params->format, samples,
+ params->rate * params->channels * params->time);
+ it = samples;
+ for (i = 0; i < self->params.sec_buf_len * params->time; i++) {
+ cur_ch = (i / params->sample_size) % CH_NUM;
+ pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size
+ + (i % params->sample_size);
+ it[i] = patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len];
+ }
+ write_res = snd_pcm_writei(handle, samples, params->rate * params->time);
+ ASSERT_GE(write_res, 0);
+
+ snd_pcm_close(handle);
+ free(samples);
+ test_results = get_test_results("pc_test");
+ ASSERT_EQ(test_results, 1);
+}
+
+/*
+ * Test that the virtual ALSA driver returns a looped, monotonically
+ * increasing sequence of bytes. In interleaved mode the buffer contains
+ * samples in the following order: C0, C1, C2, C3, C0, C1, ...
+ */
+TEST_F(pcmtest, capture) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ snd_pcm_sframes_t read_res;
+ int i, cur_ch, pos_in_ch;
+ void *samples;
+ struct pcmtest_test_params *params = &self->params;
+
+ samples = calloc(self->params.sec_buf_len * self->params.time, 1);
+ ASSERT_NE(samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ params, self->card, SND_PCM_STREAM_CAPTURE), 0);
+ snd_pcm_format_set_silence(params->format, samples,
+ params->rate * params->channels * params->time);
+ read_res = snd_pcm_readi(handle, samples, params->rate * params->time);
+ ASSERT_GE(read_res, 0);
+ snd_pcm_close(handle);
+ it = (unsigned char *)samples;
+ for (i = 0; i < self->params.sec_buf_len * self->params.time; i++) {
+ cur_ch = (i / params->sample_size) % CH_NUM;
+ pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size
+ + (i % params->sample_size);
+ ASSERT_EQ(it[i], patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len]);
+ }
+ free(samples);
+}
+
+// Test capture in the non-interleaved access mode. There is a separate buffer for each recorded channel.
+TEST_F(pcmtest, ni_capture) {
+ snd_pcm_t *handle;
+ struct pcmtest_test_params params = self->params;
+ char **chan_samples;
+ size_t i, j;
+ snd_pcm_sframes_t read_res;
+
+ chan_samples = calloc(CH_NUM, sizeof(*chan_samples));
+ ASSERT_NE(chan_samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED;
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ &params, self->card, SND_PCM_STREAM_CAPTURE), 0);
+
+ for (i = 0; i < CH_NUM; i++)
+ chan_samples[i] = calloc(params.sec_buf_len * params.time, 1);
+
+ for (i = 0; i < 1; i++) {
+ read_res = snd_pcm_readn(handle, (void **)chan_samples, params.rate * params.time);
+ ASSERT_GE(read_res, 0);
+ }
+ snd_pcm_close(handle);
+
+ for (i = 0; i < CH_NUM; i++) {
+ for (j = 0; j < params.rate * params.time; j++)
+ ASSERT_EQ(chan_samples[i][j], patterns[i].buf[j % patterns[i].len]);
+ free(chan_samples[i]);
+ }
+ free(chan_samples);
+}
+
+TEST_F(pcmtest, ni_playback) {
+ snd_pcm_t *handle;
+ struct pcmtest_test_params params = self->params;
+ char **chan_samples;
+ size_t i, j;
+ snd_pcm_sframes_t write_res;
+ int test_res;
+
+ chan_samples = calloc(CH_NUM, sizeof(*chan_samples));
+ ASSERT_NE(chan_samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED;
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ &params, self->card, SND_PCM_STREAM_PLAYBACK), 0);
+
+ for (i = 0; i < CH_NUM; i++) {
+ chan_samples[i] = calloc(params.sec_buf_len * params.time, 1);
+ for (j = 0; j < params.sec_buf_len * params.time; j++)
+ chan_samples[i][j] = patterns[i].buf[j % patterns[i].len];
+ }
+
+ for (i = 0; i < 1; i++) {
+ write_res = snd_pcm_writen(handle, (void **)chan_samples, params.rate * params.time);
+ ASSERT_GE(write_res, 0);
+ }
+
+ snd_pcm_close(handle);
+ test_res = get_test_results("pc_test");
+ ASSERT_EQ(test_res, 1);
+
+ for (i = 0; i < CH_NUM; i++)
+ free(chan_samples[i]);
+ free(chan_samples);
+}
+
+/*
+ * Test the custom ioctl definition inside the virtual driver. If it is
+ * triggered successfully, the driver sets the content of the 'ioctl_test'
+ * debugfs file to '1'.
+ */
+TEST_F(pcmtest, reset_ioctl) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ int test_res;
+ struct pcmtest_test_params *params = &self->params;
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params,
+ self->card, SND_PCM_STREAM_CAPTURE), 0);
+ snd_pcm_reset(handle);
+ test_res = get_test_results("ioctl_test");
+ ASSERT_EQ(test_res, 1);
+ snd_pcm_close(handle);
+}
+
+TEST_HARNESS_MAIN
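
The playback and capture checks above share one interleaved byte-to-channel
mapping, which is easiest to see with the fixture defaults (S16_LE gives a
sample_size of 2, CH_NUM is 4, rate is 8000 Hz): one second of audio is
8000 * 4 * 2 = 64000 bytes, matching get_sec_buf_len(). A standalone sketch
of the mapping for a single byte offset:

	#include <stdio.h>

	int main(void)
	{
		const unsigned long sample_size = 2, channels = 4;	/* fixture defaults */
		unsigned long i = 10;	/* arbitrary byte offset into the buffer */
		unsigned long cur_ch = (i / sample_size) % channels;
		unsigned long pos_in_ch = i / sample_size / channels * sample_size
					  + (i % sample_size);

		/* byte 10 lands in channel 1, at byte 2 of that channel's pattern */
		printf("byte %lu -> channel %lu, offset %lu\n", i, cur_ch, pos_in_ch);
		return 0;
	}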
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 9460cbe81bcc..ace8b67fb22d 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -42,7 +42,7 @@ run_tests: all
done
# Avoid any output on non arm64 on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(ARM64_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 93333a90bf3a..d4ad813fed10 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -39,6 +39,20 @@ static void cssc_sigill(void)
asm volatile(".inst 0xdac01c00" : : : "x0");
}
+static void mops_sigill(void)
+{
+ char dst[1], src[1];
+ register char *dstp asm ("x0") = dst;
+ register char *srcp asm ("x1") = src;
+ register long size asm ("x2") = 1;
+
+ /* CPYP [x0]!, [x1]!, x2! */
+ asm volatile(".inst 0x1d010440"
+ : "+r" (dstp), "+r" (srcp), "+r" (size)
+ :
+ : "cc", "memory");
+}
+
static void rng_sigill(void)
{
asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -210,6 +224,14 @@ static const struct hwcap_data {
.sigill_fn = cssc_sigill,
},
{
+ .name = "MOPS",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_MOPS,
+ .cpuinfo = "mops",
+ .sigill_fn = mops_sigill,
+ .sigill_reliable = true,
+ },
+ {
.name = "RNG",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_RNG,
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index be952511af22..abe4d58d731d 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -20,7 +20,7 @@
#include "../../kselftest.h"
-#define EXPECTED_TESTS 7
+#define EXPECTED_TESTS 11
#define MAX_TPIDRS 2
@@ -132,6 +132,34 @@ static void test_tpidr(pid_t child)
}
}
+static void test_hw_debug(pid_t child, int type, const char *type_name)
+{
+ struct user_hwdebug_state state;
+ struct iovec iov;
+ int slots, arch, ret;
+
+ iov.iov_len = sizeof(state);
+ iov.iov_base = &state;
+
+ /* Should be able to read the values */
+ ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+ ksft_test_result(ret == 0, "read_%s\n", type_name);
+
+ if (ret == 0) {
+ /* The low 8 bits are the number of slots, the next 4 bits the arch */
+ slots = state.dbg_info & 0xff;
+ arch = (state.dbg_info >> 8) & 0xf;
+
+ ksft_print_msg("%s version %d with %d slots\n", type_name,
+ arch, slots);
+
+ /* Zero is not currently architecturally valid */
+ ksft_test_result(arch, "%s_arch_set\n", type_name);
+ } else {
+ ksft_test_result_skip("%s_arch_set\n", type_name);
+ }
+}
+
static int do_child(void)
{
if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
@@ -207,6 +235,8 @@ static int do_parent(pid_t child)
ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
test_tpidr(child);
+ test_hw_debug(child, NT_ARM_HW_WATCH, "NT_ARM_HW_WATCH");
+ test_hw_debug(child, NT_ARM_HW_BREAK, "NT_ARM_HW_BREAK");
ret = EXIT_SUCCESS;
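
test_hw_debug() decodes dbg_info exactly as its comment describes; a small
worked example, with 0x610 as an invented value rather than a real register
read:

	#include <stdio.h>

	int main(void)
	{
		unsigned int dbg_info = 0x610;		/* invented example value */
		int slots = dbg_info & 0xff;		/* low 8 bits  -> 16 slots */
		int arch = (dbg_info >> 8) & 0xf;	/* next 4 bits -> arch 6 */

		printf("debug arch %d with %d slots\n", arch, slots);
		return 0;
	}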
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 8ab4c86837fd..839e3a252629 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -4,7 +4,7 @@ fake_sigreturn_*
sme_*
ssve_*
sve_*
-tpidr2_siginfo
+tpidr2_*
za_*
zt_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 40be8443949d..0dc948db3a4a 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -249,7 +249,8 @@ static void default_handler(int signum, siginfo_t *si, void *uc)
fprintf(stderr, "-- Timeout !\n");
} else {
fprintf(stderr,
- "-- RX UNEXPECTED SIGNAL: %d\n", signum);
+ "-- RX UNEXPECTED SIGNAL: %d code %d address %p\n",
+ signum, si->si_code, si->si_addr);
}
default_result(current, 1);
}
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
new file mode 100644
index 000000000000..f9a86c00c28c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is restored.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+static void set_tpidr2(uint64_t val)
+{
+ asm volatile (
+ "msr " SYS_TPIDR2 ", %0\n"
+ :
+ : "r"(val)
+ : "cc");
+}
+
+
+static uint64_t initial_tpidr2;
+
+static bool save_tpidr2(struct tdescr *td)
+{
+ initial_tpidr2 = get_tpidr2();
+ fprintf(stderr, "Initial TPIDR2: %lx\n", initial_tpidr2);
+
+ return true;
+}
+
+static int modify_tpidr2(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ uint64_t my_tpidr2 = get_tpidr2();
+
+ my_tpidr2++;
+ fprintf(stderr, "Setting TPIDR2 to %lx\n", my_tpidr2);
+ set_tpidr2(my_tpidr2);
+
+ return 0;
+}
+
+static void check_tpidr2(struct tdescr *td)
+{
+ uint64_t tpidr2 = get_tpidr2();
+
+ td->pass = tpidr2 == initial_tpidr2;
+
+ if (td->pass)
+ fprintf(stderr, "TPIDR2 restored\n");
+ else
+ fprintf(stderr, "TPIDR2 was %lx but is now %lx\n",
+ initial_tpidr2, tpidr2);
+}
+
+struct tdescr tde = {
+ .name = "TPIDR2 restore",
+ .descr = "Validate that TPIDR2 is restored from the sigframe",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .sig_trig = SIGUSR1,
+ .init = save_tpidr2,
+ .run = modify_tpidr2,
+ .check_result = check_tpidr2,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
index e1eb4d5c027a..2e384d731618 100644
--- a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
@@ -65,6 +65,7 @@ int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
fprintf(stderr, "ZT data invalid\n");
+ free(zeros);
return 1;
}
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 0a6837f97c32..08adc805878b 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,33 +1,6 @@
-bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22
-bpf_cookie/lsm
-bpf_cookie/multi_kprobe_attach_api
-bpf_cookie/multi_kprobe_link_api
-bpf_cookie/trampoline
-bpf_loop/check_callback_fn_stop # link unexpected error: -524
-bpf_loop/check_invalid_flags
-bpf_loop/check_nested_calls
-bpf_loop/check_non_constant_callback
-bpf_loop/check_nr_loops
-bpf_loop/check_null_callback_ctx
-bpf_loop/check_stack
-bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524)
-bpf_tcp_ca/dctcp_fallback
-btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0
-cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524)
-d_path/basic # setup attach failed: -524
-deny_namespace # attach unexpected error: -524 (errno 524)
-fentry_fexit # fentry_attach unexpected error: -1 (errno 524)
-fentry_test # fentry_attach unexpected error: -1 (errno 524)
-fexit_sleep # fexit_attach fexit attach failed: -1
-fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0
-fexit_test # fexit_attach unexpected error: -1 (errno 524)
-get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline)
-get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline)
-htab_update/reenter_update
-kfree_skb # attach fentry unexpected error: -524 (trampoline)
-kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF
-kfunc_call/subprog_lskel # skel unexpected error: -2
-kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22
+bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+fexit_sleep # The test never returns. The remaining tests cannot start.
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95
@@ -35,51 +8,5 @@ kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_mu
kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0
kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0
-kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524)
-ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF
-ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2
-libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524)
-linked_list
-lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524)
-lru_bug # lru_bug__attach unexpected error: -524 (errno 524)
-modify_return # modify_return__attach failed unexpected error: -524 (errno 524)
-module_attach # skel_attach skeleton attach failed: -524
-module_fentry_shadow # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0
-mptcp/base # run_test mptcp unexpected error: -524 (errno 524)
-netcnt # packets unexpected packets: actual 10001 != expected 10000
-rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22
-recursion # skel_attach unexpected error: -524 (errno 524)
-ringbuf # skel_attach skeleton attachment failed: -1
-setget_sockopt # attach_cgroup unexpected error: -524
-sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524)
-skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524)
-socket_cookie # prog_attach unexpected error: -524
-stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2
-task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524)
-task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524)
-test_bprm_opts # attach attach failed: -524
-test_ima # attach attach failed: -524
-test_local_storage # attach lsm attach failed: -524
-test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524)
-test_overhead # attach_fentry unexpected error: -524
-timer # timer unexpected error: -524 (errno 524)
-timer_crash # timer_crash__attach unexpected error: -524 (errno 524)
-timer_mim # timer_mim unexpected error: -524 (errno 524)
-trace_printk # trace_printk__attach unexpected error: -1 (errno 524)
-trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524)
-tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524)
-trampoline_count # attach_prog unexpected error: -524
-unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524)
-user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524)
-user_ringbuf/test_user_ringbuf_post_producer_wrong_offset
-user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz
-user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524)
-user_ringbuf/test_user_ringbuf_sample_full_ring_buffer
-user_ringbuf/test_user_ringbuf_post_alignment_autoadjust
-user_ringbuf/test_user_ringbuf_overfill
-user_ringbuf/test_user_ringbuf_discards_properly_ignored
-user_ringbuf/test_user_ringbuf_loop
-user_ringbuf/test_user_ringbuf_msg_protocol
-user_ringbuf/test_user_ringbuf_blocking_reserve
-verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524)
-vmlinux # skel_attach skeleton attach failed: -524
+kprobe_multi_test/skel_api # libbpf: failed to load BPF skeleton 'kprobe_multi': -3
+module_attach # prog 'kprobe_multi': failed to auto-attach: -95
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index c7463f3ec3c0..5061d9e24c16 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -26,3 +26,4 @@ user_ringbuf # failed to find kernel BTF type ID of
verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
xdp_metadata # JIT does not support calling kernel function (kfunc)
+test_task_under_cgroup # JIT does not support calling kernel function (kfunc)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 28d2c77262be..538df8fb8c42 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -88,8 +88,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
xdp_features
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
-TEST_GEN_FILES += liburandom_read.so
+TEST_GEN_FILES += liburandom_read.so urandom_read sign-file
# Emit succinct information message describing current building step
# $1 - generic step name (e.g., CC, LINK, etc);
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index d9c080ac1796..41fe5a82b88b 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -17,7 +17,7 @@ struct env env = {
.duration_sec = 5,
.affinity = false,
.quiet = false,
- .consumer_cnt = 1,
+ .consumer_cnt = 0,
.producer_cnt = 1,
};
@@ -441,12 +441,14 @@ static void setup_timer()
static void set_thread_affinity(pthread_t thread, int cpu)
{
cpu_set_t cpuset;
+ int err;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
- if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
+ err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+ if (err) {
fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
- cpu, errno);
+ cpu, -err);
exit(1);
}
}
@@ -467,7 +469,7 @@ static int next_cpu(struct cpu_set *cpu_set)
exit(1);
}
- return cpu_set->next_cpu++;
+ return cpu_set->next_cpu++ % env.nr_cpus;
}
static struct bench_state {
@@ -605,7 +607,7 @@ static void setup_benchmark(void)
bench->consumer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create consumer thread #%d: %d\n",
- i, -errno);
+ i, -err);
exit(1);
}
if (env.affinity)
@@ -624,7 +626,7 @@ static void setup_benchmark(void)
bench->producer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create producer thread #%d: %d\n",
- i, -errno);
+ i, -err);
exit(1);
}
if (env.affinity)
@@ -657,6 +659,7 @@ static void collect_measurements(long delta_ns) {
int main(int argc, char **argv)
{
+ env.nr_cpus = get_nprocs();
parse_cmdline_args_init(argc, argv);
if (env.list) {
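Two details in the bench.c hunks above are easy to miss: pthread_setaffinity_np() returns the error number directly rather than setting errno (hence the fprintf now prints -err), and next_cpu() wraps modulo env.nr_cpus, which main() seeds from get_nprocs(). A minimal standalone sketch of both conventions — illustrative only, not the selftests' code:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/sysinfo.h>

/* Illustrative sketch, not bench.c itself: pthread_* calls return an
 * errno-style code directly instead of setting errno, and CPU picks
 * wrap so they never exceed the number of online CPUs. */
static int next_cpu(int *next, int nr_cpus)
{
	return (*next)++ % nr_cpus;
}

int main(void)
{
	int next = 0, nr_cpus = get_nprocs();
	cpu_set_t cpuset;
	int err;

	CPU_ZERO(&cpuset);
	CPU_SET(next_cpu(&next, nr_cpus), &cpuset);
	err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
	if (err) /* err is the error code itself, e.g. EINVAL */
		fprintf(stderr, "setaffinity failed: %s\n", strerror(err));
	return err ? 1 : 0;
}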
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 402729c6a3ac..7ff32be3d730 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -27,6 +27,7 @@ struct env {
bool quiet;
int consumer_cnt;
int producer_cnt;
+ int nr_cpus;
struct cpu_set prod_cpus;
struct cpu_set cons_cpus;
};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
index 7c8ccc108313..e289dd1a14ee 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -107,9 +107,9 @@ const struct argp bench_bloom_map_argp = {
static void validate(void)
{
- if (env.consumer_cnt != 1) {
+ if (env.consumer_cnt != 0) {
fprintf(stderr,
- "The bloom filter benchmarks do not support multi-consumer use\n");
+ "The bloom filter benchmarks do not support consumer\n");
exit(1);
}
}
@@ -421,18 +421,12 @@ static void measure(struct bench_res *res)
last_false_hits = total_false_hits;
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
const struct bench bench_bloom_lookup = {
.name = "bloom-lookup",
.argp = &bench_bloom_map_argp,
.validate = validate,
.setup = bloom_lookup_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -444,7 +438,6 @@ const struct bench bench_bloom_update = {
.validate = validate,
.setup = bloom_update_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -456,7 +449,6 @@ const struct bench bench_bloom_false_positive = {
.validate = validate,
.setup = false_positive_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = false_hits_report_progress,
.report_final = false_hits_report_final,
@@ -468,7 +460,6 @@ const struct bench bench_hashmap_without_bloom = {
.validate = validate,
.setup = hashmap_no_bloom_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -480,7 +471,6 @@ const struct bench bench_hashmap_with_bloom = {
.validate = validate,
.setup = hashmap_with_bloom_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
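The hunks above, and the matching ones in the benchmarks that follow, delete no-op consumer callbacks: with env.consumer_cnt now defaulting to 0, the framework never spawns consumer threads, so the stubs serve no purpose. A hedged sketch of the resulting pattern — the struct and names mirror bench.c, but this is not the upstream implementation:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative sketch, not upstream bench.c: consumers are spawned only
 * when a benchmark actually provides a callback, so no-op stubs can go. */
struct bench {
	const char *name;
	void *(*consumer_thread)(void *input);
};

static void spawn_consumers(const struct bench *bench, pthread_t *threads,
			    int consumer_cnt)
{
	int i, err;

	if (consumer_cnt && !bench->consumer_thread) {
		fprintf(stderr, "benchmark '%s' has no consumer\n", bench->name);
		exit(1);
	}
	for (i = 0; i < consumer_cnt; i++) {
		err = pthread_create(&threads[i], NULL,
				     bench->consumer_thread, (void *)(long)i);
		if (err) {
			fprintf(stderr, "consumer #%d failed: %d\n", i, -err);
			exit(1);
		}
	}
}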
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index 75abe8137b6c..ee1dc12c5e5e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -14,8 +14,8 @@ static struct ctx {
static void validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -30,11 +30,6 @@ static void *producer(void *input)
return NULL;
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
static void measure(struct bench_res *res)
{
}
@@ -88,7 +83,6 @@ const struct bench bench_bpf_hashmap_full_update = {
.validate = validate,
.setup = setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = NULL,
.report_final = hashmap_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
index 8dbb02f75cff..279ff1b8b5b2 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
@@ -113,8 +113,8 @@ const struct argp bench_hashmap_lookup_argp = {
static void validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
@@ -134,11 +134,6 @@ static void *producer(void *input)
return NULL;
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
static void measure(struct bench_res *res)
{
}
@@ -276,7 +271,6 @@ const struct bench bench_bpf_hashmap_lookup = {
.validate = validate,
.setup = setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = NULL,
.report_final = hashmap_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
index d8a0394e10b1..a705cfb2bccc 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -47,8 +47,8 @@ const struct argp bench_bpf_loop_argp = {
static void validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -62,11 +62,6 @@ static void *producer(void *input)
return NULL;
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
static void measure(struct bench_res *res)
{
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
@@ -99,7 +94,6 @@ const struct bench bench_bpf_loop = {
.validate = validate,
.setup = setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = ops_report_progress,
.report_final = ops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
index 078972ce208e..ba89ed3936b7 100644
--- a/tools/testing/selftests/bpf/benchs/bench_count.c
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -18,11 +18,6 @@ static void *count_global_producer(void *input)
return NULL;
}
-static void *count_global_consumer(void *input)
-{
- return NULL;
-}
-
static void count_global_measure(struct bench_res *res)
{
struct count_global_ctx *ctx = &count_global_ctx;
@@ -40,7 +35,7 @@ static void count_local_setup(void)
{
struct count_local_ctx *ctx = &count_local_ctx;
- ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+ ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits));
if (!ctx->hits)
exit(1);
}
@@ -56,11 +51,6 @@ static void *count_local_producer(void *input)
return NULL;
}
-static void *count_local_consumer(void *input)
-{
- return NULL;
-}
-
static void count_local_measure(struct bench_res *res)
{
struct count_local_ctx *ctx = &count_local_ctx;
@@ -74,7 +64,6 @@ static void count_local_measure(struct bench_res *res)
const struct bench bench_count_global = {
.name = "count-global",
.producer_thread = count_global_producer,
- .consumer_thread = count_global_consumer,
.measure = count_global_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -84,7 +73,6 @@ const struct bench bench_count_local = {
.name = "count-local",
.setup = count_local_setup,
.producer_thread = count_local_producer,
- .consumer_thread = count_local_consumer,
.measure = count_local_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
index d4b2817306d4..452499428ceb 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -74,8 +74,8 @@ static void validate(void)
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
exit(1);
}
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
@@ -230,11 +230,6 @@ static inline void trigger_bpf_program(void)
syscall(__NR_getpgid);
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
static void *producer(void *input)
{
while (true)
@@ -259,7 +254,6 @@ const struct bench bench_local_storage_cache_seq_get = {
.validate = validate,
.setup = local_storage_cache_get_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = local_storage_report_progress,
.report_final = local_storage_report_final,
@@ -271,7 +265,6 @@ const struct bench bench_local_storage_cache_interleaved_get = {
.validate = validate,
.setup = local_storage_cache_get_interleaved_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = local_storage_report_progress,
.report_final = local_storage_report_final,
@@ -283,7 +276,6 @@ const struct bench bench_local_storage_cache_hashmap_control = {
.validate = validate,
.setup = hashmap_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = local_storage_report_progress,
.report_final = local_storage_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
index cff703f90e95..b36de42ee4d9 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -71,7 +71,7 @@ const struct argp bench_local_storage_create_argp = {
static void validate(void)
{
- if (env.consumer_cnt > 1) {
+ if (env.consumer_cnt != 0) {
fprintf(stderr,
"local-storage-create benchmark does not need consumer\n");
exit(1);
@@ -143,11 +143,6 @@ static void measure(struct bench_res *res)
res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0);
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
static void *sk_producer(void *input)
{
struct thread *t = &threads[(long)(input)];
@@ -257,7 +252,6 @@ const struct bench bench_local_storage_create = {
.validate = validate,
.setup = setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = report_progress,
.report_final = report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
index d5eb5587f2aa..edf0b00418c1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -72,8 +72,8 @@ static void validate(void)
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
exit(1);
}
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
@@ -197,11 +197,6 @@ static void measure(struct bench_res *res)
ctx.prev_kthread_stime = ticks;
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
static void *producer(void *input)
{
while (true)
@@ -262,7 +257,6 @@ const struct bench bench_local_storage_tasks_trace = {
.validate = validate,
.setup = local_storage_tasks_trace_setup,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = report_progress,
.report_final = report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index 3c203b6d6a6e..bf66893c7a33 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -17,8 +17,8 @@ static void validate(void)
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
exit(1);
}
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -106,17 +106,11 @@ static void setup_fexit(void)
attach_bpf(ctx.skel->progs.prog5);
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
const struct bench bench_rename_base = {
.name = "rename-base",
.validate = validate,
.setup = setup_base,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -127,7 +121,6 @@ const struct bench bench_rename_kprobe = {
.validate = validate,
.setup = setup_kprobe,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -138,7 +131,6 @@ const struct bench bench_rename_kretprobe = {
.validate = validate,
.setup = setup_kretprobe,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -149,7 +141,6 @@ const struct bench bench_rename_rawtp = {
.validate = validate,
.setup = setup_rawtp,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -160,7 +151,6 @@ const struct bench bench_rename_fentry = {
.validate = validate,
.setup = setup_fentry,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -171,7 +161,6 @@ const struct bench bench_rename_fexit = {
.validate = validate,
.setup = setup_fexit,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index fc91fdac4faa..3ca14ad36607 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -96,7 +96,7 @@ static inline void bufs_trigger_batch(void)
static void bufs_validate(void)
{
if (env.consumer_cnt != 1) {
- fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+ fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
index d3fad2ba6916..a5e1428fd7a0 100644
--- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c
+++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
@@ -50,8 +50,8 @@ const struct argp bench_strncmp_argp = {
static void strncmp_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "strncmp benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -128,11 +128,6 @@ static void *strncmp_producer(void *ctx)
return NULL;
}
-static void *strncmp_consumer(void *ctx)
-{
- return NULL;
-}
-
static void strncmp_measure(struct bench_res *res)
{
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
@@ -144,7 +139,6 @@ const struct bench bench_strncmp_no_helper = {
.validate = strncmp_validate,
.setup = strncmp_no_helper_setup,
.producer_thread = strncmp_producer,
- .consumer_thread = strncmp_consumer,
.measure = strncmp_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -156,7 +150,6 @@ const struct bench bench_strncmp_helper = {
.validate = strncmp_validate,
.setup = strncmp_helper_setup,
.producer_thread = strncmp_producer,
- .consumer_thread = strncmp_consumer,
.measure = strncmp_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 0c481de2833d..dbd362771d6a 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -13,8 +13,8 @@ static struct counter base_hits;
static void trigger_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -103,11 +103,6 @@ static void trigger_fmodret_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
-static void *trigger_consumer(void *input)
-{
- return NULL;
-}
-
/* make sure call is not inlined and not avoided by compiler, so __weak and
* inline asm volatile in the body of the function
*
@@ -205,7 +200,6 @@ const struct bench bench_trig_base = {
.name = "trig-base",
.validate = trigger_validate,
.producer_thread = trigger_base_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_base_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -216,7 +210,6 @@ const struct bench bench_trig_tp = {
.validate = trigger_validate,
.setup = trigger_tp_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -227,7 +220,6 @@ const struct bench bench_trig_rawtp = {
.validate = trigger_validate,
.setup = trigger_rawtp_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -238,7 +230,6 @@ const struct bench bench_trig_kprobe = {
.validate = trigger_validate,
.setup = trigger_kprobe_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -249,7 +240,6 @@ const struct bench bench_trig_fentry = {
.validate = trigger_validate,
.setup = trigger_fentry_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -260,7 +250,6 @@ const struct bench bench_trig_fentry_sleep = {
.validate = trigger_validate,
.setup = trigger_fentry_sleep_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -271,7 +260,6 @@ const struct bench bench_trig_fmodret = {
.validate = trigger_validate,
.setup = trigger_fmodret_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -281,7 +269,6 @@ const struct bench bench_trig_uprobe_base = {
.name = "trig-uprobe-base",
.setup = NULL, /* no uprobe/uretprobe is attached */
.producer_thread = uprobe_base_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_base_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -291,7 +278,6 @@ const struct bench bench_trig_uprobe_with_nop = {
.name = "trig-uprobe-with-nop",
.setup = uprobe_setup_with_nop,
.producer_thread = uprobe_producer_with_nop,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -301,7 +287,6 @@ const struct bench bench_trig_uretprobe_with_nop = {
.name = "trig-uretprobe-with-nop",
.setup = uretprobe_setup_with_nop,
.producer_thread = uprobe_producer_with_nop,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -311,7 +296,6 @@ const struct bench bench_trig_uprobe_without_nop = {
.name = "trig-uprobe-without-nop",
.setup = uprobe_setup_without_nop,
.producer_thread = uprobe_producer_without_nop,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -321,7 +305,6 @@ const struct bench bench_trig_uretprobe_without_nop = {
.name = "trig-uretprobe-without-nop",
.setup = uretprobe_setup_without_nop,
.producer_thread = uprobe_producer_without_nop,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
index 16f774b1cdbe..7b281dbe4165 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in base kprobe kretprobe rawtp fentry fexit fmodret
+for i in base kprobe kretprobe rawtp fentry fexit
do
summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-10s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index ada028aa9007..91e3567962ff 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -4,46 +4,48 @@ source ./benchs/run_common.sh
set -eufo pipefail
+RUN_RB_BENCH="$RUN_BENCH -c1"
+
header "Single-producer, parallel producer"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH $b)"
+ summarize $b "$($RUN_RB_BENCH $b)"
done
header "Single-producer, parallel producer, sampled notification"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-sampled $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-sampled $b)"
done
header "Single-producer, back-to-back mode"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-b2b $b)"
- summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-b2b $b)"
+ summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)"
done
header "Ringbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
- summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+ summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
done
header "Perfbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
- summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+ summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
done
header "Ringbuf back-to-back, reserve+commit vs output"
-summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
-summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)"
header "Ringbuf sampled, reserve+commit vs output"
-summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
-summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)"
header "Single-producer, consumer/producer competing on the same CPU, low batch count"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
done
header "Ringbuf, multi-producer contention"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
- summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+ summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 8c993ec8ceea..642dda0e758a 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -35,4 +35,10 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
void *buffer, __u32 buffer__szk) __ksym;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym;
+extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym;
+extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym;
+extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym;
+extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym;
+
#endif
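The declarations added above pair naturally with a ring-buffer dynptr. A minimal, untested BPF-side sketch; the map name, section, and trigger point are assumptions for illustration, not part of this patch:

/* Untested sketch of the new dynptr kfuncs; map and section names are
 * assumed for illustration only. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_kfuncs.h"

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 4096);
} rb SEC(".maps");

SEC("tp/syscalls/sys_enter_getpgid")
int dynptr_demo(void *ctx)
{
	struct bpf_dynptr ptr, clone;

	if (bpf_ringbuf_reserve_dynptr(&rb, 16, 0, &ptr))
		goto out;
	bpf_dynptr_clone(&ptr, &clone);
	bpf_dynptr_adjust(&clone, 4, 12);	/* restrict view to bytes [4, 12) */
	if (!bpf_dynptr_is_null(&clone) && !bpf_dynptr_is_rdonly(&clone))
		bpf_printk("clone size: %u", bpf_dynptr_size(&clone));
out:
	bpf_ringbuf_discard_dynptr(&ptr, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";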
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 52785ba671e6..aaf6ef1201c7 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -9,6 +9,7 @@
#include <linux/sysfs.h>
#include <linux/tracepoint.h>
#include "bpf_testmod.h"
+#include "bpf_testmod_kfunc.h"
#define CREATE_TRACE_POINTS
#include "bpf_testmod-events.h"
@@ -190,8 +191,6 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
return a + b + c;
}
-__diag_pop();
-
int bpf_testmod_fentry_ok;
noinline ssize_t
@@ -272,6 +271,14 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj,
EXPORT_SYMBOL(bpf_testmod_test_write);
ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
+noinline int bpf_fentry_shadow_test(int a)
+{
+ return a + 2;
+}
+EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
+
+__diag_pop();
+
static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.attr = { .name = "bpf_testmod", .mode = 0666, },
.read = bpf_testmod_test_read,
@@ -289,8 +296,171 @@ static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.set = &bpf_testmod_common_kfunc_ids,
};
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+ return a + b + c + d;
+}
+
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+ return a + b;
+}
+
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
+{
+ return sk;
+}
+
+__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+ /* Provoke the compiler to assume that the caller has sign-extended a,
+ * b and c on platforms where this is required (e.g. s390x).
+ */
+ return (long)a + (long)b + (long)c + d;
+}
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+ .cnt = REFCOUNT_INIT(1),
+};
+
+__bpf_kfunc struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+ refcount_inc(&prog_test_struct.cnt);
+ return &prog_test_struct;
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+__bpf_kfunc struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
+{
+ if (size > 2 * sizeof(int))
+ return NULL;
+
+ return (int *)p;
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+ const int rdwr_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+/* The next two can't really be used for testing, except to ensure
+ * that the verifier rejects the call.
+ * Acquire functions must return struct pointers, so these ones are
+ * deliberately failing.
+ */
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+{
+ /* p != NULL, but p->cnt could be 0 */
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
+{
+}
+
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
+{
+ return arg;
+}
+
BTF_SET8_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
BTF_SET8_END(bpf_testmod_check_kfunc_ids)
static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
@@ -298,12 +468,6 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
.set = &bpf_testmod_check_kfunc_ids,
};
-noinline int bpf_fentry_shadow_test(int a)
-{
- return a + 2;
-}
-EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
-
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -312,6 +476,8 @@ static int bpf_testmod_init(void)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
if (ret < 0)
return ret;
if (bpf_fentry_test1(0) < 0)
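With the set now also registered for BPF_PROG_TYPE_TRACING and BPF_PROG_TYPE_SYSCALL, the relocated kfuncs become callable from those program types. A hedged, untested sketch of a syscall program exercising the all-scalar variant:

/* Untested sketch: a syscall program calling one of the test kfuncs
 * now that bpf_testmod registers them for BPF_PROG_TYPE_SYSCALL. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;

SEC("syscall")
int call_test4(void *ctx)
{
	/* -1 + -2 + -3 + -4 == -10; nonzero return signals a mismatch */
	return bpf_kfunc_call_test4(-1, -2, -3, -4) == -10 ? 0 : 1;
}

char _license[] SEC("license") = "GPL";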
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
new file mode 100644
index 000000000000..f5c5b1375c24
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_TESTMOD_KFUNC_H
+#define _BPF_TESTMOD_KFUNC_H
+
+#ifndef __KERNEL__
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#else
+#define __ksym
+struct prog_test_member1 {
+ int a;
+};
+
+struct prog_test_member {
+ struct prog_test_member1 m;
+ int c;
+};
+
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_member memb;
+ struct prog_test_ref_kfunc *next;
+ refcount_t cnt;
+};
+#endif
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
+void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
+
+void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
+int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+void bpf_kfunc_call_int_mem_release(int *p) __ksym;
+
+/* bpf_kfunc_call_test_static_unused_arg is defined as static,
+ * but BPF program compilation needs to see it as a global symbol.
+ */
+#ifndef __KERNEL__
+u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
+#endif
+
+void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
+
+void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
+void bpf_kfunc_call_test_destructive(void) __ksym;
+
+void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
+struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
+void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p);
+void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
+void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
+void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
+void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
+#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 63cd4ab70171..3b350bc31343 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -13,6 +13,9 @@ CONFIG_CGROUP_BPF=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FPROBE=y
CONFIG_FTRACE_SYSCALLS=y
@@ -60,6 +63,7 @@ CONFIG_NET_SCH_INGRESS=y
CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=y
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_SYNPROXY=y
CONFIG_NETFILTER_XT_CONNMARK=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 596caa176582..a105c0cd008a 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -427,3 +427,26 @@ void close_netns(struct nstoken *token)
close(token->orig_netns_fd);
free(token);
}
+
+int get_socket_local_port(int sock_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ int err;
+
+ err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen);
+ if (err < 0)
+ return err;
+
+ if (addr.ss_family == AF_INET) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&addr;
+
+ return sin->sin_port;
+ } else if (addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr;
+
+ return sin->sin6_port;
+ }
+
+ return -1;
+}
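Note that get_socket_local_port() returns the raw sin_port/sin6_port field, i.e. a value in network byte order, so callers wanting a host-order port must convert it. A small usage sketch (assumed caller code, not part of this patch):

/* Assumed usage: the helper hands back network byte order, so convert
 * before printing or comparing against host-order port numbers. */
#include <arpa/inet.h>
#include "network_helpers.h"

static int local_port_host_order(int sock_fd)
{
	int port = get_socket_local_port(sock_fd);

	return port < 0 ? port : (int)ntohs((unsigned short)port);
}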
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index f882c691b790..694185644da6 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -56,6 +56,7 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
char *ping_command(int family);
+int get_socket_local_port(int sock_fd);
struct nstoken;
/**
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
index b17bfa0e0aac..bb143de68875 100644
--- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -96,12 +96,80 @@ static void test_parse_test_list(void)
goto error;
ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("t/subtest1,t/subtest2", &set, true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 2, "subtest filters count"))
+ goto error;
+ ASSERT_OK(strcmp("t", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("subtest1", set.tests[0].subtests[0]), "subtest name");
+ ASSERT_OK(strcmp("subtest2", set.tests[0].subtests[1]), "subtest name");
error:
free_test_filter_set(&set);
}
+static void test_parse_test_list_file(void)
+{
+ struct test_filter_set set;
+ char tmpfile[80];
+ FILE *fp;
+ int fd;
+
+ snprintf(tmpfile, sizeof(tmpfile), "/tmp/bpf_arg_parsing_test.XXXXXX");
+ fd = mkstemp(tmpfile);
+ if (!ASSERT_GE(fd, 0, "create tmp"))
+ return;
+
+ fp = fdopen(fd, "w");
+ if (!ASSERT_NEQ(fp, NULL, "fdopen tmp")) {
+ close(fd);
+ goto out_remove;
+ }
+
+ fprintf(fp, "# comment\n");
+ fprintf(fp, " test_with_spaces \n");
+ fprintf(fp, "testA/subtest # comment\n");
+ fprintf(fp, "testB#comment with no space\n");
+ fprintf(fp, "testB # duplicate\n");
+ fprintf(fp, "testA/subtest # subtest duplicate\n");
+ fprintf(fp, "testA/subtest2\n");
+ fprintf(fp, "testC_no_eof_newline");
+ fflush(fp);
+
+ if (!ASSERT_OK(ferror(fp), "prepare tmp"))
+ goto out_fclose;
+
+ init_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+
+ ASSERT_EQ(set.cnt, 4, "test count");
+ ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
+ ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
+ ASSERT_EQ(set.tests[1].subtest_cnt, 2, "test 1 subtest count");
+ ASSERT_OK(strcmp("subtest", set.tests[1].subtests[0]), "test 1 subtest 0");
+ ASSERT_OK(strcmp("subtest2", set.tests[1].subtests[1]), "test 1 subtest 1");
+ ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
+ ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+
+ free_test_filter_set(&set);
+
+out_fclose:
+ fclose(fp);
+out_remove:
+ remove(tmpfile);
+}
+
void test_arg_parsing(void)
{
if (test__start_subtest("test_parse_test_list"))
test_parse_test_list();
+ if (test__start_subtest("test_parse_test_list_file"))
+ test_parse_test_list_file();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
new file mode 100644
index 000000000000..118abc29b236
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "async_stack_depth.skel.h"
+
+void test_async_stack_depth(void)
+{
+ RUN_TESTS(async_stack_depth);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index a4d0cc9d3367..fe2c502e5089 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -11,6 +11,7 @@
#include "ksym_race.skel.h"
#include "bpf_mod_race.skel.h"
#include "kfunc_call_race.skel.h"
+#include "testing_helpers.h"
/* This test crafts a race between btf_try_get_module and do_init_module, and
* checks whether btf_try_get_module handles the invocation for a well-formed
@@ -44,35 +45,10 @@ enum bpf_test_state {
static _Atomic enum bpf_test_state state = _TS_INVALID;
-static int sys_finit_module(int fd, const char *param_values, int flags)
-{
- return syscall(__NR_finit_module, fd, param_values, flags);
-}
-
-static int sys_delete_module(const char *name, unsigned int flags)
-{
- return syscall(__NR_delete_module, name, flags);
-}
-
-static int load_module(const char *mod)
-{
- int ret, fd;
-
- fd = open("bpf_testmod.ko", O_RDONLY);
- if (fd < 0)
- return fd;
-
- ret = sys_finit_module(fd, "", 0);
- close(fd);
- if (ret < 0)
- return ret;
- return 0;
-}
-
static void *load_module_thread(void *p)
{
- if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+ if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail"))
atomic_store(&state, TS_MODULE_LOAD);
else
atomic_store(&state, TS_MODULE_LOAD_FAIL);
@@ -124,7 +100,7 @@ static void test_bpf_mod_race_config(const struct test_config *config)
if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
return;
- if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+ if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod"))
goto end_mmap;
skel = bpf_mod_race__open();
@@ -202,8 +178,8 @@ end_destroy:
bpf_mod_race__destroy(skel);
ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
end_module:
- sys_delete_module("bpf_testmod", 0);
- ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+ unload_bpf_testmod(false);
+ ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod");
end_mmap:
munmap(fault_addr, 4096);
atomic_store(&state, _TS_INVALID);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index c8ba4009e4ab..b30ff6b3b81a 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -123,12 +123,13 @@ static void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
end:
- if (srv_client_fd != -1)
- close(srv_client_fd);
if (client_fd != -1)
close(client_fd);
+ if (srv_client_fd != -1)
+ close(srv_client_fd);
if (srv_fd != -1)
close(srv_fd);
+
snprintf(cmd, sizeof(cmd), iptables, "-D");
system(cmd);
test_bpf_nf__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
new file mode 100644
index 000000000000..ee0458a5ce78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include "bpf/libbpf_internal.h"
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+__attribute__((unused))
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned int ms_flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags);
+}
+
+static void bpf_obj_pinning_detached(void)
+{
+ LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ int fs_fd = -1, mnt_fd = -1;
+ int map_fd = -1, map_fd2 = -1;
+ int zero = 0, src_value, dst_value, err;
+ const char *map_name = "fsmount_map";
+
+ /* Several of the UAPI calls below are constructed based on reading:
+ * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html
+ */
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ if (!ASSERT_GE(fs_fd, 0, "fs_fd"))
+ goto cleanup;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (!ASSERT_OK(err, "fs_create"))
+ goto cleanup;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_GE(mnt_fd, 0, "mnt_fd"))
+ goto cleanup;
+
+ /* If we wanted to expose the detached mount in the file system, we'd
+ * do something like below. But the whole point is that we don't even
+ * have to expose BPF FS in the file system to be able to work with it
+ * (pin and get objects).
+ *
+ * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH);
+ * if (!ASSERT_OK(err, "move_mount"))
+ * goto cleanup;
+ */
+
+ /* create BPF map to pin */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto cleanup;
+
+ /* pin BPF map into detached BPF FS through mnt_fd */
+ pin_opts.file_flags = BPF_F_PATH_FD;
+ pin_opts.path_fd = mnt_fd;
+ err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts);
+ if (!ASSERT_OK(err, "map_pin"))
+ goto cleanup;
+
+ /* get BPF map from detached BPF FS through mnt_fd */
+ get_opts.file_flags = BPF_F_PATH_FD;
+ get_opts.path_fd = mnt_fd;
+ map_fd2 = bpf_obj_get_opts(map_name, &get_opts);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* update map through one FD */
+ src_value = 0xcafebeef;
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq1");
+ ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2");
+
+cleanup:
+ if (map_fd >= 0)
+ ASSERT_OK(close(map_fd), "close_map_fd");
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ if (fs_fd >= 0)
+ ASSERT_OK(close(fs_fd), "close_fs_fd");
+ if (mnt_fd >= 0)
+ ASSERT_OK(close(mnt_fd), "close_mnt_fd");
+}
+
+enum path_kind {
+ PATH_STR_ABS,
+ PATH_STR_REL,
+ PATH_FD_REL,
+};
+
+static void validate_pin(int map_fd, const char *map_name, int src_value,
+ enum path_kind path_kind)
+{
+ LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+ char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+ const char *pin_path = NULL;
+ int zero = 0, dst_value, map_fd2, err;
+
+ snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+ old_cwd[0] = '\0';
+
+ switch (path_kind) {
+ case PATH_STR_ABS:
+ /* absolute path */
+ pin_path = abs_path;
+ break;
+ case PATH_STR_REL:
+ /* cwd + relative path */
+ ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+ ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+ pin_path = map_name;
+ break;
+ case PATH_FD_REL:
+ /* dir fd + relative path */
+ pin_opts.file_flags = BPF_F_PATH_FD;
+ pin_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+ ASSERT_GE(pin_opts.path_fd, 0, "path_fd");
+ pin_path = map_name;
+ break;
+ }
+
+ /* pin BPF map using specified path definition */
+ err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts);
+ ASSERT_OK(err, "obj_pin");
+
+ /* cleanup */
+ if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
+ close(pin_opts.path_fd);
+ if (old_cwd[0])
+ ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+ map_fd2 = bpf_obj_get(abs_path);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* update map through one FD */
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ unlink(abs_path);
+}
+
+static void validate_get(int map_fd, const char *map_name, int src_value,
+ enum path_kind path_kind)
+{
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+ const char *pin_path = NULL;
+ int zero = 0, dst_value, map_fd2, err;
+
+ snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+ /* pin BPF map using specified path definition */
+ err = bpf_obj_pin(map_fd, abs_path);
+ if (!ASSERT_OK(err, "pin_map"))
+ return;
+
+ old_cwd[0] = '\0';
+
+ switch (path_kind) {
+ case PATH_STR_ABS:
+ /* absolute path */
+ pin_path = abs_path;
+ break;
+ case PATH_STR_REL:
+ /* cwd + relative path */
+ ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+ ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+ pin_path = map_name;
+ break;
+ case PATH_FD_REL:
+ /* dir fd + relative path */
+ get_opts.file_flags = BPF_F_PATH_FD;
+ get_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+ ASSERT_GE(get_opts.path_fd, 0, "path_fd");
+ pin_path = map_name;
+ break;
+ }
+
+ map_fd2 = bpf_obj_get_opts(pin_path, &get_opts);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* cleanup */
+ if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
+ close(get_opts.path_fd);
+ if (old_cwd[0])
+ ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+ /* update map through one FD */
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ unlink(abs_path);
+}
+
+static void bpf_obj_pinning_mounted(enum path_kind path_kind)
+{
+ const char *map_name = "mounted_map";
+ int map_fd;
+
+ /* create BPF map to pin */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ return;
+
+ validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind);
+ validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind);
+ ASSERT_OK(close(map_fd), "close_map_fd");
+}
+
+void test_bpf_obj_pinning()
+{
+ if (test__start_subtest("detached"))
+ bpf_obj_pinning_detached();
+ if (test__start_subtest("mounted-str-abs"))
+ bpf_obj_pinning_mounted(PATH_STR_ABS);
+ if (test__start_subtest("mounted-str-rel"))
+ bpf_obj_pinning_mounted(PATH_STR_REL);
+ if (test__start_subtest("mounted-fd-rel"))
+ bpf_obj_pinning_mounted(PATH_FD_REL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 210d643fda6c..4e0cdb593318 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3991,6 +3991,46 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Invalid arg#1",
},
{
+ .descr = "decl_tag test #18, decl_tag as the map key type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "tag_type_check_btf",
+ .key_size = 8,
+ .value_size = 4,
+ .key_type_id = 3,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .map_create_err = true,
+},
+{
+ .descr = "decl_tag test #19, decl_tag as the map value type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "tag_type_check_btf",
+ .key_size = 4,
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .map_create_err = true,
+},
+{
.descr = "type_tag test #1",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
index 4d2fa99273d8..2bb5773d6f99 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
@@ -25,6 +25,8 @@ static void test_setsockopt_set(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that sets EUNATCH, assert that
* we actually get that error when we run setsockopt()
*/
@@ -59,6 +61,8 @@ static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that sets EUNATCH, and one that gets the
* previously set errno. Assert that we get the same errno back.
*/
@@ -100,6 +104,8 @@ static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that gets the previously set errno.
* Assert that, without anything setting one, we get 0.
*/
@@ -134,6 +140,8 @@ static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that gets the previously set errno, and then
* one that sets the errno to EUNATCH. Assert that the get does not
* see EUNATCH set later, and does not prevent EUNATCH from being set.
@@ -177,6 +185,8 @@ static void test_setsockopt_override(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
* and then one that gets the exported errno. Assert both the syscall
* and the helper sees the last set errno.
@@ -224,6 +234,8 @@ static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that return a reject without setting errno
* (legacy reject), and one that gets the errno. Assert that for
* backward compatibility the syscall result in EPERM, and this
@@ -268,6 +280,8 @@ static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach setsockopt that sets EUNATCH, then one that return a reject
* without setting errno, and then one that gets the exported errno.
* Assert both the syscall and the helper's errno are unaffected by
@@ -319,6 +333,8 @@ static void test_getsockopt_get(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach getsockopt that gets previously set errno. Assert that the
* error from kernel is in both ctx_retval_value and retval_value.
*/
@@ -359,6 +375,8 @@ static void test_getsockopt_override(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach getsockopt that sets retval to -EISCONN. Assert that this
* overrides the value from kernel.
*/
@@ -396,6 +414,8 @@ static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
/* Attach getsockopt that sets retval to -EISCONN, and one that clears
* ctx retval. Assert that clearing the ctx retval is synced to the helper
* and clears any errors from both kernel and BPF.
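
The recurring hunk above publishes the runtime page size into the BPF object's .bss. A minimal sketch of the pattern (the skeleton name is the one this test uses; the wrapper itself is illustrative): BPF programs cannot call sysconf(), so userspace writes the value into a global before the programs run, and the BPF side compares ctx->optlen against it.

#include <unistd.h>
#include "cgroup_getset_retval_setsockopt.skel.h"

static struct cgroup_getset_retval_setsockopt *open_with_page_size(void)
{
        struct cgroup_getset_retval_setsockopt *obj;

        obj = cgroup_getset_retval_setsockopt__open_and_load();
        if (!obj)
                return NULL;

        /* mirrors the "int page_size" global in the BPF object */
        obj->bss->page_size = sysconf(_SC_PAGESIZE);
        return obj;
}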
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 5338d2ea0460..2a9a30650350 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -183,7 +183,7 @@ cleanup:
void serial_test_check_mtu(void)
{
- __u32 mtu_lo;
+ int mtu_lo;
if (test__start_subtest("bpf_check_mtu XDP-attach"))
test_check_mtu_xdp_attach();
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index cdf4acc18e4c..756ea8b590b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -10,6 +10,7 @@ static const char * const cpumask_success_testcases[] = {
"test_set_clear_cpu",
"test_setall_clear_cpu",
"test_first_firstzero_cpu",
+ "test_firstand_nocpu",
"test_test_and_set_clear",
"test_and_or_xor",
"test_intersects_subset",
@@ -70,5 +71,6 @@ void test_cpumask(void)
verify_success(cpumask_success_testcases[i]);
}
+ RUN_TESTS(cpumask_success);
RUN_TESTS(cpumask_failure);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index d176c34a7d2e..7cfac53c0d58 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -20,6 +20,14 @@ static struct {
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_adjust", SETUP_SYSCALL_SLEEP},
+ {"test_adjust_err", SETUP_SYSCALL_SLEEP},
+ {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_is_null", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_is_rdonly", SETUP_SKB_PROG},
+ {"test_dynptr_clone", SETUP_SKB_PROG},
+ {"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
+ {"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index a1e712105811..2fd05649bad1 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/rtnetlink.h>
#include <sys/types.h>
#include <net/if.h>
@@ -15,14 +16,23 @@
#define IPV4_IFACE_ADDR "10.0.0.254"
#define IPV4_NUD_FAILED_ADDR "10.0.0.1"
#define IPV4_NUD_STALE_ADDR "10.0.0.2"
+#define IPV4_TBID_ADDR "172.0.0.254"
+#define IPV4_TBID_NET "172.0.0.0"
+#define IPV4_TBID_DST "172.0.0.2"
+#define IPV6_TBID_ADDR "fd00::FFFF"
+#define IPV6_TBID_NET "fd00::"
+#define IPV6_TBID_DST "fd00::2"
#define DMAC "11:11:11:11:11:11"
#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
+#define DMAC2 "01:01:01:01:01:01"
+#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, }
struct fib_lookup_test {
const char *desc;
const char *daddr;
int expected_ret;
int lookup_flags;
+ __u32 tbid;
__u8 dmac[6];
};
@@ -43,6 +53,22 @@ static const struct fib_lookup_test tests[] = {
{ .desc = "IPv4 skip neigh",
.daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 TBID lookup failure",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv4 TBID lookup success",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
+ { .desc = "IPv6 TBID lookup failure",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv6 TBID lookup success",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
};
static int ifindex;
@@ -53,6 +79,7 @@ static int setup_netns(void)
SYS(fail, "ip link add veth1 type veth peer name veth2");
SYS(fail, "ip link set dev veth1 up");
+ SYS(fail, "ip link set dev veth2 up");
err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900");
if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)"))
@@ -70,6 +97,17 @@ static int setup_netns(void)
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+ /* Setup for tbid lookup tests */
+ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
+ SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2);
+
+ SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR);
+ SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2);
+
err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
goto fail;
@@ -83,7 +121,7 @@ fail:
return -1;
}
-static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
+static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
{
int ret;
@@ -91,8 +129,9 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
params->l4_protocol = IPPROTO_TCP;
params->ifindex = ifindex;
+ params->tbid = test->tbid;
- if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) {
+ if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6;
ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
@@ -100,7 +139,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
return 0;
}
- ret = inet_pton(AF_INET, daddr, &params->ipv4_dst);
+ ret = inet_pton(AF_INET, test->daddr, &params->ipv4_dst);
if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
return -1;
params->family = AF_INET;
@@ -154,13 +193,12 @@ void test_fib_lookup(void)
fib_params = &skel->bss->fib_params;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- printf("Testing %s\n", tests[i].desc);
+ printf("Testing %s ", tests[i].desc);
- if (set_lookup_params(fib_params, tests[i].daddr))
+ if (set_lookup_params(fib_params, &tests[i]))
continue;
skel->bss->fib_lookup_ret = -1;
- skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT |
- tests[i].lookup_flags;
+ skel->bss->lookup_flags = tests[i].lookup_flags;
err = bpf_prog_test_run_opts(prog_fd, &run_opts);
if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
@@ -175,7 +213,14 @@ void test_fib_lookup(void)
mac_str(expected, tests[i].dmac);
mac_str(actual, fib_params->dmac);
- printf("dmac expected %s actual %s\n", expected, actual);
+ printf("dmac expected %s actual %s ", expected, actual);
+ }
+
+ /* ensure tbid is zeroed out after fib lookup */
+ if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) {
+ if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0,
+ "expected fib_params.tbid to be zero"))
+ goto fail;
}
}
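
For reference, a minimal BPF-side sketch of a TBID lookup (an assumed program shape; the selftest's actual program takes its parameters and flags from globals): with BPF_FIB_LOOKUP_TBID set, the helper honors params->tbid as the routing table, here table 100 as created by the "ip route add table 100" setup above.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int fib_lookup_tbid(struct __sk_buff *skb)
{
        struct bpf_fib_lookup params = {
                .family  = 2,   /* AF_INET */
                .ifindex = skb->ingress_ifindex,
                .tbid    = 100, /* routing table to consult */
        };
        int ret;

        /* ipv4_src/ipv4_dst would be filled from the packet (elided) */
        ret = bpf_fib_lookup(skb, &params, sizeof(params),
                             BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID);
        return ret == BPF_FIB_LKUP_RET_SUCCESS ? TC_ACT_OK : TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";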
diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
new file mode 100644
index 000000000000..fd41425d2e5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "test_global_map_resize.skel.h"
+#include "test_progs.h"
+
+static void run_prog_bss_array_sum(void)
+{
+ (void)syscall(__NR_getpid);
+}
+
+static void run_prog_data_array_sum(void)
+{
+ (void)syscall(__NR_getuid);
+}
+
+static void global_map_resize_bss_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2;
+ size_t array_len, actual_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ goto teardown;
+
+ /* set some initial value before resizing.
+ * it is expected that this non-zero value will be preserved
+ * while resizing.
+ */
+ skel->bss->array[0] = 1;
+
+ /* resize map value and verify the new size */
+ map = skel->maps.bss;
+ err = bpf_map__set_value_size(map, desired_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+ goto teardown;
+ if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+ goto teardown;
+
+ /* set the expected number of elements based on the resized array */
+ array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]);
+ if (!ASSERT_GT(array_len, 1, "array_len"))
+ goto teardown;
+
+ skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz);
+ if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)"))
+ goto teardown;
+ if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+ goto teardown;
+
+ /* fill the newly resized array with ones,
+ * skipping the first element which was previously set
+ */
+ for (int i = 1; i < array_len; i++)
+ skel->bss->array[i] = 1;
+
+ /* set global const values before loading */
+ skel->rodata->pid = getpid();
+ skel->rodata->bss_array_len = array_len;
+ skel->rodata->data_array_len = 1;
+
+ err = test_global_map_resize__load(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__load"))
+ goto teardown;
+ err = test_global_map_resize__attach(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+ goto teardown;
+
+ /* run the bpf program which will sum the contents of the array.
+ * since the array was filled with ones, verify the sum equals array_len
+ */
+ run_prog_bss_array_sum();
+ if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_data_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2;
+ size_t array_len, actual_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ goto teardown;
+
+ /* set some initial value before resizing.
+ * it is expected that this non-zero value will be preserved
+ * while resizing.
+ */
+ skel->data_custom->my_array[0] = 1;
+
+ /* resize map value and verify the new size */
+ map = skel->maps.data_custom;
+ err = bpf_map__set_value_size(map, desired_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+ goto teardown;
+ if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+ goto teardown;
+
+ /* set the expected number of elements based on the resized array */
+ array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]);
+ if (!ASSERT_GT(array_len, 1, "array_len"))
+ goto teardown;
+
+ skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz);
+ if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)"))
+ goto teardown;
+ if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+ goto teardown;
+
+ /* fill the newly resized array with ones,
+ * skipping the first element which was previously set
+ */
+ for (int i = 1; i < array_len; i++)
+ skel->data_custom->my_array[i] = 1;
+
+ /* set global const values before loading */
+ skel->rodata->pid = getpid();
+ skel->rodata->bss_array_len = 1;
+ skel->rodata->data_array_len = array_len;
+
+ err = test_global_map_resize__load(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__load"))
+ goto teardown;
+ err = test_global_map_resize__attach(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+ goto teardown;
+
+ /* run the bpf program which will sum the contents of the array.
+ * since the array was filled with ones, verify the sum equals array_len
+ */
+ run_prog_data_array_sum();
+ if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_invalid_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ __u32 element_sz, desired_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ return;
+
+ /* attempt to resize a global datasec map to size
+ * which does NOT align with array
+ */
+ map = skel->maps.data_custom;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf"))
+ goto teardown;
+ /* set desired size a fraction of element size beyond an aligned size */
+ element_sz = sizeof(skel->data_custom->my_array[0]);
+ desired_sz = element_sz + element_sz / 2;
+ /* confirm desired size does NOT align with array */
+ if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment"))
+ goto teardown;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val"))
+ goto teardown;
+
+ /* attempt to resize a global datasec map whose only var is NOT an array */
+ map = skel->maps.data_non_array;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf"))
+ goto teardown;
+ /* set desired size to arbitrary value */
+ desired_sz = 1024;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val"))
+ goto teardown;
+
+ /* attempt to resize a global datasec map
+ * whose last var is NOT an array
+ */
+ map = skel->maps.data_array_not_last;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf"))
+ goto teardown;
+ /* set desired size to a multiple of element size */
+ element_sz = sizeof(skel->data_array_not_last->my_array_first[0]);
+ desired_sz = element_sz * 8;
+ /* confirm desired size aligns with array */
+ if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment"))
+ goto teardown;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+void test_global_map_resize(void)
+{
+ if (test__start_subtest("global_map_resize_bss"))
+ global_map_resize_bss_subtest();
+
+ if (test__start_subtest("global_map_resize_data"))
+ global_map_resize_data_subtest();
+
+ if (test__start_subtest("global_map_resize_invalid"))
+ global_map_resize_invalid_subtest();
+}
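
The BPF side of this test lives in test_global_map_resize.bpf.c; the sketch below is reconstructed from the constraints the test checks, so the details are illustrative. The key layout rule the invalid-resize subtest exercises is that a datasec can only be grown via bpf_map__set_value_size() while keeping its BTF when its last variable is an array, so the resizable array is declared last.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* frozen before load via skel->rodata */
const volatile int pid = 0;
const volatile unsigned long bss_array_len = 1;

/* .bss: the array must be the last variable for the resize to keep BTF */
int sum = 0;
int array[1];

SEC("tp/syscalls/sys_enter_getpid")
int bss_array_sum(void *ctx)
{
        if ((bpf_get_current_pid_tgid() >> 32) != pid)
                return 0;

        sum = 0;
        for (unsigned long i = 0; i < bss_array_len; i++)
                sum += array[i];
        return 0;
}

char _license[] SEC("license") = "GPL";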
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index a543742cd7bd..2eb71559713c 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -173,8 +173,8 @@ static void verify_fail(struct kfunc_test_params *param)
case tc_test:
topts.data_in = &pkt_v4;
topts.data_size_in = sizeof(pkt_v4);
- break;
topts.repeat = 1;
+ break;
}
skel = kfunc_call_fail__open_opts(&opts);
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 7fc01ff490db..f53d658ed080 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -4,6 +4,7 @@
#include <test_progs.h>
#include <stdbool.h>
#include "test_module_attach.skel.h"
+#include "testing_helpers.h"
static int duration;
@@ -32,11 +33,6 @@ static int trigger_module_test_writable(int *val)
return 0;
}
-static int delete_module(const char *name, int flags)
-{
- return syscall(__NR_delete_module, name, flags);
-}
-
void test_module_attach(void)
{
const int READ_SZ = 456;
@@ -93,21 +89,21 @@ void test_module_attach(void)
if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
- ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
bpf_link__destroy(link);
link = bpf_program__attach(skel->progs.handle_fexit);
if (!ASSERT_OK_PTR(link, "attach_fexit"))
goto cleanup;
- ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
bpf_link__destroy(link);
link = bpf_program__attach(skel->progs.kprobe_multi);
if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
goto cleanup;
- ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
bpf_link__destroy(link);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
index d3915c58d0e1..c3333edd029f 100644
--- a/tools/testing/selftests/bpf/prog_tests/netcnt.c
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -67,12 +67,12 @@ void serial_test_netcnt(void)
}
/* No packets should be lost */
- ASSERT_EQ(packets, 10000, "packets");
+ ASSERT_GE(packets, 10000, "packets");
/* Let's check that bytes counter matches the number of packets
* multiplied by the size of ipv6 ICMP packet.
*/
- ASSERT_EQ(bytes, packets * 104, "bytes");
+ ASSERT_GE(bytes, packets * 104, "bytes");
err:
if (cg_fd != -1)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
new file mode 100644
index 000000000000..b0583309a94e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/bpf_endian.h>
+
+#include "sock_destroy_prog.skel.h"
+#include "sock_destroy_prog_fail.skel.h"
+#include "network_helpers.h"
+
+#define TEST_NS "sock_destroy_netns"
+
+static void start_iter_sockets(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ char buf[50] = {};
+ int iter_fd, len;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ ASSERT_GE(len, 0, "read");
+
+ close(iter_fd);
+
+free_link:
+ bpf_link__destroy(link);
+}
+
+static void test_tcp_client(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, accept_serv = -1, n;
+
+ serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ accept_serv = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys connected client sockets. */
+ start_iter_sockets(skel->progs.iter_tcp6_client);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket");
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (accept_serv != -1)
+ close(accept_serv);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_tcp_server(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, accept_serv = -1, n, serv_port;
+
+ serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+ serv_port = get_socket_local_port(serv);
+ if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+ goto cleanup;
+ skel->bss->serv_port = (__be16) serv_port;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ accept_serv = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys server sockets. */
+ start_iter_sockets(skel->progs.iter_tcp6_server);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket");
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (accept_serv != -1)
+ close(accept_serv);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_udp_client(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, n = 0;
+
+ serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys sockets. */
+ start_iter_sockets(skel->progs.iter_udp6_client);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ /* UDP sockets have an overriding error code after they are disconnected,
+ * so we don't check for ECONNABORTED error code.
+ */
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_udp_server(struct sock_destroy_prog *skel)
+{
+ int *listen_fds = NULL, n, i, serv_port;
+ unsigned int num_listens = 5;
+ char buf[1];
+
+ /* Start reuseport servers. */
+ listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM,
+ "::1", 0, 0, num_listens);
+ if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server"))
+ goto cleanup;
+ serv_port = get_socket_local_port(listen_fds[0]);
+ if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+ goto cleanup;
+ skel->bss->serv_port = (__be16) serv_port;
+
+ /* Run iterator program that destroys server sockets. */
+ start_iter_sockets(skel->progs.iter_udp6_server);
+
+ for (i = 0; i < num_listens; ++i) {
+ n = read(listen_fds[i], buf, sizeof(buf));
+ if (!ASSERT_EQ(n, -1, "read") ||
+ !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"))
+ break;
+ }
+ ASSERT_EQ(i, num_listens, "server socket");
+
+cleanup:
+ free_fds(listen_fds, num_listens);
+}
+
+void test_sock_destroy(void)
+{
+ struct sock_destroy_prog *skel;
+ struct nstoken *nstoken = NULL;
+ int cgroup_fd;
+
+ skel = sock_destroy_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/sock_destroy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto cleanup;
+
+ skel->links.sock_connect = bpf_program__attach_cgroup(
+ skel->progs.sock_connect, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach"))
+ goto cleanup;
+
+ SYS(cleanup, "ip netns add %s", TEST_NS);
+ SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS);
+
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto cleanup;
+
+ if (test__start_subtest("tcp_client"))
+ test_tcp_client(skel);
+ if (test__start_subtest("tcp_server"))
+ test_tcp_server(skel);
+ if (test__start_subtest("udp_client"))
+ test_udp_client(skel);
+ if (test__start_subtest("udp_server"))
+ test_udp_server(skel);
+
+ RUN_TESTS(sock_destroy_prog_fail);
+
+cleanup:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null");
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+ sock_destroy_prog__destroy(skel);
+}
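
The destroying iterators live in sock_destroy_prog.bpf.c; a sketch of the assumed shape of the TCP server variant, built around the bpf_sock_destroy() kfunc (which is only callable from iterator context):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

extern int bpf_sock_destroy(struct sock_common *sk) __ksym;

__be16 serv_port = 0;   /* published by the userspace test above */

SEC("iter/tcp")
int iter_tcp6_server(struct bpf_iter__tcp *ctx)
{
        struct sock_common *sk_common = ctx->sk_common;

        if (!sk_common)
                return 0;

        /* skc_num is the local port in host byte order */
        if (sk_common->skc_num != bpf_ntohs(serv_port))
                return 0;

        bpf_sock_destroy(sk_common);
        return 0;
}

char _license[] SEC("license") = "GPL";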
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index d12665490a90..36d829a65aa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -179,6 +179,32 @@
__ret; \
})
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set wfds;
+ int r, eval;
+ socklen_t esize = sizeof(eval);
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+
+ r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+ if (r != 1)
+ return -1;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+ return -1;
+ if (eval != 0) {
+ errno = eval;
+ return -1;
+ }
+
+ return 0;
+}
+
static inline int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
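
A usage sketch for the new poll_connect() helper (the scenario is illustrative): pair it with a non-blocking connect(), which returns immediately with EINPROGRESS and completes asynchronously.

static int connect_nonblock(const struct sockaddr *addr, socklen_t len)
{
        int c = socket(addr->sa_family, SOCK_STREAM | SOCK_NONBLOCK, 0);

        if (c < 0)
                return -1;

        if (connect(c, addr, len) < 0 && errno != EINPROGRESS)
                goto fail;

        /* wait until writable, then check SO_ERROR */
        if (poll_connect(c, IO_TIMEOUT_SEC) < 0)
                goto fail;

        return c;
fail:
        close(c);
        return -1;
}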
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index b4f6f3a50ae5..8df8cbb447f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+ int s, c0, c1, p0, p1;
+ int err, n, key, value;
+ char buf[] = "abc";
+
+ key = 0;
+ value = sizeof(buf) - 1;
+ err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+ if (err)
+ return;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ goto clean_parser_map;
+
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ if (err)
+ goto close_srv;
+
+ err = add_to_sockmap(sock_map, p0, p1);
+ if (err)
+ goto close;
+
+ n = xsend(c1, buf, sizeof(buf), 0);
+ if (n < sizeof(buf))
+ FAIL("incomplete write");
+
+ n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+ if (n != sizeof(buf) - 1)
+ FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+ xclose(c0);
+ xclose(p0);
+ xclose(c1);
+ xclose(p1);
+close_srv:
+ xclose(s);
+
+clean_parser_map:
+ key = 0;
+ value = 0;
+ xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+ int parser_map = bpf_map__fd(skel->maps.parser_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_partial(family, sotype, sock_map, parser_map);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
static void test_reuseport_select_listening(int family, int sotype,
int sock_map, int verd_map,
int reuseport_prog)
@@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
} tests[] = {
TEST(test_skb_redir_to_connected),
TEST(test_skb_redir_to_listening),
+ TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
TEST(test_msg_redir_to_listening),
};
@@ -1380,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
if (p < 0)
goto close_cli;
+ if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
+ FAIL_ERRNO("poll_connect");
+ goto close_acc;
+ }
+
*v0 = p;
*v1 = c;
return 0;
+close_acc:
+ close(p);
close_cli:
close(c);
close_srv:
@@ -1432,7 +1511,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
if (n < 1)
goto out;
- n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+ n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
if (n < 0)
FAIL("%s: recv() err, errno=%d", log_prefix, errno);
if (n == 0)
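
redir_partial() above relies on a stream parser that truncates the parsed record; a sketch of the assumed BPF side: when parser_map holds a non-zero length, the parser returns it instead of skb->len, so the verdict program only ever sees that many bytes, hence 3 of the 4 bytes sent in the test.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, int);
} parser_map SEC(".maps");

SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
{
        int key = 0;
        int *value;

        value = bpf_map_lookup_elem(&parser_map, &key);
        if (value && *value)
                return *value;  /* clamp the record length */

        return skb->len;
}

char _license[] SEC("license") = "GPL";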
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index aa4debf62fc6..9e6a5e3ed4de 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -5,10 +5,15 @@
static char bpf_log_buf[4096];
static bool verbose;
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
enum sockopt_test_error {
OK = 0,
DENY_LOAD,
DENY_ATTACH,
+ EOPNOTSUPP_GETSOCKOPT,
EPERM_GETSOCKOPT,
EFAULT_GETSOCKOPT,
EPERM_SETSOCKOPT,
@@ -273,10 +278,31 @@ static struct sockopt_test {
.error = EFAULT_GETSOCKOPT,
},
{
- .descr = "getsockopt: deny arbitrary ctx->retval",
+ .descr = "getsockopt: ignore >PAGE_SIZE optlen",
.insns = {
- /* ctx->retval = 123 */
- BPF_MOV64_IMM(BPF_REG_0, 123),
+ /* write 0xFF to the first optval byte */
+
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xFF */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xFF),
+ /* } */
+
+ /* retval changes are ignored */
+ /* ctx->retval = 5 */
+ BPF_MOV64_IMM(BPF_REG_0, 5),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct bpf_sockopt, retval)),
@@ -287,9 +313,11 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
- .get_optlen = 64,
-
- .error = EFAULT_GETSOCKOPT,
+ .get_level = 1234,
+ .get_optname = 5678,
+ .get_optval = {}, /* the changes are ignored */
+ .get_optlen = PAGE_SIZE + 1,
+ .error = EOPNOTSUPP_GETSOCKOPT,
},
{
.descr = "getsockopt: support smaller ctx->optlen",
@@ -649,6 +677,45 @@ static struct sockopt_test {
.error = EFAULT_SETSOCKOPT,
},
{
+ .descr = "setsockopt: ignore >PAGE_SIZE optlen",
+ .insns = {
+ /* write 0xF0 to the first optval byte */
+
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xF0 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+ /* } */
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_level = SOL_IP,
+ .set_optname = IP_TOS,
+ .set_optval = {},
+ .set_optlen = PAGE_SIZE + 1,
+
+ .get_level = SOL_IP,
+ .get_optname = IP_TOS,
+ .get_optval = {}, /* the changes are ignored */
+ .get_optlen = 4,
+ },
+ {
.descr = "setsockopt: allow changing ctx->optlen within bounds",
.insns = {
/* r6 = ctx->optval */
@@ -906,6 +973,13 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
}
if (test->set_optlen) {
+ if (test->set_optlen >= PAGE_SIZE) {
+ int num_pages = test->set_optlen / PAGE_SIZE;
+ int remainder = test->set_optlen % PAGE_SIZE;
+
+ test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+ }
+
err = setsockopt(sock_fd, test->set_level, test->set_optname,
test->set_optval, test->set_optlen);
if (err) {
@@ -921,7 +995,15 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
}
if (test->get_optlen) {
+ if (test->get_optlen >= PAGE_SIZE) {
+ int num_pages = test->get_optlen / PAGE_SIZE;
+ int remainder = test->get_optlen % PAGE_SIZE;
+
+ test->get_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+ }
+
optval = malloc(test->get_optlen);
+ memset(optval, 0, test->get_optlen);
socklen_t optlen = test->get_optlen;
socklen_t expected_get_optlen = test->get_optlen_ret ?:
test->get_optlen;
@@ -929,6 +1011,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
err = getsockopt(sock_fd, test->get_level, test->get_optname,
optval, &optlen);
if (err) {
+ if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT)
+ goto free_optval;
if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
goto free_optval;
if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
@@ -976,7 +1060,9 @@ void test_sockopt(void)
return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- test__start_subtest(tests[i].descr);
+ if (!test__start_subtest(tests[i].descr))
+ continue;
+
ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
}
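
The two rescaling hunks in run_test() apply the same rule; isolated into a hypothetical helper for clarity: sizes in the test table are written against the compile-time PAGE_SIZE of 4096 and mapped onto the real page size at runtime, so "PAGE_SIZE + 1" stays one byte past a page even on 16K or 64K page systems.

#include <unistd.h>

#define PAGE_SIZE 4096  /* the table's compile-time assumption */

static int scale_optlen(int optlen)
{
        long page = sysconf(_SC_PAGESIZE);

        if (optlen < PAGE_SIZE)
                return optlen;

        /* rescale whole pages, keep the remainder as-is */
        return (optlen / PAGE_SIZE) * page + optlen % PAGE_SIZE;
}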
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 60c17a8e2789..917f486db826 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -2,6 +2,8 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "sockopt_inherit.skel.h"
+
#define SOL_CUSTOM 0xdeadbeef
#define CUSTOM_INHERIT1 0
#define CUSTOM_INHERIT2 1
@@ -132,58 +134,30 @@ static int start_server(void)
return fd;
}
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title,
- const char *prog_name)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", prog_name);
- return -1;
- }
-
- prog = bpf_object__find_program_by_name(obj, prog_name);
- if (!prog) {
- log_err("Failed to find %s BPF program", prog_name);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
- if (err) {
- log_err("Failed to attach %s BPF program", prog_name);
- return -1;
- }
-
- return 0;
-}
-
static void run_test(int cgroup_fd)
{
+ struct bpf_link *link_getsockopt = NULL;
+ struct bpf_link *link_setsockopt = NULL;
int server_fd = -1, client_fd;
- struct bpf_object *obj;
+ struct sockopt_inherit *obj;
void *server_err;
pthread_t tid;
int err;
- obj = bpf_object__open_file("sockopt_inherit.bpf.o", NULL);
- if (!ASSERT_OK_PTR(obj, "obj_open"))
+ obj = sockopt_inherit__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
- err = bpf_object__load(obj);
- if (!ASSERT_OK(err, "obj_load"))
- goto close_bpf_object;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt");
- if (!ASSERT_OK(err, "prog_attach _getsockopt"))
+ link_getsockopt = bpf_program__attach_cgroup(obj->progs._getsockopt,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_getsockopt, "cg-attach-getsockopt"))
goto close_bpf_object;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt");
- if (!ASSERT_OK(err, "prog_attach _setsockopt"))
+ link_setsockopt = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt"))
goto close_bpf_object;
server_fd = start_server();
@@ -217,7 +191,10 @@ static void run_test(int cgroup_fd)
close_server_fd:
close(server_fd);
close_bpf_object:
- bpf_object__close(obj);
+ bpf_link__destroy(link_getsockopt);
+ bpf_link__destroy(link_setsockopt);
+
+ sockopt_inherit__destroy(obj);
}
void test_sockopt_inherit(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 7f5659349011..759bbb6f8c5f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -2,61 +2,13 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
-
- prog = bpf_object__find_program_by_name(obj, name);
- if (!prog) {
- log_err("Failed to find %s BPF program", name);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, BPF_F_ALLOW_MULTI);
- if (err) {
- log_err("Failed to attach %s BPF program", name);
- return -1;
- }
-
- return 0;
-}
+#include "sockopt_multi.skel.h"
-static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err)
- return -1;
-
- prog = bpf_object__find_program_by_name(obj, name);
- if (!prog)
- return -1;
-
- err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
- attach_type);
- if (err)
- return -1;
-
- return 0;
-}
-
-static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_getsockopt_test(struct sockopt_multi *obj, int cg_parent,
int cg_child, int sock_fd)
{
+ struct bpf_link *link_parent = NULL;
+ struct bpf_link *link_child = NULL;
socklen_t optlen;
__u8 buf;
int err;
@@ -89,8 +41,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - child: 0x80 -> 0x90
*/
- err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
- if (err)
+ link_child = bpf_program__attach_cgroup(obj->progs._getsockopt_child,
+ cg_child);
+ if (!ASSERT_OK_PTR(link_child, "cg-attach-getsockopt_child"))
goto detach;
buf = 0x00;
@@ -113,8 +66,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - parent: 0x90 -> 0xA0
*/
- err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent");
- if (err)
+ link_parent = bpf_program__attach_cgroup(obj->progs._getsockopt_parent,
+ cg_parent);
+ if (!ASSERT_OK_PTR(link_parent, "cg-attach-getsockopt_parent"))
goto detach;
buf = 0x00;
@@ -157,11 +111,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - parent: unexpected 0x40, EPERM
*/
- err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
- if (err) {
- log_err("Failed to detach child program");
- goto detach;
- }
+ bpf_link__destroy(link_child);
+ link_child = NULL;
buf = 0x00;
optlen = 1;
@@ -198,15 +149,17 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
}
detach:
- prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
- prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent");
+ bpf_link__destroy(link_child);
+ bpf_link__destroy(link_parent);
return err;
}
-static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_setsockopt_test(struct sockopt_multi *obj, int cg_parent,
int cg_child, int sock_fd)
{
+ struct bpf_link *link_parent = NULL;
+ struct bpf_link *link_child = NULL;
socklen_t optlen;
__u8 buf;
int err;
@@ -236,8 +189,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
/* Attach child program and make sure it adds 0x10. */
- err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt");
- if (err)
+ link_child = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cg_child);
+ if (!ASSERT_OK_PTR(link_child, "cg-attach-setsockopt_child"))
goto detach;
buf = 0x80;
@@ -263,8 +217,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
/* Attach parent program and make sure it adds another 0x10. */
- err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt");
- if (err)
+ link_parent = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cg_parent);
+ if (!ASSERT_OK_PTR(link_parent, "cg-attach-setsockopt_parent"))
goto detach;
buf = 0x80;
@@ -289,8 +244,8 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
}
detach:
- prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt");
- prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt");
+ bpf_link__destroy(link_child);
+ bpf_link__destroy(link_parent);
return err;
}
@@ -298,9 +253,8 @@ detach:
void test_sockopt_multi(void)
{
int cg_parent = -1, cg_child = -1;
- struct bpf_object *obj = NULL;
+ struct sockopt_multi *obj = NULL;
int sock_fd = -1;
- int err = -1;
cg_parent = test__join_cgroup("/parent");
if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
@@ -310,13 +264,11 @@ void test_sockopt_multi(void)
if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
goto out;
- obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL);
- if (!ASSERT_OK_PTR(obj, "obj_load"))
+ obj = sockopt_multi__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
goto out;
- err = bpf_object__load(obj);
- if (!ASSERT_OK(err, "obj_load"))
- goto out;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
if (!ASSERT_GE(sock_fd, 0, "socket"))
@@ -327,7 +279,7 @@ void test_sockopt_multi(void)
out:
close(sock_fd);
- bpf_object__close(obj);
+ sockopt_multi__destroy(obj);
close(cg_child);
close(cg_parent);
}
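
Both sockopt files above make the same conversion; the pattern in isolation (names illustrative): bpf_program__attach_cgroup() returns a bpf_link whose destruction detaches the program, replacing the deleted libbpf_prog_type_by_name()/bpf_prog_attach()/bpf_prog_detach2() bookkeeping.

static int run_with_cgroup_prog(struct sockopt_multi *obj, int cg_fd)
{
        struct bpf_link *link;

        link = bpf_program__attach_cgroup(obj->progs._setsockopt, cg_fd);
        if (!link)
                return -errno;  /* libbpf 1.x: NULL plus errno on failure */

        /* ... exercise setsockopt() on a socket inside cg_fd ... */

        bpf_link__destroy(link);        /* detaches from the cgroup */
        return 0;
}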
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
index 6b53b3cb8dad..6b2d300e9fd4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -42,6 +42,8 @@ void test_sockopt_qos_to_cc(void)
if (!ASSERT_OK_PTR(skel, "skel"))
goto done;
+ skel->bss->page_size = sysconf(_SC_PAGESIZE);
+
sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644
index 000000000000..4224727fb364
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO "/foo"
+
+void test_task_under_cgroup(void)
+{
+ struct test_task_under_cgroup *skel;
+ int ret, foo;
+ pid_t pid;
+
+ foo = test__join_cgroup(FOO);
+ if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+ return;
+
+ skel = test_task_under_cgroup__open();
+ if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+ goto cleanup;
+
+ skel->rodata->local_pid = getpid();
+ skel->bss->remote_pid = getpid();
+ skel->rodata->cgid = get_cgroup_id(FOO);
+
+ ret = test_task_under_cgroup__load(skel);
+ if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+ goto cleanup;
+
+ ret = test_task_under_cgroup__attach(skel);
+ if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+ goto cleanup;
+
+ pid = fork();
+ if (pid == 0)
+ exit(0);
+
+ ret = (pid == -1);
+ if (ASSERT_OK(ret, "fork process"))
+ wait(NULL);
+
+ test_task_under_cgroup__detach(skel);
+
+ ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
+ "test task_under_cgroup");
+
+cleanup:
+ test_task_under_cgroup__destroy(skel);
+ close(foo);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
index 8383a99f610f..0adf8d9475cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -171,7 +171,11 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s
prog_insns, prog_insn_cnt, &load_opts),
-EPERM, "prog_load_fails");
- for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++)
+ /* some map types require specific parameters that are sanity-checked
+ * before -EPERM is enforced, so only validate that the simple ARRAY
+ * and HASH maps fail with -EPERM
+ */
+ for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++)
ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
-EPERM, "map_create_fails");
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 2497716ee379..070a13833c3f 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -50,11 +50,13 @@
#include "verifier_regalloc.skel.h"
#include "verifier_ringbuf.skel.h"
#include "verifier_runtime_jit.skel.h"
+#include "verifier_scalar_ids.skel.h"
#include "verifier_search_pruning.skel.h"
#include "verifier_sock.skel.h"
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
+#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
#include "verifier_uninit.skel.h"
#include "verifier_unpriv.skel.h"
@@ -149,11 +151,13 @@ void test_verifier_ref_tracking(void) { RUN(verifier_ref_tracking); }
void test_verifier_regalloc(void) { RUN(verifier_regalloc); }
void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); }
void test_verifier_runtime_jit(void) { RUN(verifier_runtime_jit); }
+void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); }
void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); }
void test_verifier_sock(void) { RUN(verifier_sock); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
void test_verifier_uninit(void) { RUN(verifier_uninit); }
void test_verifier_unpriv(void) { RUN(verifier_unpriv); }
diff --git a/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
new file mode 100644
index 000000000000..2a5e207edad6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace
+ * |
+ * +--------------+ | +--------------+
+ * | veth01 |----------| veth10 |
+ * | 172.16.1.100 | | | 172.16.1.200 |
+ * | bpf | | +--------------+
+ * +--------------+ |
+ * server(UDP/TCP) |
+ * +-------------------+ |
+ * | vrf1 | |
+ * | +--------------+ | | +--------------+
+ * | | veth02 |----------| veth20 |
+ * | | 172.16.2.100 | | | | 172.16.2.200 |
+ * | | bpf | | | +--------------+
+ * | +--------------+ | |
+ * | server(UDP/TCP) | |
+ * +-------------------+ |
+ *
+ * Test flow
+ * -----------
+ * The test verifies that socket lookup via TC and XDP is VRF aware:
+ * 1) Creates two veth pairs between NS0 and NS1:
+ * a) veth01 <-> veth10 outside the VRF
+ * b) veth02 <-> veth20 in the VRF
+ * 2) Attaches to veth01 and veth02 a program that calls:
+ * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true
+ * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false
+ * c) bpf_sk_lookup_udp() with UDP
+ * The program stores the lookup result in bss->lookup_status.
+ * 3) Creates a socket TCP/UDP server in/outside the VRF.
+ * 4) The test expects lookup_status to be:
+ * a) 0 from device in VRF to server outside VRF
+ * b) 0 from device outside VRF to server in VRF
+ * c) 1 from device in VRF to server in VRF
+ * d) 1 from device outside VRF to server outside VRF
+ */
+
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "vrf_socket_lookup.skel.h"
+
+#define NS0 "vrf_socket_lookup_0"
+#define NS1 "vrf_socket_lookup_1"
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define NON_VRF_PORT 5000
+#define IN_VRF_PORT 5001
+
+#define TIMEOUT_MS 3000
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ int err, fd;
+
+ err = make_sockaddr(AF_INET, ip, port, addr, NULL);
+ if (!ASSERT_OK(err, "make_address"))
+ return -1;
+
+ fd = socket(AF_INET, sotype, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ return -1;
+
+ if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo"))
+ goto fail;
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_server(int sotype, const char *ip, int port, const char *ifname)
+{
+ int err, fd = -1;
+
+ fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS);
+ if (!ASSERT_GE(fd, 0, "start_server"))
+ return -1;
+
+ if (ifname) {
+ err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ ifname, strlen(ifname) + 1);
+ if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)"))
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd)
+{
+ LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1,
+ .prog_fd = tc_prog_fd);
+ int ret, ifindex;
+
+ ifindex = if_nametoindex(ifname);
+ if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex"))
+ return -1;
+ hook.ifindex = ifindex;
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ return ret;
+
+ ret = bpf_tc_attach(&hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL);
+ if (!ASSERT_OK(ret, "bpf_xdp_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete "
+ NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete "
+ NS1);
+}
+
+static int setup(struct vrf_socket_lookup *skel)
+{
+ int tc_prog_fd, xdp_prog_fd, ret = 0;
+ struct nstoken *nstoken = NULL;
+
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS1 [veth02 <-> veth20] */
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS1 " link set dev veth20 up");
+
+ /* veth02 -> vrf1 */
+ SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11");
+ SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default"
+ " metric 4278198272");
+ SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf");
+ SYS(fail, "ip -net " NS0 " link set vrf1 up");
+ SYS(fail, "ip -net " NS0 " link set veth02 master vrf1");
+
+ /* Attach TC and XDP progs to veth devices in NS0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto fail;
+ tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup);
+ if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd"))
+ goto fail;
+ xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd"))
+ goto fail;
+
+ if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ goto close;
+fail:
+ ret = -1;
+close:
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int test_lookup(struct vrf_socket_lookup *skel, int sotype,
+ const char *ip, int port, bool test_xdp, bool tcp_skc,
+ int lookup_status_exp)
+{
+ static const char msg[] = "Hello Server";
+ struct sockaddr_storage addr = {};
+ int fd, ret = 0;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ skel->bss->test_xdp = test_xdp;
+ skel->bss->tcp_skc = tcp_skc;
+ skel->bss->lookup_status = -1;
+
+ if (sotype == SOCK_STREAM)
+ connect(fd, (void *)&addr, sizeof(struct sockaddr_in));
+ else
+ sendto(fd, msg, sizeof(msg), 0, (void *)&addr,
+ sizeof(struct sockaddr_in));
+
+ if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp,
+ "lookup_status"))
+ goto fail;
+
+ goto close;
+
+fail:
+ ret = -1;
+close:
+ close(fd);
+ return ret;
+}
+
+static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype,
+ bool test_xdp, bool tcp_skc)
+{
+ int in_vrf_server = -1, non_vrf_server = -1;
+ struct nstoken *nstoken = NULL;
+
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+
+ /* Open sockets in and outside VRF */
+ non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL);
+ if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd"))
+ goto done;
+
+ in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02");
+ if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd"))
+ goto done;
+
+ /* Perform test from NS1 */
+ close_netns(nstoken);
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS1))
+ goto done;
+
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT,
+ test_xdp, tcp_skc, 0), "in_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT,
+ test_xdp, tcp_skc, 1), "in_to_in"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT,
+ test_xdp, tcp_skc, 1), "out_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT,
+ test_xdp, tcp_skc, 0), "out_to_in"))
+ goto done;
+
+done:
+ if (non_vrf_server >= 0)
+ close(non_vrf_server);
+ if (in_vrf_server >= 0)
+ close(in_vrf_server);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+void test_vrf_socket_lookup(void)
+{
+ struct vrf_socket_lookup *skel;
+
+ cleanup();
+
+ skel = vrf_socket_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "setup"))
+ goto done;
+
+ if (test__start_subtest("tc_socket_lookup_tcp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("tc_socket_lookup_tcp_skc"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("tc_socket_lookup_udp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("xdp_socket_lookup_tcp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+ if (test__start_subtest("xdp_socket_lookup_tcp_skc"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+ if (test__start_subtest("xdp_socket_lookup_udp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+
+done:
+ vrf_socket_lookup__destroy(skel);
+ cleanup();
+}
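
A minimal TC-side sketch of what lookup_status reports (an assumed shape; the selftest's program also covers XDP and the skc variant): a socket lookup performed from a program attached to an enslaved device resolves within that device's VRF.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

int lookup_status = -1; /* read back by userspace */

SEC("tc")
int tc_socket_lookup(struct __sk_buff *skb)
{
        struct bpf_sock_tuple tuple = {};       /* 4-tuple from headers (elided) */
        struct bpf_sock *sk;

        sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
                               BPF_F_CURRENT_NETNS, 0);
        lookup_status = sk ? 1 : 0;
        if (sk)
                bpf_sk_release(sk);

        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";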
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index d19f79048ff6..c3b45745cbcc 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -18,6 +18,7 @@
#include <linux/if_bonding.h>
#include <linux/limits.h>
#include <linux/udp.h>
+#include <uapi/linux/netdev.h>
#include "xdp_dummy.skel.h"
#include "xdp_redirect_multi_kern.skel.h"
@@ -492,6 +493,123 @@ out:
system("ip link del bond_nest2");
}
+static void test_xdp_bonding_features(struct skeletons *skeletons)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int bond_idx, veth1_idx, err;
+ struct bpf_link *link = NULL;
+
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ bond_idx = if_nametoindex("bond");
+ if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* query default xdp-feature for bond device */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"),
+ "add veth{0,1} pair"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"),
+ "add veth{2,3} pair"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth0 master bond"),
+ "add veth0 to master bond"))
+ goto out;
+
+ /* the xdp features for the bond device should be obtained from the
+ * single slave device (veth0)
+ */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ veth1_idx = if_nametoindex("veth1");
+ if (!ASSERT_GT(veth1_idx, 0, "if_nametoindex veth1"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog,
+ veth1_idx);
+ if (!ASSERT_OK_PTR(link, "attach program to veth1"))
+ goto out;
+
+ /* the xdp features for veth0 have changed */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth2 master bond"),
+ "add veth2 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ /* the xdp features for the bond device should be set to the most
+ * restrictive value obtained from the attached slave devices
+ * (veth0 and veth2)
+ */
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth2 nomaster"),
+ "del veth2 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth0 nomaster"),
+ "del veth0 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+ "bond query_opts.feature_flags");
+out:
+ bpf_link__destroy(link);
+ system("ip link del veth0");
+ system("ip link del veth2");
+ system("ip link del bond");
+}
+
static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
@@ -546,6 +664,9 @@ void serial_test_xdp_bonding(void)
if (test__start_subtest("xdp_bonding_nested"))
test_xdp_bonding_nested(&skeletons);
+ if (test__start_subtest("xdp_bonding_features"))
+ test_xdp_bonding_features(&skeletons);
+
for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
struct bond_test_case *test_case = &bond_test_cases[i];
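
bpf_xdp_query() is the libbpf API the test above leans on; reading a
device's advertised XDP feature flags standalone looks roughly like this
(a sketch; "ifname" is a placeholder):

    #include <net/if.h>
    #include <linux/if_link.h>
    #include <bpf/libbpf.h>

    static __u64 get_xdp_features(const char *ifname)
    {
        LIBBPF_OPTS(bpf_xdp_query_opts, opts);
        int ifindex = if_nametoindex(ifname);

        if (!ifindex || bpf_xdp_query(ifindex, XDP_FLAGS_DRV_MODE, &opts))
            return 0;
        /* e.g. check opts.feature_flags & NETDEV_XDP_ACT_REDIRECT */
        return opts.feature_flags;
    }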
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
new file mode 100644
index 000000000000..3517c0e01206
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct hmap_elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+__attribute__((noinline))
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[256] = {};
+ return buf[69];
+}
+
+__attribute__((noinline))
+static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[300] = {};
+ return buf[255] + timer_cb(NULL, NULL, NULL);
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is 576. Too large")
+int pseudo_call_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ timer_cb(NULL, NULL, NULL);
+ return bpf_timer_set_callback(&elem->timer, timer_cb) + buf[0];
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is 608. Too large")
+int async_call_root_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ return bpf_timer_set_callback(&elem->timer, bad_timer_cb) + buf[0];
+}
+
+char _license[] SEC("license") = "GPL";
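
The two programs above exist only to trip the verifier's async stack
accounting; for context, the accepted bpf_timer flow that
bpf_timer_set_callback() belongs to looks roughly like this (a sketch
reusing the hmap above, not part of the test):

    static int ok_timer_cb(void *map, int *key, struct bpf_timer *timer)
    {
        return 0;
    }

    SEC("tc")
    int arm_timer(struct __sk_buff *ctx)
    {
        struct hmap_elem *elem;

        elem = bpf_map_lookup_elem(&hmap, &(int){0});
        if (!elem)
            return 0;
        if (bpf_timer_init(&elem->timer, &hmap, CLOCK_MONOTONIC))
            return 0;
        if (bpf_timer_set_callback(&elem->timer, ok_timer_cb))
            return 0;
        bpf_timer_start(&elem->timer, 1000000 /* 1ms */, 0);
        return 0;
    }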
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
index 5ddcc46fd886..521267818f4d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
@@ -59,9 +59,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx)
} else {
BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name);
}
- if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos)
- BPF_SEQ_PRINTF(seq, "CORE ");
- else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos)
+ if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos)
BPF_SEQ_PRINTF(seq, "MOD ");
else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos)
BPF_SEQ_PRINTF(seq, "FTRACE_MOD ");
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index d3c1217ba79a..38a57a2e70db 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -86,6 +86,10 @@
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
+#ifndef __used
+#define __used __attribute__((used))
+#endif
+
#if defined(__TARGET_ARCH_x86)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__x64_"
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 50f95ec61165..76d661b20e87 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -2,6 +2,7 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
struct map_value {
struct prog_test_ref_kfunc __kptr *ptr;
@@ -14,9 +15,6 @@ struct {
__uint(max_entries, 16);
} array_map SEC(".maps");
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
static __noinline int cb1(void *map, void *key, void *value, void *ctx)
{
void *p = *(void **)ctx;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
index b2a409e6382a..932b8ecd4ae3 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
@@ -12,6 +12,7 @@ __u32 invocations = 0;
__u32 assertion_error = 0;
__u32 retval_value = 0;
__u32 ctx_retval_value = 0;
+__u32 page_size = 0;
SEC("cgroup/getsockopt")
int get_retval(struct bpf_sockopt *ctx)
@@ -20,6 +21,10 @@ int get_retval(struct bpf_sockopt *ctx)
ctx_retval_value = ctx->retval;
__sync_fetch_and_add(&invocations, 1);
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 1;
}
@@ -31,6 +36,10 @@ int set_eisconn(struct bpf_sockopt *ctx)
if (bpf_set_retval(-EISCONN))
assertion_error = 1;
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 1;
}
@@ -41,5 +50,9 @@ int clear_retval(struct bpf_sockopt *ctx)
ctx->retval = 0;
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 1;
}
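
The optlen check added across these programs deals with how cgroup
sockopt hooks see oversized options: the program is only shown the first
PAGE_SIZE bytes of optval, and setting ctx->optlen to 0 tells the kernel
to fall back to its own buffer instead of rejecting the call. The
page_size global is expected to be filled in by the test loader before
attaching, along these lines (userspace sketch):

    skel->bss->page_size = sysconf(_SC_PAGESIZE); /* <unistd.h> */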
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
index d6e5903e06ba..b7fa8804e19d 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
@@ -11,6 +11,7 @@
__u32 invocations = 0;
__u32 assertion_error = 0;
__u32 retval_value = 0;
+__u32 page_size = 0;
SEC("cgroup/setsockopt")
int get_retval(struct bpf_sockopt *ctx)
@@ -18,6 +19,10 @@ int get_retval(struct bpf_sockopt *ctx)
retval_value = bpf_get_retval();
__sync_fetch_and_add(&invocations, 1);
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 1;
}
@@ -29,6 +34,10 @@ int set_eunatch(struct bpf_sockopt *ctx)
if (bpf_set_retval(-EUNATCH))
assertion_error = 1;
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 0;
}
@@ -40,6 +49,10 @@ int set_eisconn(struct bpf_sockopt *ctx)
if (bpf_set_retval(-EISCONN))
assertion_error = 1;
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 0;
}
@@ -48,5 +61,9 @@ int legacy_eperm(struct bpf_sockopt *ctx)
{
__sync_fetch_and_add(&invocations, 1);
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 0c5b785a93e4..b15c588ace15 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -28,6 +28,8 @@ void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+u32 bpf_cpumask_first_and(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
@@ -50,8 +52,8 @@ bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2)
bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
-u32 bpf_cpumask_any(const struct cpumask *src) __ksym;
-u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym;
+u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 2fcdd7f68ac7..674a63424dee 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
#include "cpumask_common.h"
char _license[] SEC("license") = "GPL";
@@ -175,6 +176,38 @@ release_exit:
}
SEC("tp_btf/task_newtask")
+int BPF_PROG(test_firstand_nocpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2;
+ u32 first;
+
+ if (!is_test_task())
+ return 0;
+
+ mask1 = create_cpumask();
+ if (!mask1)
+ return 0;
+
+ mask2 = create_cpumask();
+ if (!mask2)
+ goto release_exit;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+
+ first = bpf_cpumask_first_and(cast(mask1), cast(mask2));
+ if (first <= 1)
+ err = 3;
+
+release_exit:
+ if (mask1)
+ bpf_cpumask_release(mask1);
+ if (mask2)
+ bpf_cpumask_release(mask2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *cpumask;
@@ -311,13 +344,13 @@ int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags)
bpf_cpumask_set_cpu(1, mask2);
bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
- cpu = bpf_cpumask_any(cast(mask1));
+ cpu = bpf_cpumask_any_distribute(cast(mask1));
if (cpu != 0) {
err = 6;
goto release_exit;
}
- cpu = bpf_cpumask_any(cast(dst2));
+ cpu = bpf_cpumask_any_distribute(cast(dst2));
if (cpu < nr_cpus) {
err = 7;
goto release_exit;
@@ -329,13 +362,13 @@ int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags)
goto release_exit;
}
- cpu = bpf_cpumask_any(cast(dst2));
+ cpu = bpf_cpumask_any_distribute(cast(dst2));
if (cpu > 1) {
err = 9;
goto release_exit;
}
- cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2));
+ cpu = bpf_cpumask_any_and_distribute(cast(mask1), cast(mask2));
if (cpu < nr_cpus) {
err = 10;
goto release_exit;
@@ -426,3 +459,26 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2;
+
+ mask1 = bpf_cpumask_create();
+ mask2 = bpf_cpumask_create();
+
+ if (!mask1 || !mask2)
+ goto free_masks_return;
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)mask1);
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)mask2);
+
+free_masks_return:
+ if (mask1)
+ bpf_cpumask_release(mask1);
+ if (mask2)
+ bpf_cpumask_release(mask2);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 759eb5c245cd..7ce7e827d5f0 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -3,6 +3,7 @@
#include <errno.h>
#include <string.h>
+#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
@@ -1378,3 +1379,310 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
return 0;
}
+
+/* bpf_dynptr_adjust can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_adjust_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_adjust(&ptr, 1, 2);
+
+ return 0;
+}
+
+/* bpf_dynptr_is_null can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_is_null_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_is_null(&ptr);
+
+ return 0;
+}
+
+/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_is_rdonly_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_is_rdonly(&ptr);
+
+ return 0;
+}
+
+/* bpf_dynptr_size can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_size_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_size(&ptr);
+
+ return 0;
+}
+
+/* Only initialized dynptrs can be cloned */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int clone_invalid1(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+
+ /* this should fail */
+ bpf_dynptr_clone(&ptr1, &ptr2);
+
+ return 0;
+}
+
+/* Can't overwrite an existing dynptr when cloning */
+SEC("?xdp")
+__failure __msg("cannot overwrite referenced dynptr")
+int clone_invalid2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr clone;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr1);
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone);
+
+ /* this should fail */
+ bpf_dynptr_clone(&ptr1, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its clones */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate1(void *ctx)
+{
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its parent */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its siblings */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate3(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone1;
+ struct bpf_dynptr clone2;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone1);
+
+ bpf_dynptr_clone(&ptr, &clone2);
+
+ bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its clones
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_data(&clone, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its parent
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate5(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+ data = bpf_dynptr_data(&ptr, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its sibling
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate6(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone1;
+ struct bpf_dynptr clone2;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone1);
+
+ bpf_dynptr_clone(&ptr, &clone2);
+
+ data = bpf_dynptr_data(&clone1, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* A skb clone's data slices should be invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_data(struct __sk_buff *skb)
+{
+ char buffer[sizeof(__u32)] = {};
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ __u32 *data;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+ if (!data)
+ return XDP_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* An xdp clone's data slices should be invalid anytime packet data changes */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_xdp_packet_data(struct xdp_md *xdp)
+{
+ char buffer[sizeof(__u32)] = {};
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ __u32 *data;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+ if (!data)
+ return XDP_DROP;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Buffers that are provided must be sufficiently long */
+SEC("?cgroup_skb/egress")
+__failure __msg("memory, len pair leads to invalid memory access")
+int test_dynptr_skb_small_buff(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ char buffer[8] = {};
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* this should fail: the 8-byte buffer is too small for a 9-byte slice */
+ data = bpf_dynptr_slice(&ptr, 0, buffer, 9);
+
+ return !!data;
+}
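
For contrast with the failure cases above, a clone is fully usable as
long as neither it nor any relative has been released; a valid ordering
is sketched below (using the same ringbuf map as the tests):

    SEC("raw_tp")
    int clone_valid_use(void *ctx)
    {
        struct bpf_dynptr ptr, clone;
        char buf[16] = {};

        if (bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr))
            goto out;
        bpf_dynptr_clone(&ptr, &clone);
        /* fine: nothing has been submitted or discarded yet */
        bpf_dynptr_read(buf, sizeof(buf), &clone, 0, 0);
    out:
        /* releasing the parent invalidates the clone as well */
        bpf_ringbuf_discard_dynptr(&ptr, 0);
        return 0;
    }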
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index b2fa6c47ecc0..5985920d162e 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2022 Facebook */
#include <string.h>
+#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -207,3 +208,339 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
return 1;
}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u32 bytes = 64;
+ __u32 off = 10;
+ __u32 trim = 15;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr);
+ if (err) {
+ err = 1;
+ goto done;
+ }
+
+ if (bpf_dynptr_size(&ptr) != bytes) {
+ err = 2;
+ goto done;
+ }
+
+ /* Advance the dynptr by off */
+ err = bpf_dynptr_adjust(&ptr, off, bpf_dynptr_size(&ptr));
+ if (err) {
+ err = 3;
+ goto done;
+ }
+
+ if (bpf_dynptr_size(&ptr) != bytes - off) {
+ err = 4;
+ goto done;
+ }
+
+ /* Trim the dynptr */
+ err = bpf_dynptr_adjust(&ptr, off, 15);
+ if (err) {
+ err = 5;
+ goto done;
+ }
+
+ /* Check that the size was adjusted correctly */
+ if (bpf_dynptr_size(&ptr) != trim - off) {
+ err = 6;
+ goto done;
+ }
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust_err(void *ctx)
+{
+ char write_data[45] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ __u32 size = 64;
+ __u32 off = 20;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+ err = 1;
+ goto done;
+ }
+
+ /* Check that start can't be greater than end */
+ if (bpf_dynptr_adjust(&ptr, 5, 1) != -EINVAL) {
+ err = 2;
+ goto done;
+ }
+
+ /* Check that start can't be greater than size */
+ if (bpf_dynptr_adjust(&ptr, size + 1, size + 1) != -ERANGE) {
+ err = 3;
+ goto done;
+ }
+
+ /* Check that end can't be greater than size */
+ if (bpf_dynptr_adjust(&ptr, 0, size + 1) != -ERANGE) {
+ err = 4;
+ goto done;
+ }
+
+ if (bpf_dynptr_adjust(&ptr, off, size)) {
+ err = 5;
+ goto done;
+ }
+
+ /* Check that you can't write more bytes than available into the dynptr
+ * after you've adjusted it
+ */
+ if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+ err = 6;
+ goto done;
+ }
+
+ /* Check that even after adjusting, submitting/discarding
+ * a ringbuf dynptr works
+ */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_zero_size_dynptr(void *ctx)
+{
+ char write_data = 'x', read_data;
+ struct bpf_dynptr ptr;
+ __u32 size = 64;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+ err = 1;
+ goto done;
+ }
+
+ /* After this, the dynptr has a size of 0 */
+ if (bpf_dynptr_adjust(&ptr, size, size)) {
+ err = 2;
+ goto done;
+ }
+
+ /* Test that reading + writing non-zero bytes is not ok */
+ if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) {
+ err = 3;
+ goto done;
+ }
+
+ if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+ err = 4;
+ goto done;
+ }
+
+ /* Test that reading + writing 0 bytes from a 0-size dynptr is ok */
+ if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) {
+ err = 5;
+ goto done;
+ }
+
+ if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) {
+ err = 6;
+ goto done;
+ }
+
+ err = 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_is_null(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u64 size = 4;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ /* Pass in invalid flags, get back an invalid dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) {
+ err = 1;
+ goto exit_early;
+ }
+
+ /* Test that the invalid dynptr is null */
+ if (!bpf_dynptr_is_null(&ptr1)) {
+ err = 2;
+ goto exit_early;
+ }
+
+ /* Get a valid dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) {
+ err = 3;
+ goto exit;
+ }
+
+ /* Test that the valid dynptr is not null */
+ if (bpf_dynptr_is_null(&ptr2)) {
+ err = 4;
+ goto exit;
+ }
+
+exit:
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+exit_early:
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_is_rdonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ struct bpf_dynptr ptr3;
+
+ /* Pass in invalid flags, get back an invalid dynptr */
+ if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) {
+ err = 1;
+ return 0;
+ }
+
+ /* Test that an invalid dynptr is_rdonly returns false */
+ if (bpf_dynptr_is_rdonly(&ptr1)) {
+ err = 2;
+ return 0;
+ }
+
+ /* Get a read-only dynptr */
+ if (bpf_dynptr_from_skb(skb, 0, &ptr2)) {
+ err = 3;
+ return 0;
+ }
+
+ /* Test that the dynptr is read-only */
+ if (!bpf_dynptr_is_rdonly(&ptr2)) {
+ err = 4;
+ return 0;
+ }
+
+ /* Get a read-writeable dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) {
+ err = 5;
+ goto done;
+ }
+
+ /* Test that the dynptr is not read-only */
+ if (bpf_dynptr_is_rdonly(&ptr3)) {
+ err = 6;
+ goto done;
+ }
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr3, 0);
+ return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_clone(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u32 off = 2, size;
+
+ /* Get a dynptr */
+ if (bpf_dynptr_from_skb(skb, 0, &ptr1)) {
+ err = 1;
+ return 0;
+ }
+
+ if (bpf_dynptr_adjust(&ptr1, off, bpf_dynptr_size(&ptr1))) {
+ err = 2;
+ return 0;
+ }
+
+ /* Clone the dynptr */
+ if (bpf_dynptr_clone(&ptr1, &ptr2)) {
+ err = 3;
+ return 0;
+ }
+
+ size = bpf_dynptr_size(&ptr1);
+
+ /* Check that the clone has the same size and read-only status */
+ if (bpf_dynptr_size(&ptr2) != size) {
+ err = 4;
+ return 0;
+ }
+
+ if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) {
+ err = 5;
+ return 0;
+ }
+
+ /* Advance and trim the original dynptr */
+ bpf_dynptr_adjust(&ptr1, 5, 5);
+
+ /* Check that only original dynptr was affected, and the clone wasn't */
+ if (bpf_dynptr_size(&ptr2) != size) {
+ err = 6;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_no_buff(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, NULL, 1);
+
+ return !!data;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_strcmp(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ char *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, NULL, 10);
+ if (data) {
+ bpf_strncmp(data, 10, "foo");
+ return 1;
+ }
+
+ return 1;
+}
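
To make the adjust arithmetic above concrete: bpf_dynptr_adjust(ptr,
start, end) narrows the current view to [start, end), so the size
becomes end - start. Run through test_adjust's numbers:

    /* reserve 64 bytes                  -> size 64
     * bpf_dynptr_adjust(&ptr, 10, 64)   -> view [10, 64), size 54
     * bpf_dynptr_adjust(&ptr, 10, trim) -> view [10, 15), size 5
     *                                      (trim - off == 15 - 10)
     */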
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index be16143ae292..6b9b3c56f009 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -651,29 +651,25 @@ int iter_stack_array_loop(const void *ctx)
return sum;
}
-#define ARR_SZ 16
-
-static __noinline void fill(struct bpf_iter_num *it, int *arr, int mul)
+static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
{
- int *t;
- __u64 i;
+ int *t, i;
while ((t = bpf_iter_num_next(it))) {
i = *t;
- if (i >= ARR_SZ)
+ if (i >= n)
break;
arr[i] = i * mul;
}
}
-static __noinline int sum(struct bpf_iter_num *it, int *arr)
+static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
{
- int *t, sum = 0;;
- __u64 i;
+ int *t, i, sum = 0;
while ((t = bpf_iter_num_next(it))) {
i = *t;
- if (i >= ARR_SZ)
+ if (i >= n)
break;
sum += arr[i];
}
@@ -685,7 +681,7 @@ SEC("raw_tp")
__success
int iter_pass_iter_ptr_to_subprog(const void *ctx)
{
- int arr1[ARR_SZ], arr2[ARR_SZ];
+ int arr1[16], arr2[32];
struct bpf_iter_num it;
int n, sum1, sum2;
@@ -694,25 +690,25 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx)
/* fill arr1 */
n = ARRAY_SIZE(arr1);
bpf_iter_num_new(&it, 0, n);
- fill(&it, arr1, 2);
+ fill(&it, arr1, n, 2);
bpf_iter_num_destroy(&it);
/* fill arr2 */
n = ARRAY_SIZE(arr2);
bpf_iter_num_new(&it, 0, n);
- fill(&it, arr2, 10);
+ fill(&it, arr2, n, 10);
bpf_iter_num_destroy(&it);
/* sum arr1 */
n = ARRAY_SIZE(arr1);
bpf_iter_num_new(&it, 0, n);
- sum1 = sum(&it, arr1);
+ sum1 = sum(&it, arr1, n);
bpf_iter_num_destroy(&it);
/* sum arr2 */
n = ARRAY_SIZE(arr2);
bpf_iter_num_new(&it, 0, n);
- sum2 = sum(&it, arr2);
+ sum2 = sum(&it, arr2, n);
bpf_iter_num_destroy(&it);
bpf_printk("sum1=%d, sum2=%d", sum1, sum2);
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
index 13f00ca2ed0a..f9789e668297 100644
--- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -3,13 +3,11 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
static struct prog_test_ref_kfunc __kptr *v;
long total_sum = -1;
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
SEC("tc")
int test_jit_probe_mem(struct __sk_buff *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
index 767472bc5a97..7632d9ecb253 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-
-extern void bpf_kfunc_call_test_destructive(void) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_destructive_test(void)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
index b98313d391c6..4b0b7b79cdfb 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -2,14 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
-extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
-extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
-extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
-extern void bpf_kfunc_call_int_mem_release(int *p) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
struct syscall_test_args {
__u8 data[16];
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
index 4e8fed75a4e0..d532af07decf 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-
-extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_call_fail(struct __sk_buff *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 7daa8f5720b9..cf68d1e48a0f 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -2,22 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-
-extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
-extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
-extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
- __u32 c, __u64 d) __ksym;
-
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
-extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
-extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
-extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
-extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
-extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
-extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
-extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_call_test4(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index c1fdecabeabf..2380c75e74ce 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -1,13 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
extern const int bpf_prog_active __ksym;
-extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
- __u32 c, __u64 d) __ksym;
-extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
int active_res = -1;
int sk_state_res = -1;
@@ -28,7 +23,7 @@ int __noinline f1(struct __sk_buff *skb)
if (active)
active_res = *active;
- sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index 0ef286da092b..06838083079c 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -5,7 +5,8 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
-#include "bpf_experimental.h"
+#include "../bpf_experimental.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
struct node_data {
long key;
@@ -32,8 +33,6 @@ struct map_value {
*/
struct node_data *just_here_because_btf_bug;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index d7150041e5d1..da30f0d59364 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -2,6 +2,7 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
struct map_value {
struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
@@ -114,10 +115,6 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
-
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
static void test_kptr_unref(struct map_value *v)
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index da8c724f839b..450bb373b179 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
struct map_value {
char buf[8];
@@ -19,9 +20,6 @@ struct array_map {
__uint(max_entries, 1);
} array_map SEC(".maps");
-extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
-extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
-
SEC("?tc")
__failure __msg("kptr access size must be BPF_DW")
int size_not_bpf_dw(struct __sk_buff *ctx)
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 1d348a225140..a3da610b1e6b 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -375,6 +375,8 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
bpf_rbtree_add(&aroot, &n->node, less_a);
m = bpf_refcount_acquire(n);
bpf_spin_unlock(&alock);
+ if (!m)
+ return 2;
m->key = 2;
bpf_obj_drop(m);
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index efcb308f80ad..0b09e5c915b1 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -29,7 +29,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=3 alloc_insn=21")
+__failure __msg("Unreleased reference id=4 alloc_insn=21")
long rbtree_refcounted_node_ref_escapes(void *ctx)
{
struct node_acquire *n, *m;
@@ -43,6 +43,8 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
/* m becomes an owning ref but is never drop'd or added to a tree */
m = bpf_refcount_acquire(n);
bpf_spin_unlock(&glock);
+ if (!m)
+ return 2;
m->key = 2;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
new file mode 100644
index 000000000000..9e0bf7a54cec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "bpf_tracing_net.h"
+
+__be16 serv_port = 0;
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} tcp_conn_sockets SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} udp_conn_sockets SEC(".maps");
+
+SEC("cgroup/connect6")
+int sock_connect(struct bpf_sock_addr *ctx)
+{
+ __u64 sock_cookie = 0;
+ int key = 0;
+ __u32 keyc = 0;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ sock_cookie = bpf_get_socket_cookie(ctx);
+ if (ctx->protocol == IPPROTO_TCP)
+ bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0);
+ else if (ctx->protocol == IPPROTO_UDP)
+ bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0);
+ else
+ return 1;
+
+ return 1;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_client(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ __u64 sock_cookie = 0;
+ __u64 *val;
+ int key = 0;
+
+ if (!sk_common)
+ return 0;
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ val = bpf_map_lookup_elem(&tcp_conn_sockets, &key);
+ if (!val)
+ return 0;
+ /* Destroy connected client sockets. */
+ if (sock_cookie == *val)
+ bpf_sock_destroy(sk_common);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_server(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ const struct inet_connection_sock *icsk;
+ const struct inet_sock *inet;
+ struct tcp6_sock *tcp_sk;
+ __be16 srcp;
+
+ if (!sk_common)
+ return 0;
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ tcp_sk = bpf_skc_to_tcp6_sock(sk_common);
+ if (!tcp_sk)
+ return 0;
+
+ icsk = &tcp_sk->tcp.inet_conn;
+ inet = &icsk->icsk_inet;
+ srcp = inet->inet_sport;
+
+ /* Destroy server sockets. */
+ if (srcp == serv_port)
+ bpf_sock_destroy(sk_common);
+
+ return 0;
+}
+
+SEC("iter/udp")
+int iter_udp6_client(struct bpf_iter__udp *ctx)
+{
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct sock *sk = (struct sock *) udp_sk;
+ __u64 sock_cookie = 0, *val;
+ int key = 0;
+
+ if (!sk)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk);
+ val = bpf_map_lookup_elem(&udp_conn_sockets, &key);
+ if (!val)
+ return 0;
+ /* Destroy connected client sockets. */
+ if (sock_cookie == *val)
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
+SEC("iter/udp")
+int iter_udp6_server(struct bpf_iter__udp *ctx)
+{
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct sock *sk = (struct sock *) udp_sk;
+ struct inet_sock *inet;
+ __be16 srcp;
+
+ if (!sk)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = inet->inet_sport;
+ if (srcp == serv_port)
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
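
bpf_sock_destroy() may only be called from iterator programs like these;
userspace triggers the destruction by creating and draining the
iterator, roughly as follows (a sketch, assuming the usual skeleton):

    struct bpf_link *link;
    char buf[64];
    int iter_fd;

    link = bpf_program__attach_iter(skel->progs.iter_tcp6_client, NULL);
    if (!link)
        return;
    iter_fd = bpf_iter_create(bpf_link__fd(link));
    if (iter_fd >= 0) {
        /* each read() runs the iterator program over more sockets */
        while (read(iter_fd, buf, sizeof(buf)) > 0)
            ;
        close(iter_fd);
    }
    bpf_link__destroy(link);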
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
new file mode 100644
index 000000000000..dd6850b58e25
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+SEC("tp_btf/tcp_destroy_sock")
+__failure __msg("calling kernel function bpf_sock_destroy is not allowed")
+int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk)
+{
+ /* should not load */
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
index 9fb241b97291..c8f59caa4639 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -9,6 +9,8 @@ char _license[] SEC("license") = "GPL";
#define CUSTOM_INHERIT2 1
#define CUSTOM_LISTENER 2
+__u32 page_size = 0;
+
struct sockopt_inherit {
__u8 val;
};
@@ -55,7 +57,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_CUSTOM)
- return 1; /* only interested in SOL_CUSTOM */
+ goto out; /* only interested in SOL_CUSTOM */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -70,6 +72,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -80,7 +88,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_CUSTOM)
- return 1; /* only interested in SOL_CUSTOM */
+ goto out; /* only interested in SOL_CUSTOM */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -93,4 +101,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
ctx->optlen = -1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
index 177a59069dae..96f29fce050b 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -5,6 +5,8 @@
char _license[] SEC("license") = "GPL";
+__u32 page_size = 0;
+
SEC("cgroup/getsockopt")
int _getsockopt_child(struct bpf_sockopt *ctx)
{
@@ -12,7 +14,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -26,6 +28,12 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/getsockopt")
@@ -35,7 +43,7 @@ int _getsockopt_parent(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -49,6 +57,12 @@ int _getsockopt_parent(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -58,7 +72,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -67,4 +81,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
index 1bce83b6e3a7..dbe235ede7f3 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -9,6 +9,8 @@
char _license[] SEC("license") = "GPL";
+__u32 page_size = 0;
+
SEC("cgroup/setsockopt")
int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
{
@@ -19,7 +21,7 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -36,4 +38,10 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
return 0;
}
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index fe1df4cd206e..cb990a7d3d45 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -37,7 +37,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
/* Bypass AF_NETLINK. */
sk = ctx->sk;
if (sk && sk->family == AF_NETLINK)
- return 1;
+ goto out;
/* Make sure bpf_get_netns_cookie is callable.
*/
@@ -52,8 +52,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
- return 1;
+ goto out;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
@@ -61,7 +60,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- return 1;
+ goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
@@ -69,7 +68,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- return 1;
+ goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
@@ -85,7 +84,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
if (((struct tcp_zerocopy_receive *)optval)->address != 0)
return 0; /* unexpected data */
- return 1;
+ goto out;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
@@ -129,6 +128,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -142,7 +147,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Bypass AF_NETLINK. */
sk = ctx->sk;
if (sk && sk->family == AF_NETLINK)
- return 1;
+ goto out;
/* Make sure bpf_get_netns_cookie is callable.
*/
@@ -224,4 +229,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
*/
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE uses the kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
index 76eab0aacba0..233b089d1fba 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -12,6 +12,15 @@
#include <linux/ipv6.h>
#include <linux/udp.h>
+/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
+ * friendly version breaks compilation for older clang versions <= 15
+ * when invoked in a static assert. Restore original here.
+ */
+#ifdef offsetof
+#undef offsetof
+#define offsetof(type, member) __builtin_offsetof(type, member)
+#endif
+
struct gre_base_hdr {
uint16_t flags;
uint16_t protocol;
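
The kind of use that motivates the #undef above is a compile-time layout
check, e.g. (illustrative):

    _Static_assert(offsetof(struct gre_base_hdr, protocol) == 2,
                   "GRE base header layout");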
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index b85fc8c423ba..17a9f59bf5f3 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -10,6 +10,8 @@
static __attribute__ ((noinline))
int f0(int var, struct __sk_buff *skb)
{
+ asm volatile ("");
+
return skb->len;
}
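
The empty asm statement gives f0 a body the compiler cannot prove
side-effect-free, which keeps it from folding or otherwise collapsing
the trivial function and preserves the subprogram structure the test
depends on; the same trick is applied in test_sock_fields.c below. The
shape, with a hypothetical second helper:

    static __attribute__ ((noinline))
    int g0(int var, struct __sk_buff *skb)
    {
        asm volatile ("");  /* keep this body from being folded away */
        return skb->len;
    }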
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
new file mode 100644
index 000000000000..2588f2384246
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* rodata section */
+const volatile pid_t pid;
+const volatile size_t bss_array_len;
+const volatile size_t data_array_len;
+
+/* bss section */
+int sum = 0;
+int array[1];
+
+/* custom data section */
+int my_array[1] SEC(".data.custom");
+
+/* custom data section which should NOT be resizable,
+ * since it contains a single var which is not an array
+ */
+int my_int SEC(".data.non_array");
+
+/* custom data section which should NOT be resizable,
+ * since its last var is not an array
+ */
+int my_array_first[1] SEC(".data.array_not_last");
+int my_int_last SEC(".data.array_not_last");
+
+SEC("tp/syscalls/sys_enter_getpid")
+int bss_array_sum(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ sum = 0;
+
+ for (size_t i = 0; i < bss_array_len; ++i)
+ sum += array[i];
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int data_array_sum(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ sum = 0;
+
+ for (size_t i = 0; i < data_array_len; ++i)
+ sum += my_array[i];
+
+ return 0;
+}
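
The resizing itself happens from userspace between open and load, via
bpf_map__set_value_size() on the global-data maps; a sketch (the length
is arbitrary, and the real test derives the exact section size since
.bss also holds 'sum'):

    struct test_global_map_resize *skel;
    const size_t desired_len = 1024; /* arbitrary */
    int err;

    skel = test_global_map_resize__open();
    if (!skel)
        return;
    skel->rodata->bss_array_len = desired_len;
    /* illustrative size: compute from the section's real layout */
    err = bpf_map__set_value_size(skel->maps.bss,
                                  desired_len * sizeof(int));
    if (!err)
        err = test_global_map_resize__load(skel);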
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index bbad3c2d9aa5..f75e531bf36f 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -265,7 +265,10 @@ static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
{
- __u16 *half = (__u16 *)&sk->dst_port;
+ __u16 *half;
+
+ asm volatile ("");
+ half = (__u16 *)&sk->dst_port;
return half[0] == bpf_htons(0xcafe);
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index 325c9f193432..464d35bd57c7 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,12 +28,26 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} parser_map SEC(".maps");
+
bool test_sockmap = false; /* toggled by user-space */
bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
{
+ int *value;
+ __u32 key = 0;
+
+ value = bpf_map_lookup_elem(&parser_map, &key);
+ if (value && *value)
+ return *value;
+
return skb->len;
}
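
parser_map gives the harness a knob to force the stream parser's verdict
from userspace, roughly like so (a sketch; the -1 value is arbitrary):

    int key = 0;
    int value = -1; /* force prog_stream_parser to return an error */

    bpf_map_update_elem(bpf_map__fd(skel->maps.parser_map), &key,
                        &value, BPF_ANY);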
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644
index 000000000000..56cdc0a553f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+const volatile int local_pid;
+const volatile __u64 cgid;
+int remote_pid;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *acquired;
+
+ if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ if (local_pid == acquired->tgid)
+ goto out;
+
+ cgrp = bpf_cgroup_from_id(cgid);
+ if (!cgrp)
+ goto out;
+
+ if (bpf_task_under_cgroup(acquired, cgrp))
+ remote_pid = acquired->tgid;
+
+out:
+ if (cgrp)
+ bpf_cgroup_release(cgrp);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
index 25ee4a22e48d..78c368e71797 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2022 Meta */
#include <stddef.h>
#include <string.h>
+#include <stdbool.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
new file mode 100644
index 000000000000..13b29a7faa71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Check that precision marks propagate through scalar IDs.
+ * Registers r{0,1,2} have the same scalar ID at the moment when r0 is
+ * marked to be precise, this mark is immediately propagated to r{1,2}.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: regs=r0,r1,r2 stack= before 4: (bf) r3 = r10")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_same_state(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as precision_same_state, but mark propagates through state /
+ * parent state boundary.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: last_idx 6 first_idx 5 subseq_idx -1")
+__msg("frame0: regs=r0,r1,r2 stack= before 5: (bf) r3 = r10")
+__msg("frame0: parent state regs=r0,r1,r2 stack=:")
+__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: parent state regs=r0 stack=:")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_cross_state(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* force checkpoint */
+ "goto +0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as precision_same_state, but break one of the links;
+ * note that r1 is absent from regs=... in the __msg patterns below.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: regs=r0,r2 stack= before 5: (bf) r3 = r10")
+__msg("frame0: regs=r0,r2 stack= before 4: (b7) r1 = 0")
+__msg("frame0: regs=r0,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_same_state_broken_link(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* break link for r1, this is the only line that differs
+ * compared to the previous test
+ */
+ "r1 = 0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as precision_same_state_broken_link, but with state /
+ * parent state boundary.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: regs=r0,r2 stack= before 6: (bf) r3 = r10")
+__msg("frame0: regs=r0,r2 stack= before 5: (b7) r1 = 0")
+__msg("frame0: parent state regs=r0,r2 stack=:")
+__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: parent state regs=r0 stack=:")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_cross_state_broken_link(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* force checkpoint; although the link between r1 and r{0,2} is
+ * broken by the next statement, the current precision tracking
+ * algorithm can't react to it and still propagates the mark for
+ * r1 to the parent state.
+ */
+ "goto +0;"
+ /* break link for r1, this is the only line that differs
+ * compared to precision_cross_state()
+ */
+ "r1 = 0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that precision marks propagate through scalar IDs.
+ * Use the same scalar ID in multiple stack frames, check that
+ * precision information is propagated up the call stack.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (0f) r2 += r1")
+/* Current state */
+__msg("frame2: last_idx 11 first_idx 10 subseq_idx -1")
+__msg("frame2: regs=r1 stack= before 10: (bf) r2 = r10")
+__msg("frame2: parent state regs=r1 stack=")
+/* frame1.r{6,7} are marked because mark_precise_scalar_ids()
+ * looks for all registers with frame2.r1.id in the current state
+ */
+__msg("frame1: parent state regs=r6,r7 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10")
+__msg("frame2: regs=r1 stack= before 8: (85) call pc+1")
+/* frame1.r1 is marked because of backtracking of call instruction */
+__msg("frame1: parent state regs=r1,r6,r7 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8")
+__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1")
+__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1")
+__msg("frame1: parent state regs=r1 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+1")
+__msg("frame0: parent state regs=r1,r6 stack=")
+/* Parent state */
+__msg("frame0: last_idx 3 first_idx 1 subseq_idx 4")
+__msg("frame0: regs=r0,r1,r6 stack= before 3: (bf) r6 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_many_frames(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r6.id */
+ "r1 = r0;"
+ "r6 = r0;"
+ "call precision_many_frames__foo;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_many_frames__foo(void)
+{
+ asm volatile (
+ /* conflate one of the register numbers (r6) with outer frame,
+ * to verify that those are tracked independently
+ */
+ "r6 = r1;"
+ "r7 = r1;"
+ "call precision_many_frames__bar;"
+ "exit"
+ ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_many_frames__bar(void)
+{
+ asm volatile (
+ /* force r1 to be precise, this immediately marks:
+ * - bar frame r1
+ * - foo frame r{1,6,7}
+ * - main frame r{1,6}
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Check that scalars with the same IDs are marked precise on stack as
+ * well as in registers.
+ */
+SEC("socket")
+__success __log_level(2)
+/* foo frame */
+__msg("frame1: regs=r1 stack=-8,-16 before 9: (bf) r2 = r10")
+__msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1")
+__msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+2")
+/* main frame */
+__msg("frame0: regs=r0,r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_stack(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == fp[-8].id */
+ "r1 = r0;"
+ "*(u64*)(r10 - 8) = r1;"
+ "call precision_stack__foo;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_stack__foo(void)
+{
+ asm volatile (
+ /* conflate one of the stack slots (fp-8) with the outer frame's
+ * fp-8, to verify that those are tracked independently
+ */
+ "*(u64*)(r10 - 8) = r1;"
+ "*(u64*)(r10 - 16) = r1;"
+ /* force r1 to be precise, this immediately marks:
+ * - foo frame r1,fp{-8,-16}
+ * - main frame r1,fp{-8}
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit"
+ ::: __clobber_all);
+}
+
+/* Use two separate scalar IDs to check that these are propagated
+ * independently.
+ */
+SEC("socket")
+__success __log_level(2)
+/* r{6,7} */
+__msg("11: (0f) r3 += r7")
+__msg("frame0: regs=r6,r7 stack= before 10: (bf) r3 = r10")
+/* ... skip some insns ... */
+__msg("frame0: regs=r6,r7 stack= before 3: (bf) r7 = r0")
+__msg("frame0: regs=r0,r6 stack= before 2: (bf) r6 = r0")
+/* r{8,9} */
+__msg("12: (0f) r3 += r9")
+__msg("frame0: regs=r8,r9 stack= before 11: (0f) r3 += r7")
+/* ... skip some insns ... */
+__msg("frame0: regs=r8,r9 stack= before 7: (bf) r9 = r0")
+__msg("frame0: regs=r0,r8 stack= before 6: (bf) r8 = r0")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_two_ids(void)
+{
+ asm volatile (
+ /* r6 = random number up to 0xff
+ * r6.id == r7.id
+ */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "r7 = r0;"
+ /* same, but for r{8,9} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r8 = r0;"
+ "r9 = r0;"
+ /* clear r0 id */
+ "r0 = 0;"
+ /* force checkpoint */
+ "goto +0;"
+ "r3 = r10;"
+ /* force r7 to be precise, this also marks r6 */
+ "r3 += r7;"
+ /* force r9 to be precise, this also marks r8 */
+ "r3 += r9;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Verify that check_ids() is used by regsafe() for scalars.
+ *
+ * r9 = ... some pointer with range X ...
+ * r6 = ... unbound scalar ID=a ...
+ * r7 = ... unbound scalar ID=b ...
+ * if (r6 > r7) goto +1
+ * r7 = r6
+ * if (r7 > X) goto exit
+ * r9 += r6
+ * ... access memory using r9 ...
+ *
+ * The memory access is safe only if r6 is bounded, which is true
+ * on the path where "r7 = r6" ties their IDs (the r7 > X check then
+ * transfers the range to r6) and false otherwise.
+ */
+SEC("socket")
+__failure __msg("register with unbounded min value")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_in_regsafe(void)
+{
+ asm volatile (
+ /* Bump allocated stack */
+ "r1 = 0;"
+ "*(u64*)(r10 - 8) = r1;"
+ /* r9 = pointer to stack */
+ "r9 = r10;"
+ "r9 += -8;"
+ /* r7 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ /* r6 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ /* if r6 > r7 is an unpredictable jump */
+ "if r6 > r7 goto l1_%=;"
+ "r7 = r6;"
+"l1_%=:"
+ /* if r7 > 4 ...; transfers range to r6 on one execution path
+ * but does not transfer on another
+ */
+ "if r7 > 4 goto l2_%=;"
+ /* Access memory at r9[r6], r6 is not always bounded */
+ "r9 += r6;"
+ "r0 = *(u8*)(r9 + 0);"
+"l2_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Similar to check_ids_in_regsafe.
+ * Label l0 can be reached in two states:
+ *
+ * (1) r6{.id=A}, r7{.id=A}, r8{.id=B}
+ * (2) r6{.id=B}, r7{.id=A}, r8{.id=B}
+ *
+ * Where (2) is not safe, as the "r7 > 4" check won't propagate the range to r6.
+ * This example would be considered safe without changes to
+ * mark_chain_precision() to track scalar values with equal IDs.
+ */
+SEC("socket")
+__failure __msg("register with unbounded min value")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_in_regsafe_2(void)
+{
+ asm volatile (
+ /* Bump allocated stack */
+ "r1 = 0;"
+ "*(u64*)(r10 - 8) = r1;"
+ /* r9 = pointer to stack */
+ "r9 = r10;"
+ "r9 += -8;"
+ /* r8 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r8 = r0;"
+ /* r7 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ /* r6 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ /* scratch .id from r0 */
+ "r0 = 0;"
+ /* if r6 > r7 is an unpredictable jump */
+ "if r6 > r7 goto l1_%=;"
+ /* tie r6 and r7 .id */
+ "r6 = r7;"
+"l0_%=:"
+ /* if r7 > 4 exit(0) */
+ "if r7 > 4 goto l2_%=;"
+ /* Access memory at r9[r6] */
+ "r9 += r6;"
+ "r0 = *(u8*)(r9 + 0);"
+"l2_%=:"
+ "r0 = 0;"
+ "exit;"
+"l1_%=:"
+ /* tie r6 and r8 .id */
+ "r6 = r8;"
+ "goto l0_%=;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that scalar IDs *are not* generated on register to register
+ * assignments if source register is a constant.
+ *
+ * If such IDs *are* generated the 'l1' below would be reached in
+ * two states:
+ *
+ * (1) r1{.id=A}, r2{.id=A}
+ * (2) r1{.id=C}, r2{.id=C}
+ *
+ * Thus forcing 'if r1 == r2' verification twice.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (1d) if r3 == r4 goto pc+0")
+__msg("frame 0: propagating r3,r4")
+__msg("11: safe")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void no_scalar_id_for_const(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* unpredictable jump */
+ "if r0 > 7 goto l0_%=;"
+ /* possibly generate same scalar ids for r3 and r4 */
+ "r1 = 0;"
+ "r1 = r1;"
+ "r3 = r1;"
+ "r4 = r1;"
+ "goto l1_%=;"
+"l0_%=:"
+ /* possibly generate different scalar ids for r3 and r4 */
+ "r1 = 0;"
+ "r2 = 0;"
+ "r3 = r1;"
+ "r4 = r2;"
+"l1_%=:"
+ /* predictable jump, marks r3 and r4 precise */
+ "if r3 == r4 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as no_scalar_id_for_const() but for 32-bit values */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (1e) if w3 == w4 goto pc+0")
+__msg("frame 0: propagating r3,r4")
+__msg("11: safe")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void no_scalar_id_for_const32(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* unpredictable jump */
+ "if r0 > 7 goto l0_%=;"
+ /* possibly generate same scalar ids for r3 and r4 */
+ "w1 = 0;"
+ "w1 = w1;"
+ "w3 = w1;"
+ "w4 = w1;"
+ "goto l1_%=;"
+"l0_%=:"
+ /* possibly generate different scalar ids for r3 and r4 */
+ "w1 = 0;"
+ "w2 = 0;"
+ "w3 = w1;"
+ "w4 = w2;"
+"l1_%=:"
+ /* predictable jump, marks r3 and r4 precise */
+ "if w3 == w4 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that unique scalar IDs are ignored when new verifier state is
+ * compared to cached verifier state. For this test:
+ * - cached state has no id on r1
+ * - new state has a unique id on r1
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("6: (25) if r6 > 0x7 goto pc+1")
+__msg("7: (57) r1 &= 255")
+__msg("8: (bf) r2 = r10")
+__msg("from 6 to 8: safe")
+__msg("processed 12 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void ignore_unique_scalar_ids_cur(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* r1.id == r0.id */
+ "r1 = r0;"
+ /* make r1.id unique */
+ "r0 = 0;"
+ "if r6 > 7 goto l0_%=;"
+ /* clear r1 id, but keep the range compatible */
+ "r1 &= 0xff;"
+"l0_%=:"
+ /* get here in two states:
+ * - first: r1 has no id (cached state)
+ * - second: r1 has a unique id (should be considered equivalent)
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that unique scalar IDs are ignored when new verifier state is
+ * compared to cached verifier state. For this test:
+ * - cached state has a unique id on r1
+ * - new state has no id on r1
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("6: (25) if r6 > 0x7 goto pc+1")
+__msg("7: (05) goto pc+1")
+__msg("9: (bf) r2 = r10")
+__msg("9: safe")
+__msg("processed 13 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void ignore_unique_scalar_ids_old(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* r1.id == r0.id */
+ "r1 = r0;"
+ /* make r1.id unique */
+ "r0 = 0;"
+ "if r6 > 7 goto l1_%=;"
+ "goto l0_%=;"
+"l1_%=:"
+ /* clear r1 id, but keep the range compatible */
+ "r1 &= 0xff;"
+"l0_%=:"
+ /* get here in two states:
+ * - first: r1 has a unique id (cached state)
+ * - second: r1 has no id (should be considered equivalent)
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that two different scalar IDs in a verified state can't be
+ * mapped to the same scalar ID in current state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* The exit instruction should be reachable from two states,
+ * use two matches and "processed .. insns" to ensure this.
+ */
+__msg("13: (95) exit")
+__msg("13: (95) exit")
+__msg("processed 18 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void two_old_ids_one_cur_id(void)
+{
+ asm volatile (
+ /* Give unique scalar IDs to r{6,7} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r7 = r0;"
+ "r0 = 0;"
+ /* Maybe make r{6,7} IDs identical */
+ "if r6 > r7 goto l0_%=;"
+ "goto l1_%=;"
+"l0_%=:"
+ "r6 = r7;"
+"l1_%=:"
+ /* Mark r{6,7} precise.
+ * Get here in two states:
+ * - first: r6{.id=A}, r7{.id=B} (cached state)
+ * - second: r6{.id=A}, r7{.id=A}
+ * Currently we don't want to consider such states equivalent.
+ * Thus "exit;" would be verified twice.
+ */
+ "r2 = r10;"
+ "r2 += r6;"
+ "r2 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
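
For orientation, a rough C-level equivalent of the pattern the naked asm above keeps encoding; the exact instructions the compiler emits will differ, so this is only a sketch of how two registers come to share one scalar ID:

	int vals[4];

	SEC("socket")
	int scalar_id_sharing(void *ctx)
	{
		__u64 x = bpf_ktime_get_ns() & 3;	/* r0 &= 3 */
		__u64 a = x, b = x;			/* r1 = r0; r2 = r0: one shared ID */

		/* indexing with 'a' forces it to be precise; the verifier must
		 * propagate that mark to 'b' and 'x' through the shared ID
		 */
		return vals[a] + b;
	}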
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
new file mode 100644
index 000000000000..db6b3143338b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+int vals[] SEC(".data.vals") = {1, 2, 3, 4};
+
+__naked __noinline __used
+static unsigned long identity_subprog()
+{
+ /* the simplest *static* 64-bit identity function */
+ asm volatile (
+ "r0 = r1;"
+ "exit;"
+ );
+}
+
+__noinline __used
+unsigned long global_identity_subprog(__u64 x)
+{
+ /* the simplest *global* 64-bit identity function */
+ return x;
+}
+
+__naked __noinline __used
+static unsigned long callback_subprog()
+{
+ /* the simplest callback function */
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int subprog_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * this whole chain will have to be marked as precise later
+ */
+ "r1 = r6;"
+ "call identity_subprog;"
+ /* now use subprog's returned value (which is a
+ * r6 -> r1 -> r0 chain), as index into vals array, forcing
+ * all of that to be known precisely
+ */
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0->r1->r6 chain is forced to be precise and has to be
+ * propagated back to the beginning, including through the
+ * subprog call
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (a5) if r0 < 0x4 goto pc+1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (85) call pc+7")
+__naked int global_subprog_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * given global_identity_subprog is global, precision won't
+ * propagate all the way back to r6
+ */
+ "r1 = r6;"
+ "call global_identity_subprog;"
+ /* now use subprog's returned value (which is unknown now, so
+ * we need to clamp it), as index into vals array, forcing r0
+ * to be marked precise (with no effect on r6, though)
+ */
+ "if r0 < %[vals_arr_sz] goto 1f;"
+ "r0 = %[vals_arr_sz] - 1;"
+ "1:"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0 is forced to be precise and has to be
+ * propagated back to the global subprog call, but it
+ * shouldn't go all the way to mark r6 as precise
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_const(vals_arr_sz, ARRAY_SIZE(vals))
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
+__msg("mark_precise: frame0: parent state regs=r0 stack=:")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0")
+__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__naked int callback_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and use result; r0 shouldn't propagate back to
+ * callback_subprog
+ */
+ "r1 = r6;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r6 = r0;"
+ "if r6 > 3 goto 1f;"
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the bpf_loop() call, but not beyond
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "1:"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call identity_subprog;"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise_global(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call global_identity_subprog;"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("12: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: parent state regs=r6 stack=:")
+__msg("mark_precise: frame0: last_idx 16 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise_with_callback(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 1;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) callback call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 6")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 13 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 13: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 12: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call identity_subprog;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 6")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise_global(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call global_identity_subprog;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+__naked int parent_stack_slot_precise_with_callback(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* ensure we have callback frame in jump history */
+ "r1 = r6;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+__noinline __used
+static __u64 subprog_with_precise_arg(__u64 x)
+{
+ return vals[x]; /* x is forced to be precise */
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (0f) r2 += r1")
+__msg("mark_precise: frame1: last_idx 8 first_idx 0")
+__msg("mark_precise: frame1: regs=r1 stack= before 6: (18) r2 = ")
+__msg("mark_precise: frame1: regs=r1 stack= before 5: (67) r1 <<= 2")
+__msg("mark_precise: frame1: regs=r1 stack= before 2: (85) call pc+2")
+__msg("mark_precise: frame0: regs=r1 stack= before 1: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 0: (b7) r6 = 3")
+__naked int subprog_arg_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ "r1 = r6;"
+ /* subprog_with_precise_arg expects its argument to be
+ * precise, so r1->r6 will be marked precise from inside the
+ * subprog
+ */
+ "call subprog_with_precise_arg;"
+ "r0 += r6;"
+ "exit;"
+ :
+ :
+ : __clobber_common, "r6"
+ );
+}
+
+/* r1 is a pointer to a stack slot;
+ * r2 is a register to spill into that slot.
+ * The subprog also spills r2 into its own stack slot.
+ */
+__naked __noinline __used
+static __u64 subprog_spill_reg_precise(void)
+{
+ asm volatile (
+ /* spill to parent stack */
+ "*(u64 *)(r1 + 0) = r2;"
+ /* spill to subprog stack (we use -16 offset to avoid
+ * accidental confusion with parent's -8 stack slot in
+ * verifier log output)
+ */
+ "*(u64 *)(r10 - 16) = r2;"
+ /* use both spills as the return result to propagate precision everywhere */
+ "r0 = *(u64 *)(r10 - 16);"
+ "r2 = *(u64 *)(r1 + 0);"
+ "r0 += r2;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* precision backtracking can't currently handle stack access other than
+ * through r10, so we won't be able to mark stack slot fp-8 as precise and
+ * will fall back to forcing all scalars precise
+ */
+__msg("mark_precise: frame0: falling back to forcing all scalars precise")
+__naked int subprog_spill_into_parent_stack_slot_precise(void)
+{
+ asm volatile (
+ "r6 = 1;"
+
+ /* pass pointer to stack slot and r6 to subprog;
+ * r6 will be marked precise and spilled into fp-8 slot, which
+ * also should be marked precise
+ */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r6;"
+ "call subprog_spill_reg_precise;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r7 = *(u64 *)(r10 - 8);"
+
+ "r7 *= 4;"
+ "r1 = %[vals];"
+ /* here r7 is forced to be precise and has to be propagated
+ * back to the beginning, handling subprog call and logic
+ */
+ "r1 += r7;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6", "r7"
+ );
+}
+
+__naked __noinline __used
+static __u64 subprog_with_checkpoint(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */
+ "goto +0;"
+ "exit;"
+ );
+}
+
+char _license[] SEC("license") = "GPL";
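
The callback tests above drive bpf_loop() from naked asm; in C the same helper contract looks roughly like this (callback signature per the bpf_loop UAPI, everything else illustrative):

	static long cb(__u64 index, void *ctx)
	{
		return 0;	/* 0 continues the loop, 1 breaks out */
	}

	SEC("?raw_tp")
	int use_bpf_loop(void *ctx)
	{
		/* r1..r4 in the asm map to these four arguments */
		return bpf_loop(3 /* nr_loops */, cb, NULL /* callback_ctx */, 0 /* flags */);
	}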
diff --git a/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
new file mode 100644
index 000000000000..bcfb6feb38c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+#include <stdbool.h>
+
+int lookup_status;
+bool test_xdp;
+bool tcp_skc;
+
+#define CUR_NS BPF_F_CURRENT_NETNS
+
+static void socket_lookup(void *ctx, void *data_end, void *data)
+{
+ struct ethhdr *eth = data;
+ struct bpf_sock_tuple *tp;
+ struct bpf_sock *sk;
+ struct iphdr *iph;
+ int tplen;
+
+ if (eth + 1 > data_end)
+ return;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return;
+
+ iph = (struct iphdr *)(eth + 1);
+ if (iph + 1 > data_end)
+ return;
+
+ tp = (struct bpf_sock_tuple *)&iph->saddr;
+ tplen = sizeof(tp->ipv4);
+ if ((void *)tp + tplen > data_end)
+ return;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (tcp_skc)
+ sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ else
+ sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ case IPPROTO_UDP:
+ sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ default:
+ return;
+ }
+
+ lookup_status = 0;
+
+ if (sk) {
+ bpf_sk_release(sk);
+ lookup_status = 1;
+ }
+}
+
+SEC("tc")
+int tc_socket_lookup(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+
+ if (test_xdp)
+ return TC_ACT_UNSPEC;
+
+ socket_lookup(skb, data_end, data);
+ return TC_ACT_UNSPEC;
+}
+
+SEC("xdp")
+int xdp_socket_lookup(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ if (!test_xdp)
+ return XDP_PASS;
+
+ socket_lookup(xdp, data_end, data);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
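
A short sketch of attaching the tc flavour with libbpf's tc APIs (ifindex and prog_fd are placeholders; the XDP flavour would use bpf_xdp_attach() instead):

	LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
		    .attach_point = BPF_TC_INGRESS);
	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .prog_fd = prog_fd);

	bpf_tc_hook_create(&hook);	/* may return -EEXIST if the qdisc already exists */
	bpf_tc_attach(&hook, &tc_opts);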
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index e1c787815e44..b2dfd7066c6e 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -77,7 +77,9 @@ int rx(struct xdp_md *ctx)
}
err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
- if (err)
+ if (!err)
+ meta->xdp_timestamp = bpf_ktime_get_tai_ns();
+ else
meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
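
On the consumer side, a hedged sketch of what the extra timestamp enables (field names follow the shared metadata struct used by this test; the device and TAI clocks must be synchronized for the delta to be meaningful):

	if (meta->rx_timestamp)	/* 0 means the driver had no HW timestamp */
		printf("NIC %llu -> XDP %llu, delta %lld ns\n",
		       meta->rx_timestamp, meta->xdp_timestamp,
		       (long long)(meta->xdp_timestamp - meta->rx_timestamp));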
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index ea82921110da..4d582cac2c09 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -11,7 +11,6 @@
#include <signal.h>
#include <string.h>
#include <execinfo.h> /* backtrace */
-#include <linux/membarrier.h>
#include <sys/sysinfo.h> /* get_nprocs */
#include <netinet/in.h>
#include <sys/select.h>
@@ -629,68 +628,6 @@ out:
return err;
}
-static int finit_module(int fd, const char *param_values, int flags)
-{
- return syscall(__NR_finit_module, fd, param_values, flags);
-}
-
-static int delete_module(const char *name, int flags)
-{
- return syscall(__NR_delete_module, name, flags);
-}
-
-/*
- * Trigger synchronize_rcu() in kernel.
- */
-int kern_sync_rcu(void)
-{
- return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
-}
-
-static void unload_bpf_testmod(void)
-{
- if (kern_sync_rcu())
- fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n");
- if (delete_module("bpf_testmod", 0)) {
- if (errno == ENOENT) {
- if (verbose())
- fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
- return;
- }
- fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
- return;
- }
- if (verbose())
- fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
-}
-
-static int load_bpf_testmod(void)
-{
- int fd;
-
- /* ensure previous instance of the module is unloaded */
- unload_bpf_testmod();
-
- if (verbose())
- fprintf(stdout, "Loading bpf_testmod.ko...\n");
-
- fd = open("bpf_testmod.ko", O_RDONLY);
- if (fd < 0) {
- fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
- return -ENOENT;
- }
- if (finit_module(fd, "", 0)) {
- fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
- close(fd);
- return -EINVAL;
- }
- close(fd);
-
- if (verbose())
- fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
- return 0;
-}
-
/* extern declarations for test funcs */
#define DEFINE_TEST(name) \
extern void test_##name(void) __weak; \
@@ -714,7 +651,13 @@ static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
const char *argp_program_version = "test_progs 0.1";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-static const char argp_program_doc[] = "BPF selftests test runner";
+static const char argp_program_doc[] =
+"BPF selftests test runner\v"
+"Options accepting the NAMES parameter take either a comma-separated list\n"
+"of test names, or a filename prefixed with @. The file contains one name\n"
+"(or wildcard pattern) per line, and comments beginning with # are ignored.\n"
+"\n"
+"These options can be passed repeatedly to read multiple files.\n";
enum ARG_KEYS {
ARG_TEST_NUM = 'n',
@@ -797,6 +740,7 @@ extern int extra_prog_load_log_flags;
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
struct test_env *env = state->input;
+ int err = 0;
switch (key) {
case ARG_TEST_NUM: {
@@ -821,18 +765,28 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
- if (parse_test_list(arg,
- &env->test_selector.whitelist,
- key == ARG_TEST_NAME_GLOB_ALLOWLIST))
- return -ENOMEM;
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST);
+ else
+ err = parse_test_list(arg,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST);
+
break;
}
case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
- if (parse_test_list(arg,
- &env->test_selector.blacklist,
- key == ARG_TEST_NAME_GLOB_DENYLIST))
- return -ENOMEM;
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST);
+ else
+ err = parse_test_list(arg,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST);
+
break;
}
case ARG_VERIFIER_STATS:
@@ -900,7 +854,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
default:
return ARGP_ERR_UNKNOWN;
}
- return 0;
+ return err;
}
/*
@@ -1703,9 +1657,14 @@ int main(int argc, char **argv)
env.stderr = stderr;
env.has_testmod = true;
- if (!env.list_test_names && load_bpf_testmod()) {
- fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
- env.has_testmod = false;
+ if (!env.list_test_names) {
+ /* ensure previous instance of the module is unloaded */
+ unload_bpf_testmod(verbose());
+
+ if (load_bpf_testmod(verbose())) {
+ fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
+ env.has_testmod = false;
+ }
}
/* initializing tests */
@@ -1802,7 +1761,7 @@ int main(int argc, char **argv)
close(env.saved_netns_fd);
out:
if (!env.list_test_names && env.has_testmod)
- unload_bpf_testmod();
+ unload_bpf_testmod(verbose());
free_test_selector(&env.test_selector);
free_test_selector(&env.subtest_selector);
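
To illustrate the new NAMES handling, a hypothetical allowlist file and invocation (the list-file parser added in testing_helpers.c below strips leading/trailing whitespace and anything after '#', so trailing comments are fine):

	# allowlist.txt: one test name or glob pattern per line
	sockmap_listen
	task_under_cgroup
	verifier_scalar_ids/precision_*		# subtest patterns work too

	./test_progs -a @allowlist.txt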
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 0ed3134333d4..77bd492c6024 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -405,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr)
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
-int kern_sync_rcu(void);
int trigger_module_test_read(int read_sz);
int trigger_module_test_write(int write_sz);
int write_sysctl(const char *sysctl, const char *value);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index e4657c5bc3f1..31f1c935cd07 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -40,6 +40,7 @@
#include "bpf_util.h"
#include "test_btf.h"
#include "../../../include/linux/filter.h"
+#include "testing_helpers.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -873,8 +874,140 @@ static int create_map_kptr(void)
return fd;
}
+static void set_root(bool set)
+{
+ __u64 caps;
+
+ if (set) {
+ if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
+ } else {
+ if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
+ }
+}
+
+static __u64 ptr_to_u64(const void *ptr)
+{
+ return (uintptr_t) ptr;
+}
+
+static struct btf *btf__load_testmod_btf(struct btf *vmlinux)
+{
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ struct btf *btf = NULL;
+ char name[64];
+ __u32 id = 0;
+ int err, fd;
+
+ /* Iterate over all loaded BTF objects and find bpf_testmod;
+ * we need the CAP_SYS_ADMIN capability for that.
+ */
+ set_root(true);
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ perror("bpf_btf_get_next_id failed");
+ break;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ perror("bpf_btf_get_fd_by_id failed");
+ break;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.name_len = sizeof(name);
+ info.name = ptr_to_u64(name);
+ len = sizeof(info);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ close(fd);
+ perror("bpf_obj_get_info_by_fd failed");
+ break;
+ }
+
+ if (strcmp("bpf_testmod", name)) {
+ close(fd);
+ continue;
+ }
+
+ btf = btf__load_from_kernel_by_id_split(id, vmlinux);
+ if (!btf) {
+ close(fd);
+ break;
+ }
+
+ /* We need the fd to stay open so it can be used in fd_array.
+ * The final cleanup call to btf__free() will free the btf
+ * object and close the file descriptor.
+ */
+ btf__set_fd(btf, fd);
+ break;
+ }
+
+ set_root(false);
+ return btf;
+}
+
+static struct btf *testmod_btf;
+static struct btf *vmlinux_btf;
+
+static void kfuncs_cleanup(void)
+{
+ btf__free(testmod_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array,
+ struct kfunc_btf_id_pair *fixup_kfunc_btf_id)
+{
+ /* Patch in kfunc BTF IDs */
+ while (fixup_kfunc_btf_id->kfunc) {
+ int btf_id = 0;
+
+ /* try to find kfunc in kernel BTF */
+ vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf();
+ if (vmlinux_btf) {
+ btf_id = btf__find_by_name_kind(vmlinux_btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ }
+
+ /* kfunc not found in kernel BTF, try bpf_testmod BTF */
+ if (!btf_id) {
+ testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf);
+ if (testmod_btf) {
+ btf_id = btf__find_by_name_kind(testmod_btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ if (btf_id) {
+ /* We put bpf_testmod module fd into fd_array
+ * and its index 1 into instruction 'off'.
+ */
+ *fd_array = btf__fd(testmod_btf);
+ prog[fixup_kfunc_btf_id->insn_idx].off = 1;
+ }
+ }
+ }
+
+ prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+ fixup_kfunc_btf_id++;
+ }
+}
+
static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
- struct bpf_insn *prog, int *map_fds)
+ struct bpf_insn *prog, int *map_fds, int *fd_array)
{
int *fixup_map_hash_8b = test->fixup_map_hash_8b;
int *fixup_map_hash_48b = test->fixup_map_hash_48b;
@@ -899,7 +1032,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_ringbuf = test->fixup_map_ringbuf;
int *fixup_map_timer = test->fixup_map_timer;
int *fixup_map_kptr = test->fixup_map_kptr;
- struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -1100,25 +1232,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
} while (*fixup_map_kptr);
}
- /* Patch in kfunc BTF IDs */
- if (fixup_kfunc_btf_id->kfunc) {
- struct btf *btf;
- int btf_id;
-
- do {
- btf_id = 0;
- btf = btf__load_vmlinux_btf();
- if (btf) {
- btf_id = btf__find_by_name_kind(btf,
- fixup_kfunc_btf_id->kfunc,
- BTF_KIND_FUNC);
- btf_id = btf_id < 0 ? 0 : btf_id;
- }
- btf__free(btf);
- prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
- fixup_kfunc_btf_id++;
- } while (fixup_kfunc_btf_id->kfunc);
- }
+ fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id);
}
struct libcap {
@@ -1227,45 +1341,46 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
-static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt)
{
+ __u32 buf_element_size = sizeof(struct bpf_insn);
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
__u32 xlated_prog_len;
- __u32 buf_element_size = sizeof(struct bpf_insn);
+ struct bpf_insn *buf;
if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
perror("bpf_prog_get_info_by_fd failed");
- return -1;
+ return NULL;
}
xlated_prog_len = info.xlated_prog_len;
if (xlated_prog_len % buf_element_size) {
printf("Program length %d is not multiple of %d\n",
xlated_prog_len, buf_element_size);
- return -1;
+ return NULL;
}
*cnt = xlated_prog_len / buf_element_size;
- *buf = calloc(*cnt, buf_element_size);
+ buf = calloc(*cnt, buf_element_size);
if (!buf) {
perror("can't allocate xlated program buffer");
- return -ENOMEM;
+ return NULL;
}
bzero(&info, sizeof(info));
info.xlated_prog_len = xlated_prog_len;
- info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+ info.xlated_prog_insns = (__u64)(unsigned long)buf;
if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
perror("second bpf_prog_get_info_by_fd failed");
goto out_free_buf;
}
- return 0;
+ return buf;
out_free_buf:
- free(*buf);
- return -1;
+ free(buf);
+ return NULL;
}
static bool is_null_insn(struct bpf_insn *insn)
@@ -1398,7 +1513,8 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog)
if (!check_expected && !check_unexpected)
goto out;
- if (get_xlated_program(fd_prog, &buf, &cnt)) {
+ buf = get_xlated_program(fd_prog, &cnt);
+ if (!buf) {
printf("FAIL: can't get xlated program\n");
result = false;
goto out;
@@ -1445,6 +1561,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
int run_errs, run_successes;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
+ int fd_array[2] = { -1, -1 };
int saved_errno;
int fixup_skips;
__u32 pflags;
@@ -1458,7 +1575,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (!prog_type)
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
fixup_skips = skips;
- do_test_fixup(test, prog_type, prog, map_fds);
+ do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]);
if (test->fill_insns) {
prog = test->fill_insns;
prog_len = test->prog_len;
@@ -1492,6 +1609,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
else
opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
+ if (fd_array[1] != -1)
+ opts.fd_array = &fd_array[0];
if ((prog_type == BPF_PROG_TYPE_TRACING ||
prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) {
@@ -1684,6 +1803,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
int i, passes = 0, errors = 0;
+ /* ensure previous instance of the module is unloaded */
+ unload_bpf_testmod(verbose);
+
+ if (load_bpf_testmod(verbose))
+ return EXIT_FAILURE;
+
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
@@ -1711,6 +1836,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
}
}
+ unload_bpf_testmod(verbose);
+ kfuncs_cleanup();
+
printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
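
For reference, a hedged sketch of what fixup_prog_kfuncs() arranges for module kfuncs: the call instruction carries the kfunc's BTF id in 'imm' and an fd_array index in 'off' (0 means vmlinux BTF), with the module BTF fd placed at index 1. kfunc_btf_id and testmod_btf are assumed to come from the lookups above; program type and attach details are elided:

	int fd_array[2] = { -1, btf__fd(testmod_btf) };
	struct bpf_insn insns[] = {
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL,
			     1 /* fd_array index */, kfunc_btf_id),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts, .fd_array = fd_array);

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL",
				insns, ARRAY_SIZE(insns), &opts);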
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 377fb157a57c..c2ad50f26b63 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -68,9 +68,6 @@
# Run with verbose output:
# sudo ./test_xsk.sh -v
#
-# Run and dump packet contents:
-# sudo ./test_xsk.sh -D
-#
# Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger:
# sudo ./test_xsk.sh -d
#
@@ -81,11 +78,10 @@
ETH=""
-while getopts "vDi:d" flag
+while getopts "vi:d" flag
do
case "${flag}" in
v) verbose=1;;
- D) dump_pkts=1;;
d) debug=1;;
i) ETH=${OPTARG};;
esac
@@ -157,10 +153,6 @@ if [[ $verbose -eq 1 ]]; then
ARGS+="-v "
fi
-if [[ $dump_pkts -eq 1 ]]; then
- ARGS="-D "
-fi
-
retval=$?
test_status $retval "${TEST_NAME}"
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 0b5e0829e5be..8d994884c7b4 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (C) 2019 Netronome Systems, Inc. */
/* Copyright (C) 2020 Facebook, Inc. */
+#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
@@ -8,6 +9,7 @@
#include <bpf/libbpf.h>
#include "test_progs.h"
#include "testing_helpers.h"
+#include <linux/membarrier.h>
int parse_num_list(const char *s, bool **num_set, int *num_set_len)
{
@@ -70,92 +72,168 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
-int parse_test_list(const char *s,
- struct test_filter_set *set,
- bool is_glob_pattern)
+static int do_insert_test(struct test_filter_set *set,
+ char *test_str,
+ char *subtest_str)
{
- char *input, *state = NULL, *next;
- struct test_filter *tmp, *tests = NULL;
- int i, j, cnt = 0;
+ struct test_filter *tmp, *test;
+ char **ctmp;
+ int i;
- input = strdup(s);
- if (!input)
+ for (i = 0; i < set->cnt; i++) {
+ test = &set->tests[i];
+
+ if (strcmp(test_str, test->name) == 0) {
+ free(test_str);
+ goto subtest;
+ }
+ }
+
+ tmp = realloc(set->tests, sizeof(*test) * (set->cnt + 1));
+ if (!tmp)
return -ENOMEM;
- while ((next = strtok_r(state ? NULL : input, ",", &state))) {
- char *subtest_str = strchr(next, '/');
- char *pattern = NULL;
- int glob_chars = 0;
+ set->tests = tmp;
+ test = &set->tests[set->cnt];
- tmp = realloc(tests, sizeof(*tests) * (cnt + 1));
- if (!tmp)
- goto err;
- tests = tmp;
+ test->name = test_str;
+ test->subtests = NULL;
+ test->subtest_cnt = 0;
- tests[cnt].subtest_cnt = 0;
- tests[cnt].subtests = NULL;
+ set->cnt++;
- if (is_glob_pattern) {
- pattern = "%s";
- } else {
- pattern = "*%s*";
- glob_chars = 2;
- }
+subtest:
+ if (!subtest_str)
+ return 0;
- if (subtest_str) {
- char **tmp_subtests = NULL;
- int subtest_cnt = tests[cnt].subtest_cnt;
-
- *subtest_str = '\0';
- subtest_str += 1;
- tmp_subtests = realloc(tests[cnt].subtests,
- sizeof(*tmp_subtests) *
- (subtest_cnt + 1));
- if (!tmp_subtests)
- goto err;
- tests[cnt].subtests = tmp_subtests;
-
- tests[cnt].subtests[subtest_cnt] =
- malloc(strlen(subtest_str) + glob_chars + 1);
- if (!tests[cnt].subtests[subtest_cnt])
- goto err;
- sprintf(tests[cnt].subtests[subtest_cnt],
- pattern,
- subtest_str);
-
- tests[cnt].subtest_cnt++;
+ for (i = 0; i < test->subtest_cnt; i++) {
+ if (strcmp(subtest_str, test->subtests[i]) == 0) {
+ free(subtest_str);
+ return 0;
}
+ }
- tests[cnt].name = malloc(strlen(next) + glob_chars + 1);
- if (!tests[cnt].name)
- goto err;
- sprintf(tests[cnt].name, pattern, next);
+ ctmp = realloc(test->subtests,
+ sizeof(*test->subtests) * (test->subtest_cnt + 1));
+ if (!ctmp)
+ return -ENOMEM;
- cnt++;
+ test->subtests = ctmp;
+ test->subtests[test->subtest_cnt] = subtest_str;
+
+ test->subtest_cnt++;
+
+ return 0;
+}
+
+static int insert_test(struct test_filter_set *set,
+ char *test_spec,
+ bool is_glob_pattern)
+{
+ char *pattern, *subtest_str, *ext_test_str, *ext_subtest_str = NULL;
+ int glob_chars = 0;
+
+ if (is_glob_pattern) {
+ pattern = "%s";
+ } else {
+ pattern = "*%s*";
+ glob_chars = 2;
}
- tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt));
- if (!tmp)
+ subtest_str = strchr(test_spec, '/');
+ if (subtest_str) {
+ *subtest_str = '\0';
+ subtest_str += 1;
+ }
+
+ ext_test_str = malloc(strlen(test_spec) + glob_chars + 1);
+ if (!ext_test_str)
goto err;
- memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt);
- set->tests = tmp;
- set->cnt += cnt;
+ sprintf(ext_test_str, pattern, test_spec);
- free(tests);
- free(input);
- return 0;
+ if (subtest_str) {
+ ext_subtest_str = malloc(strlen(subtest_str) + glob_chars + 1);
+ if (!ext_subtest_str)
+ goto err;
+
+ sprintf(ext_subtest_str, pattern, subtest_str);
+ }
+
+ return do_insert_test(set, ext_test_str, ext_subtest_str);
err:
- for (i = 0; i < cnt; i++) {
- for (j = 0; j < tests[i].subtest_cnt; j++)
- free(tests[i].subtests[j]);
+ free(ext_test_str);
+ free(ext_subtest_str);
+
+ return -ENOMEM;
+}
+
+int parse_test_list_file(const char *path,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *buf = NULL, *capture_start, *capture_end, *scan_end;
+ size_t buflen = 0;
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", path, err);
+ return err;
+ }
+
+ while (getline(&buf, &buflen, f) != -1) {
+ capture_start = buf;
+
+ while (isspace(*capture_start))
+ ++capture_start;
+
+ capture_end = capture_start;
+ scan_end = capture_start;
+
+ while (*scan_end && *scan_end != '#') {
+ if (!isspace(*scan_end))
+ capture_end = scan_end;
+
+ ++scan_end;
+ }
+
+ if (capture_end == capture_start)
+ continue;
+
+ *(++capture_end) = '\0';
+
+ err = insert_test(set, capture_start, is_glob_pattern);
+ if (err)
+ break;
+ }
+
+ fclose(f);
+ return err;
+}
+
+int parse_test_list(const char *s,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *input, *state = NULL, *test_spec;
+ int err = 0;
+
+ input = strdup(s);
+ if (!input)
+ return -ENOMEM;
- free(tests[i].name);
+ while ((test_spec = strtok_r(state ? NULL : input, ",", &state))) {
+ err = insert_test(set, test_spec, is_glob_pattern);
+ if (err)
+ break;
}
- free(tests);
+
free(input);
- return -ENOMEM;
+ return err;
}
__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
@@ -249,3 +327,63 @@ __u64 read_perf_max_sample_freq(void)
fclose(f);
return sample_freq;
}
+
+static int finit_module(int fd, const char *param_values, int flags)
+{
+ return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int delete_module(const char *name, int flags)
+{
+ return syscall(__NR_delete_module, name, flags);
+}
+
+int unload_bpf_testmod(bool verbose)
+{
+ if (kern_sync_rcu())
+ fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n");
+ if (delete_module("bpf_testmod", 0)) {
+ if (errno == ENOENT) {
+ if (verbose)
+ fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
+ return -1;
+ }
+ fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
+ return -1;
+ }
+ if (verbose)
+ fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
+ return 0;
+}
+
+int load_bpf_testmod(bool verbose)
+{
+ int fd;
+
+ if (verbose)
+ fprintf(stdout, "Loading bpf_testmod.ko...\n");
+
+ fd = open("bpf_testmod.ko", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
+ return -ENOENT;
+ }
+ if (finit_module(fd, "", 0)) {
+ fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
+ close(fd);
+ return -EINVAL;
+ }
+ close(fd);
+
+ if (verbose)
+ fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
+ return 0;
+}
+
+/*
+ * Trigger synchronize_rcu() in kernel.
+ */
+int kern_sync_rcu(void)
+{
+ return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
+}
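
A hypothetical caller combining the helpers above; the unload failure is deliberately ignored because the module may simply not be loaded yet.

#include <stdbool.h>
#include "testing_helpers.h"

static int reload_bpf_testmod(bool verbose)
{
	/* unload_bpf_testmod() already triggers kern_sync_rcu() first */
	unload_bpf_testmod(verbose);
	return load_bpf_testmod(verbose);
}
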
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index eb8790f928e4..5312323881b6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,5 +1,9 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (C) 2020 Facebook, Inc. */
+
+#ifndef __TESTING_HELPERS_H
+#define __TESTING_HELPERS_H
+
#include <stdbool.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -20,5 +24,13 @@ struct test_filter_set;
int parse_test_list(const char *s,
struct test_filter_set *test_set,
bool is_glob_pattern);
+int parse_test_list_file(const char *path,
+ struct test_filter_set *test_set,
+ bool is_glob_pattern);
__u64 read_perf_max_sample_freq(void);
+int load_bpf_testmod(bool verbose);
+int unload_bpf_testmod(bool verbose);
+int kern_sync_rcu(void);
+
+#endif /* __TESTING_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 6c03a7d805f9..99272bb890da 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -38,25 +38,24 @@
.fixup_map_array_48b = { 1 },
.result = VERBOSE_ACCEPT,
.errstr =
- "26: (85) call bpf_probe_read_kernel#113\
- last_idx 26 first_idx 20\
- regs=4 stack=0 before 25\
- regs=4 stack=0 before 24\
- regs=4 stack=0 before 23\
- regs=4 stack=0 before 22\
- regs=4 stack=0 before 20\
- parent didn't have regs=4 stack=0 marks\
- last_idx 19 first_idx 10\
- regs=4 stack=0 before 19\
- regs=200 stack=0 before 18\
- regs=300 stack=0 before 17\
- regs=201 stack=0 before 15\
- regs=201 stack=0 before 14\
- regs=200 stack=0 before 13\
- regs=200 stack=0 before 12\
- regs=200 stack=0 before 11\
- regs=200 stack=0 before 10\
- parent already had regs=0 stack=0 marks",
+ "mark_precise: frame0: last_idx 26 first_idx 20\
+ mark_precise: frame0: regs=r2 stack= before 25\
+ mark_precise: frame0: regs=r2 stack= before 24\
+ mark_precise: frame0: regs=r2 stack= before 23\
+ mark_precise: frame0: regs=r2 stack= before 22\
+ mark_precise: frame0: regs=r2 stack= before 20\
+ mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: last_idx 19 first_idx 10\
+ mark_precise: frame0: regs=r2,r9 stack= before 19\
+ mark_precise: frame0: regs=r9 stack= before 18\
+ mark_precise: frame0: regs=r8,r9 stack= before 17\
+ mark_precise: frame0: regs=r0,r9 stack= before 15\
+ mark_precise: frame0: regs=r0,r9 stack= before 14\
+ mark_precise: frame0: regs=r9 stack= before 13\
+ mark_precise: frame0: regs=r9 stack= before 12\
+ mark_precise: frame0: regs=r9 stack= before 11\
+ mark_precise: frame0: regs=r9 stack= before 10\
+ mark_precise: frame0: parent state regs= stack=:",
},
{
"precise: test 2",
@@ -100,20 +99,20 @@
.flags = BPF_F_TEST_STATE_FREQ,
.errstr =
"26: (85) call bpf_probe_read_kernel#113\
- last_idx 26 first_idx 22\
- regs=4 stack=0 before 25\
- regs=4 stack=0 before 24\
- regs=4 stack=0 before 23\
- regs=4 stack=0 before 22\
- parent didn't have regs=4 stack=0 marks\
- last_idx 20 first_idx 20\
- regs=4 stack=0 before 20\
- parent didn't have regs=4 stack=0 marks\
- last_idx 19 first_idx 17\
- regs=4 stack=0 before 19\
- regs=200 stack=0 before 18\
- regs=300 stack=0 before 17\
- parent already had regs=0 stack=0 marks",
+ mark_precise: frame0: last_idx 26 first_idx 22\
+ mark_precise: frame0: regs=r2 stack= before 25\
+ mark_precise: frame0: regs=r2 stack= before 24\
+ mark_precise: frame0: regs=r2 stack= before 23\
+ mark_precise: frame0: regs=r2 stack= before 22\
+ mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: last_idx 20 first_idx 20\
+ mark_precise: frame0: regs=r2,r9 stack= before 20\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
+ mark_precise: frame0: last_idx 19 first_idx 17\
+ mark_precise: frame0: regs=r2,r9 stack= before 19\
+ mark_precise: frame0: regs=r9 stack= before 18\
+ mark_precise: frame0: regs=r8,r9 stack= before 17\
+ mark_precise: frame0: parent state regs= stack=:",
},
{
"precise: cross frame pruning",
@@ -153,15 +152,16 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "5: (2d) if r4 > r0 goto pc+0\
- last_idx 5 first_idx 5\
- parent didn't have regs=10 stack=0 marks\
- last_idx 4 first_idx 2\
- regs=10 stack=0 before 4\
- regs=10 stack=0 before 3\
- regs=0 stack=1 before 2\
- last_idx 5 first_idx 5\
- parent didn't have regs=1 stack=0 marks",
+ .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\
+ mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: last_idx 4 first_idx 2\
+ mark_precise: frame0: regs=r4 stack= before 4\
+ mark_precise: frame0: regs=r4 stack= before 3\
+ mark_precise: frame0: regs= stack=-8 before 2\
+ mark_precise: frame0: falling back to forcing all scalars precise\
+ force_precise: frame0: forcing r0 to be precise\
+ mark_precise: frame0: last_idx 5 first_idx 5\
+ mark_precise: frame0: parent state regs= stack=:",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
@@ -179,16 +179,19 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "last_idx 6 first_idx 6\
- parent didn't have regs=10 stack=0 marks\
- last_idx 5 first_idx 3\
- regs=10 stack=0 before 5\
- regs=10 stack=0 before 4\
- regs=0 stack=1 before 3\
- last_idx 6 first_idx 6\
- parent didn't have regs=1 stack=0 marks\
- last_idx 5 first_idx 3\
- regs=1 stack=0 before 5",
+ .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\
+ mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: last_idx 5 first_idx 3\
+ mark_precise: frame0: regs=r4 stack= before 5\
+ mark_precise: frame0: regs=r4 stack= before 4\
+ mark_precise: frame0: regs= stack=-8 before 3\
+ mark_precise: frame0: falling back to forcing all scalars precise\
+ force_precise: frame0: forcing r0 to be precise\
+ force_precise: frame0: forcing r0 to be precise\
+ force_precise: frame0: forcing r0 to be precise\
+ force_precise: frame0: forcing r0 to be precise\
+ mark_precise: frame0: last_idx 6 first_idx 6\
+ mark_precise: frame0: parent state regs= stack=:",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
@@ -217,3 +220,39 @@
.errstr = "invalid access to memory, mem_size=1 off=42 size=8",
.result = REJECT,
},
+{
+ "precise: program doesn't prematurely prune branches",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_LD_MAP_FD(BPF_REG_4, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 10),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8192),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 13 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = REJECT,
+ .errstr = "register with unbounded min value is not allowed",
+},
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1db7185181da..655095810d4a 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -141,6 +141,7 @@ static struct env {
bool verbose;
bool debug;
bool quiet;
+ bool force_checkpoints;
enum resfmt out_fmt;
bool show_version;
bool comparison_mode;
@@ -209,6 +210,8 @@ static const struct argp_option opts[] = {
{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
+ { "test-states", 't', NULL, 0,
+ "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
{ "quiet", 'q', NULL, 0, "Quiet mode" },
{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
{ "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -284,6 +287,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case 't':
+ env.force_checkpoints = true;
+ break;
case 'C':
env.comparison_mode = true;
break;
@@ -989,6 +995,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
/* increase chances of successful BPF object loading */
fixup_obj(obj, prog, base_filename);
+ if (env.force_checkpoints)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+
err = bpf_object__load(obj);
env.progs_processed++;
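
As a sketch of what the new -t/--test-states switch amounts to (assuming an already-opened bpf_object; only existing libbpf calls are used), every program gets BPF_F_TEST_STATE_FREQ OR-ed into its load flags before bpf_object__load():

#include <bpf/libbpf.h>

static void force_state_checkpoints(struct bpf_object *obj)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj)
		bpf_program__set_flags(prog,
				       bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
}
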
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 987cf0db5ebc..613321eb84c1 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -27,6 +27,7 @@
#include <sys/mman.h>
#include <net/if.h>
#include <poll.h>
+#include <time.h>
#include "xdp_metadata.h"
@@ -134,18 +135,52 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
}
}
-static void verify_xdp_metadata(void *data)
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(clockid_t clock_id)
+{
+ struct timespec t;
+ int res;
+
+	/* See clock_gettime(2) for the available clock_id types */
+ res = clock_gettime(clock_id, &t);
+
+ if (res < 0)
+ error(res, errno, "Error with clock_gettime()");
+
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static void verify_xdp_metadata(void *data, clockid_t clock_id)
{
struct xdp_meta *meta;
meta = data - sizeof(*meta);
- printf("rx_timestamp: %llu\n", meta->rx_timestamp);
if (meta->rx_hash_err < 0)
printf("No rx_hash err=%d\n", meta->rx_hash_err);
else
printf("rx_hash: 0x%X with RSS type:0x%X\n",
meta->rx_hash, meta->rx_hash_type);
+
+ printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp,
+ (double)meta->rx_timestamp / NANOSEC_PER_SEC);
+ if (meta->rx_timestamp) {
+ __u64 usr_clock = gettime(clock_id);
+ __u64 xdp_clock = meta->xdp_timestamp;
+ __s64 delta_X = xdp_clock - meta->rx_timestamp;
+ __s64 delta_X2U = usr_clock - xdp_clock;
+
+ printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
+ xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
+ (double)delta_X / NANOSEC_PER_SEC,
+ (double)delta_X / 1000);
+
+ printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
+ usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
+ (double)delta_X2U / NANOSEC_PER_SEC,
+ (double)delta_X2U / 1000);
+ }
}
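
A worked example of the delta arithmetic above, with made-up timestamps: a 300000 ns gap prints as 0.0003 sec / 300.000 usec under the same format strings.

#include <stdio.h>

#define NANOSEC_PER_SEC 1000000000

int main(void)
{
	unsigned long long xdp_clock = 1000000000ULL;      /* made up */
	unsigned long long usr_clock = xdp_clock + 300000; /* +300 usec */
	long long delta = usr_clock - xdp_clock;

	printf("delta sec:%0.4f (%0.3f usec)\n",
	       (double)delta / NANOSEC_PER_SEC, (double)delta / 1000);
	return 0;
}
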
static void verify_skb_metadata(int fd)
@@ -193,7 +228,7 @@ static void verify_skb_metadata(int fd)
printf("skb hwtstamp is not found!\n");
}
-static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
{
const struct xdp_desc *rx_desc;
struct pollfd fds[rxq + 1];
@@ -243,7 +278,8 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
xsk, idx, rx_desc->addr, addr, comp_addr);
- verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr));
+ verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
+ clock_id);
xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);
}
@@ -370,6 +406,7 @@ static void timestamping_enable(int fd, int val)
int main(int argc, char *argv[])
{
+ clockid_t clock_id = CLOCK_TAI;
int server_fd = -1;
int ret;
int i;
@@ -443,7 +480,7 @@ int main(int argc, char *argv[])
error(1, -ret, "bpf_xdp_attach");
signal(SIGINT, handle_signal);
- ret = verify_metadata(rx_xsk, rxq, server_fd);
+ ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
close(server_fd);
cleanup();
if (ret)
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
index 0c4624dc6f2f..938a729bd307 100644
--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -11,6 +11,7 @@
struct xdp_meta {
__u64 rx_timestamp;
+ __u64 xdp_timestamp;
__u32 rx_hash;
union {
__u32 rx_hash_type;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 04ed8b544712..8da8d557768b 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -134,6 +134,11 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
__atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
}
+static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb)
+{
+ prod->cached_prod -= nb;
+}
+
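
A hedged usage sketch (hypothetical helper, built only on this header) of the pattern the new cancel operation enables: reserve a batch, submit just the slots actually filled, and hand the rest back so cached_prod stays in step with the producer index.

#include "xsk.h"

static void fill_some(struct xsk_ring_prod *fq, __u32 want, __u32 have)
{
	__u32 idx = 0;

	if (xsk_ring_prod__reserve(fq, want, &idx) != want)
		return;
	/* ... write 'have' (<= want) fill addresses at idx, idx + 1, ... */
	xsk_ring_prod__submit(fq, have);
	xsk_ring_prod__cancel(fq, want - have);
}
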
static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
{
__u32 entries = xsk_cons_nb_avail(cons, nb);
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index f144d0604ddf..218d7f694e5c 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -76,16 +76,13 @@
#include <asm/barrier.h>
#include <linux/if_link.h>
#include <linux/if_ether.h>
-#include <linux/ip.h>
#include <linux/mman.h>
-#include <linux/udp.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
-#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -94,10 +91,8 @@
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
-#include <sys/queue.h>
#include <time.h>
#include <unistd.h>
-#include <stdatomic.h>
#include "xsk_xdp_progs.skel.h"
#include "xsk.h"
@@ -109,10 +104,6 @@
static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
-static const char *IP1 = "192.168.100.162";
-static const char *IP2 = "192.168.100.161";
-static const u16 UDP_PORT1 = 2020;
-static const u16 UDP_PORT2 = 2121;
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
@@ -147,112 +138,25 @@ static void report_failure(struct test_spec *test)
test->fail = true;
}
-static void memset32_htonl(void *dest, u32 val, u32 size)
-{
- u32 *ptr = (u32 *)dest;
- int i;
-
- val = htonl(val);
-
- for (i = 0; i < (size & (~0x3)); i += 4)
- ptr[i >> 2] = val;
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static __u16 csum_fold(__u32 csum)
-{
- u32 sum = (__force u32)csum;
-
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return (__force __u16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static u32 from64to32(u64 x)
-{
- /* add up 32-bit and 32-bit for 32+c bit */
- x = (x & 0xffffffff) + (x >> 32);
- /* add up carry.. */
- x = (x & 0xffffffff) + (x >> 32);
- return (u32)x;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
- unsigned long long s = (__force u32)sum;
-
- s += (__force u32)saddr;
- s += (__force u32)daddr;
-#ifdef __BIG_ENDIAN__
- s += proto + len;
-#else
- s += (proto + len) << 8;
-#endif
- return (__force __u32)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16 bits and an intra-packet data sequence number in the lower 16 bits. So
+ * the 3rd packet's 5th word of data will contain the number (2 << 16) | 4, as
+ * both are numbered from 0.
+ */
-static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
- return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
{
- u32 csum = 0;
- u32 cnt = 0;
-
- /* udp hdr and data */
- for (; cnt < len; cnt += 2)
- csum += udp_pkt[cnt >> 1];
+ u32 *ptr = (u32 *)dest, i;
- return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+ start /= sizeof(*ptr);
+ size /= sizeof(*ptr);
+ for (i = 0; i < size; i++)
+ ptr[i] = htonl(pkt_nb << 16 | (i + start));
}
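
A worked example of the encoding above: for the 3rd packet (pkt_nb 2), the 5th word (index 4) carries (2 << 16) | 4 before the htonl() conversion.

#include <stdio.h>

int main(void)
{
	unsigned int pkt_nb = 2, word = 4;
	unsigned int val = pkt_nb << 16 | word;

	printf("0x%08x (%u)\n", val, val); /* 0x00020004 (131076) */
	return 0;
}
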
static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
{
memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_IP);
-}
-
-static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
-{
- ip_hdr->version = IP_PKT_VER;
- ip_hdr->ihl = 0x5;
- ip_hdr->tos = IP_PKT_TOS;
- ip_hdr->tot_len = htons(IP_PKT_SIZE);
- ip_hdr->id = 0;
- ip_hdr->frag_off = 0;
- ip_hdr->ttl = IPDEFTTL;
- ip_hdr->protocol = IPPROTO_UDP;
- ip_hdr->saddr = ifobject->src_ip;
- ip_hdr->daddr = ifobject->dst_ip;
- ip_hdr->check = 0;
-}
-
-static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
- struct udphdr *udp_hdr)
-{
- udp_hdr->source = htons(ifobject->src_port);
- udp_hdr->dest = htons(ifobject->dst_port);
- udp_hdr->len = htons(UDP_PKT_SIZE);
- memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
+ eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
}
static bool is_umem_valid(struct ifobject *ifobj)
@@ -260,19 +164,18 @@ static bool is_umem_valid(struct ifobject *ifobj)
return !!ifobj->umem->umem;
}
-static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+static u32 mode_to_xdp_flags(enum test_mode mode)
{
- udp_hdr->check = 0;
- udp_hdr->check =
- udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
+ return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
}
-static u32 mode_to_xdp_flags(enum test_mode mode)
+static u64 umem_size(struct xsk_umem_info *umem)
{
- return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+ return umem->num_frames * umem->frame_size;
}
-static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
+static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+ u64 size)
{
struct xsk_umem_config cfg = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -292,9 +195,31 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
return ret;
umem->buffer = buffer;
+ if (ifobj->shared_umem && ifobj->rx_on) {
+ umem->base_addr = umem_size(umem);
+ umem->next_buffer = umem_size(umem);
+ }
+
return 0;
}
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+ u64 addr;
+
+ addr = umem->next_buffer;
+ umem->next_buffer += umem->frame_size;
+ if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+ umem->next_buffer = umem->base_addr;
+
+ return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+ umem->next_buffer = 0;
+}
+
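
A standalone model of the bump allocator above, with made-up sizes: addresses advance by frame_size and wrap back to base_addr once the UMEM is exhausted.

#include <stdio.h>

int main(void)
{
	unsigned long long base = 0, frame = 4096, nframes = 4;
	unsigned long long size = frame * nframes, next = base;
	int i;

	for (i = 0; i < 6; i++) {
		unsigned long long addr = next;

		next += frame;
		if (next >= base + size)
			next = base;
		printf("alloc %d -> %llu\n", i, addr); /* 0 4096 8192 12288 0 4096 */
	}
	return 0;
}
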
static void enable_busy_poll(struct xsk_socket_info *xsk)
{
int sock_opt;
@@ -354,7 +279,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
exit_with_error(ENOMEM);
}
umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
- ret = xsk_configure_umem(umem, bufs, umem_sz);
+ ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz);
if (ret)
exit_with_error(-ret);
@@ -380,7 +305,6 @@ out:
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
- {"dump-pkts", no_argument, 0, 'D'},
{"verbose", no_argument, 0, 'v'},
{0, 0, 0, 0}
};
@@ -391,7 +315,6 @@ static void usage(const char *prog)
" Usage: %s [OPTIONS]\n"
" Options:\n"
" -i, --interface Use interface\n"
- " -D, --dump-pkts Dump packets L2 - L5\n"
" -v, --verbose Verbose output\n"
" -b, --busy-poll Enable busy poll\n";
@@ -415,7 +338,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:vb", long_options, &option_index);
if (c == -1)
break;
@@ -437,9 +360,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
interface_nb++;
break;
- case 'D':
- opt_pkt_dump = true;
- break;
case 'v':
opt_verbose = true;
break;
@@ -482,9 +402,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
memset(ifobj->umem, 0, sizeof(*ifobj->umem));
ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
- if (ifobj->shared_umem && ifobj->rx_on)
- ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS *
- XSK_UMEM__DEFAULT_FRAME_SIZE;
for (j = 0; j < MAX_SOCKETS; j++) {
memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
@@ -554,24 +471,24 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x
static void pkt_stream_reset(struct pkt_stream *pkt_stream)
{
if (pkt_stream)
- pkt_stream->rx_pkt_nb = 0;
+ pkt_stream->current_pkt_nb = 0;
}
-static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
{
- if (pkt_nb >= pkt_stream->nb_pkts)
+ if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
return NULL;
- return &pkt_stream->pkts[pkt_nb];
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
}
static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
{
- while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
(*pkts_sent)++;
- if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
- return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
- pkt_stream->rx_pkt_nb++;
+ if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+ pkt_stream->current_pkt_nb++;
}
return NULL;
}
@@ -616,9 +533,21 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
return pkt_stream;
}
-static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
+static u32 ceil_u32(u32 a, u32 b)
+{
+ return (a + b - 1) / b;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt)
+{
+ if (!pkt || !pkt->valid)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+}
+
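
ceil_u32() is plain round-up division, so a 9000-byte packet in 4096-byte frames needs ceil_u32(9000, 4096) == 3 frames. A self-contained check:

#include <assert.h>

static unsigned int ceil_u32(unsigned int a, unsigned int b)
{
	return (a + b - 1) / b;
}

int main(void)
{
	assert(ceil_u32(9000, 4096) == 3); /* straddles three frames */
	assert(ceil_u32(4096, 4096) == 1); /* exact fit */
	return 0;
}
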
+static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len)
{
- pkt->addr = addr + umem->base_addr;
+ pkt->offset = offset;
pkt->len = len;
if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
pkt->valid = false;
@@ -626,6 +555,11 @@ static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 l
pkt->valid = true;
}
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+ return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
{
struct pkt_stream *pkt_stream;
@@ -635,10 +569,13 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
if (!pkt_stream)
exit_with_error(ENOMEM);
+ pkt_stream->nb_pkts = nb_pkts;
+ pkt_stream->max_pkt_len = pkt_len;
for (i = 0; i < nb_pkts; i++) {
- pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
- pkt_len);
- pkt_stream->pkts[i].payload = i;
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt_set(umem, pkt, 0, pkt_len);
+ pkt->pkt_nb = i;
}
return pkt_stream;
@@ -669,8 +606,7 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream);
for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2)
- pkt_set(umem, &pkt_stream->pkts[i],
- (i % umem->num_frames) * umem->frame_size + offset, pkt_len);
+ pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len);
ifobj->pkt_stream = pkt_stream;
}
@@ -694,30 +630,31 @@ static void pkt_stream_receive_half(struct test_spec *test)
pkt_stream->pkts[i].valid = false;
}
-static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
{
- struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
- struct udphdr *udp_hdr;
- struct ethhdr *eth_hdr;
- struct iphdr *ip_hdr;
- void *data;
+ if (!pkt->valid)
+ return pkt->offset;
+ return pkt->offset + umem_alloc_buffer(umem);
+}
- if (!pkt)
- return NULL;
- if (!pkt->valid || pkt->len < MIN_PKT_SIZE)
- return pkt;
+static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb,
+ u32 bytes_written)
+{
+ void *data = xsk_umem__get_data(ifobject->umem->buffer, addr);
- data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
- udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
- ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
- eth_hdr = (struct ethhdr *)data;
+ if (len < MIN_PKT_SIZE)
+ return;
- gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
+ if (!bytes_written) {
+ gen_eth_hdr(ifobject, data);
- return pkt;
+ len -= PKT_HDR_SIZE;
+ data += PKT_HDR_SIZE;
+ } else {
+ bytes_written -= PKT_HDR_SIZE;
+ }
+
+ write_payload(data, pkt_nb, bytes_written, len);
}
static void __pkt_stream_generate_custom(struct ifobject *ifobj,
@@ -731,10 +668,14 @@ static void __pkt_stream_generate_custom(struct ifobject *ifobj,
exit_with_error(ENOMEM);
for (i = 0; i < nb_pkts; i++) {
- pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr;
- pkt_stream->pkts[i].len = pkts[i].len;
- pkt_stream->pkts[i].payload = i;
- pkt_stream->pkts[i].valid = pkts[i].valid;
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt->offset = pkts[i].offset;
+ pkt->len = pkts[i].len;
+ pkt->pkt_nb = i;
+ pkt->valid = pkts[i].valid;
+ if (pkt->len > pkt_stream->max_pkt_len)
+ pkt_stream->max_pkt_len = pkt->len;
}
ifobj->pkt_stream = pkt_stream;
@@ -746,53 +687,62 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts,
__pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts);
}
-static void pkt_dump(void *pkt, u32 len)
-{
- char s[INET_ADDRSTRLEN];
- struct ethhdr *ethhdr;
- struct udphdr *udphdr;
- struct iphdr *iphdr;
- u32 payload, i;
-
- ethhdr = pkt;
- iphdr = pkt + sizeof(*ethhdr);
- udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
-
- /*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
- /*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
- inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
- inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
- /*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
- /*extract L5 frame */
- payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE)));
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ u32 seqnum, pkt_nb;
- fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
- fprintf(stdout, "---------------------------------------\n");
+ seqnum = ntohl(*data) & 0xffff;
+ pkt_nb = ntohl(*data) >> 16;
+ fprintf(stdout, "%u:%u ", pkt_nb, seqnum);
+ data++;
+ }
}
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr,
- u64 pkt_stream_addr)
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+ struct ethhdr *ethhdr = pkt;
+ u32 i, *data;
+
+ if (eth_header) {
+		/* extract L2 frame */
+ fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+ fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+ data = pkt + PKT_HDR_SIZE;
+ } else {
+ data = pkt;
+ }
+
+	/* extract L5 frame */
+ fprintf(stdout, "\nDEBUG>> L5: seqnum: ");
+ pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+ fprintf(stdout, "....");
+ if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+ fprintf(stdout, "\n.... ");
+ pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+ PKT_DUMP_NB_TO_PRINT);
+ }
+ fprintf(stdout, "\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
{
u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
- u32 offset = addr % umem->frame_size, expected_offset = 0;
+ u32 offset = addr % umem->frame_size, expected_offset;
+ int pkt_offset = pkt->valid ? pkt->offset : 0;
- if (!pkt_stream->use_addr_for_fill)
- pkt_stream_addr = 0;
+ if (!umem->unaligned_mode)
+ pkt_offset = 0;
- expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+ expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
if (offset == expected_offset)
return true;
@@ -806,9 +756,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
void *data = xsk_umem__get_data(buffer, addr);
struct xdp_info *meta = data - sizeof(struct xdp_info);
- if (meta->count != pkt->payload) {
+ if (meta->count != pkt->pkt_nb) {
ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
- __func__, pkt->payload, meta->count);
+ __func__, pkt->pkt_nb, meta->count);
return false;
}
@@ -818,11 +768,11 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
{
void *data = xsk_umem__get_data(buffer, addr);
- struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+ u32 seqnum, pkt_data;
if (!pkt) {
ksft_print_msg("[%s] too many packets received\n", __func__);
- return false;
+ goto error;
}
if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
@@ -833,28 +783,23 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
if (pkt->len != len) {
ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
__func__, pkt->len, len);
- return false;
+ goto error;
}
- if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
- u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
-
- if (opt_pkt_dump)
- pkt_dump(data, PKT_SIZE);
+ pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+ seqnum = pkt_data >> 16;
- if (pkt->payload != seqnum) {
- ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
- __func__, pkt->payload, seqnum);
- return false;
- }
- } else {
- ksft_print_msg("Invalid frame received: ");
- ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
- iphdr->tos);
- return false;
+ if (pkt->pkt_nb != seqnum) {
+ ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
+ __func__, pkt->pkt_nb, seqnum);
+ goto error;
}
return true;
+
+error:
+ pkt_dump(data, len, true);
+ return false;
}
static void kick_tx(struct xsk_socket_info *xsk)
@@ -976,7 +921,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
addr = xsk_umem__add_offset_to_addr(addr);
if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
- !is_offset_correct(umem, pkt_stream, addr, pkt->addr) ||
+ !is_offset_correct(umem, pkt, addr) ||
(ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
return TEST_FAILURE;
@@ -992,8 +937,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
pthread_mutex_lock(&pacing_mutex);
pkts_in_flight -= pkts_sent;
- if (pkts_in_flight < umem->num_frames)
- pthread_cond_signal(&pacing_cond);
pthread_mutex_unlock(&pacing_mutex);
pkts_sent = 0;
}
@@ -1001,14 +944,21 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
return TEST_PASS;
}
-static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds,
- bool timeout)
+static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout)
{
struct xsk_socket_info *xsk = ifobject->xsk;
+ struct xsk_umem_info *umem = ifobject->umem;
+ u32 i, idx = 0, valid_pkts = 0, buffer_len;
bool use_poll = ifobject->use_poll;
- u32 i, idx = 0, valid_pkts = 0;
int ret;
+ buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len);
+ /* pkts_in_flight might be negative if many invalid packets are sent */
+ if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
+ kick_tx(xsk);
+ return TEST_CONTINUE;
+ }
+
while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
if (use_poll) {
ret = poll(fds, 1, POLL_TMOUT);
@@ -1034,25 +984,21 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd
for (i = 0; i < BATCH_SIZE; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
- struct pkt *pkt = pkt_generate(ifobject, *pkt_nb);
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream);
if (!pkt)
break;
- tx_desc->addr = pkt->addr;
+ tx_desc->addr = pkt_get_addr(pkt, umem);
tx_desc->len = pkt->len;
- (*pkt_nb)++;
- if (pkt->valid)
+ if (pkt->valid) {
valid_pkts++;
+ pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0);
+ }
}
pthread_mutex_lock(&pacing_mutex);
pkts_in_flight += valid_pkts;
- /* pkts_in_flight might be negative if many invalid packets are sent */
- if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) {
- kick_tx(xsk);
- pthread_cond_wait(&pacing_cond, &pacing_mutex);
- }
pthread_mutex_unlock(&pacing_mutex);
xsk_ring_prod__submit(&xsk->tx, i);
@@ -1088,18 +1034,21 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
{
+ struct pkt_stream *pkt_stream = ifobject->pkt_stream;
bool timeout = !is_umem_valid(test->ifobj_rx);
struct pollfd fds = { };
- u32 pkt_cnt = 0, ret;
+ u32 ret;
fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
fds.events = POLLOUT;
- while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
- ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout);
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+ ret = __send_pkts(ifobject, &fds, timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
if ((ret || test->fail) && !timeout)
return TEST_FAILURE;
- else if (ret == TEST_PASS && timeout)
+ if (ret == TEST_PASS && timeout)
return ret;
}
@@ -1249,11 +1198,14 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje
ifobject->xsk = &ifobject->xsk_arr[0];
ifobject->xskmap = test->ifobj_rx->xskmap;
memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+ ifobject->umem->base_addr = 0;
}
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+ bool fill_up)
{
- u32 idx = 0, i, buffers_to_fill;
+ u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+ u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
int ret;
if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
@@ -1264,22 +1216,33 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
if (ret != buffers_to_fill)
exit_with_error(ENOSPC);
- for (i = 0; i < buffers_to_fill; i++) {
- u64 addr;
- if (pkt_stream->use_addr_for_fill) {
- struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
+ while (filled < buffers_to_fill) {
+ struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+ u64 addr;
+ u32 i;
+
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) {
+ if (!pkt) {
+ if (!fill_up)
+ break;
+ addr = filled * umem->frame_size + umem->base_addr;
+ } else if (pkt->offset >= 0) {
+ addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+ } else {
+ addr = pkt->offset + umem_alloc_buffer(umem);
+ }
- if (!pkt)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+ if (++filled >= buffers_to_fill)
break;
- addr = pkt->addr;
- } else {
- addr = i * umem->frame_size;
}
-
- *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
}
- xsk_ring_prod__submit(&umem->fq, i);
+ xsk_ring_prod__submit(&umem->fq, filled);
+ xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+ pkt_stream_reset(pkt_stream);
+ umem_reset_alloc(umem);
}
static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
@@ -1300,12 +1263,10 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
if (bufs == MAP_FAILED)
exit_with_error(errno);
- ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz);
+ ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
if (ret)
exit_with_error(-ret);
- xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
-
xsk_configure_socket(test, ifobject, ifobject->umem, false);
ifobject->xsk = &ifobject->xsk_arr[0];
@@ -1313,6 +1274,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
if (!ifobject->rx_on)
return;
+ xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring);
+
ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
if (ret)
exit_with_error(errno);
@@ -1370,12 +1333,8 @@ static void *worker_testapp_validate_rx(void *arg)
if (!err && ifobject->validation_func)
err = ifobject->validation_func(ifobject);
- if (err) {
+ if (err)
report_failure(test);
- pthread_mutex_lock(&pacing_mutex);
- pthread_cond_signal(&pacing_cond);
- pthread_mutex_unlock(&pacing_mutex);
- }
pthread_exit(NULL);
}
@@ -1402,11 +1361,20 @@ static void handler(int signum)
pthread_exit(NULL);
}
-static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj)
+static bool xdp_prog_changed_rx(struct test_spec *test)
{
+ struct ifobject *ifobj = test->ifobj_rx;
+
return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
}
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_tx;
+
+ return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
struct bpf_map *xskmap, enum test_mode mode)
{
@@ -1433,13 +1401,13 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro
static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
struct ifobject *ifobj_tx)
{
- if (xdp_prog_changed(test, ifobj_rx))
+ if (xdp_prog_changed_rx(test))
xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
if (!ifobj_tx || ifobj_tx->shared_umem)
return;
- if (xdp_prog_changed(test, ifobj_tx))
+ if (xdp_prog_changed_tx(test))
xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
}
@@ -1448,9 +1416,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
{
pthread_t t0, t1;
- if (ifobj2)
+ if (ifobj2) {
if (pthread_barrier_init(&barr, NULL, 2))
exit_with_error(errno);
+ pkt_stream_reset(ifobj2->pkt_stream);
+ }
test->current_step++;
pkt_stream_reset(ifobj1->pkt_stream);
@@ -1493,6 +1463,12 @@ static int testapp_validate_traffic(struct test_spec *test)
struct ifobject *ifobj_rx = test->ifobj_rx;
struct ifobject *ifobj_tx = test->ifobj_tx;
+ if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+ (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+ ksft_test_result_skip("No huge pages present.\n");
+ return TEST_SKIP;
+ }
+
xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
}
@@ -1502,16 +1478,18 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
return __testapp_validate_traffic(test, ifobj, NULL);
}
-static void testapp_teardown(struct test_spec *test)
+static int testapp_teardown(struct test_spec *test)
{
int i;
test_spec_set_name(test, "TEARDOWN");
for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
if (testapp_validate_traffic(test))
- return;
+ return TEST_FAILURE;
test_spec_reset(test);
}
+
+ return TEST_PASS;
}
static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
@@ -1526,20 +1504,23 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
*ifobj2 = tmp_ifobj;
}
-static void testapp_bidi(struct test_spec *test)
+static int testapp_bidi(struct test_spec *test)
{
+ int res;
+
test_spec_set_name(test, "BIDIRECTIONAL");
test->ifobj_tx->rx_on = true;
test->ifobj_rx->tx_on = true;
test->total_steps = 2;
if (testapp_validate_traffic(test))
- return;
+ return TEST_FAILURE;
print_verbose("Switching Tx/Rx vectors\n");
swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+ res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ return res;
}
static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
@@ -1556,160 +1537,139 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj
exit_with_error(errno);
}
-static void testapp_bpf_res(struct test_spec *test)
+static int testapp_bpf_res(struct test_spec *test)
{
test_spec_set_name(test, "BPF_RES");
test->total_steps = 2;
test->nb_sockets = 2;
if (testapp_validate_traffic(test))
- return;
+ return TEST_FAILURE;
swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_headroom(struct test_spec *test)
+static int testapp_headroom(struct test_spec *test)
{
test_spec_set_name(test, "UMEM_HEADROOM");
test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_stats_rx_dropped(struct test_spec *test)
+static int testapp_stats_rx_dropped(struct test_spec *test)
{
test_spec_set_name(test, "STAT_RX_DROPPED");
+ if (test->mode == TEST_MODE_ZC) {
+		ksft_test_result_skip("Cannot run RX_DROPPED test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
pkt_stream_receive_half(test);
test->ifobj_rx->validation_func = validate_rx_dropped;
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_stats_tx_invalid_descs(struct test_spec *test)
+static int testapp_stats_tx_invalid_descs(struct test_spec *test)
{
test_spec_set_name(test, "STAT_TX_INVALID");
pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
test->ifobj_tx->validation_func = validate_tx_invalid_descs;
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_stats_rx_full(struct test_spec *test)
+static int testapp_stats_rx_full(struct test_spec *test)
{
test_spec_set_name(test, "STAT_RX_FULL");
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+ pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
- DEFAULT_UMEM_BUFFERS, PKT_SIZE);
- if (!test->ifobj_rx->pkt_stream)
- exit_with_error(ENOMEM);
+ DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
test->ifobj_rx->release_rx = false;
test->ifobj_rx->validation_func = validate_rx_full;
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_stats_fill_empty(struct test_spec *test)
+static int testapp_stats_fill_empty(struct test_spec *test)
{
test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+ pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
- DEFAULT_UMEM_BUFFERS, PKT_SIZE);
- if (!test->ifobj_rx->pkt_stream)
- exit_with_error(ENOMEM);
+ DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
test->ifobj_rx->use_fill_ring = false;
test->ifobj_rx->validation_func = validate_fill_empty;
- testapp_validate_traffic(test);
-}
-
-/* Simple test */
-static bool hugepages_present(struct ifobject *ifobject)
-{
- size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size;
- void *bufs;
-
- bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
- if (bufs == MAP_FAILED)
- return false;
-
- mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- munmap(bufs, mmap_sz);
- return true;
+ return testapp_validate_traffic(test);
}
-static bool testapp_unaligned(struct test_spec *test)
+static int testapp_unaligned(struct test_spec *test)
{
- if (!hugepages_present(test->ifobj_tx)) {
- ksft_test_result_skip("No 2M huge pages present.\n");
- return false;
- }
-
test_spec_set_name(test, "UNALIGNED_MODE");
test->ifobj_tx->umem->unaligned_mode = true;
test->ifobj_rx->umem->unaligned_mode = true;
- /* Let half of the packets straddle a buffer boundrary */
- pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2);
- test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
- testapp_validate_traffic(test);
+ /* Let half of the packets straddle a 4K buffer boundary */
+ pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
- return true;
+ return testapp_validate_traffic(test);
}
-static void testapp_single_pkt(struct test_spec *test)
+static int testapp_single_pkt(struct test_spec *test)
{
- struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}};
+ struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_invalid_desc(struct test_spec *test)
+static int testapp_invalid_desc(struct test_spec *test)
{
- u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
struct pkt pkts[] = {
/* Zero packet address allowed */
- {0, PKT_SIZE, 0, true},
+ {0, MIN_PKT_SIZE, 0, true},
/* Allowed packet */
- {0x1000, PKT_SIZE, 0, true},
+ {0, MIN_PKT_SIZE, 0, true},
/* Straddling the start of umem */
- {-2, PKT_SIZE, 0, false},
+ {-2, MIN_PKT_SIZE, 0, false},
/* Packet too large */
- {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
/* Up to end of umem allowed */
- {umem_size - PKT_SIZE, PKT_SIZE, 0, true},
+ {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
/* After umem ends */
- {umem_size, PKT_SIZE, 0, false},
+ {umem_size, MIN_PKT_SIZE, 0, false},
/* Straddle the end of umem */
- {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false},
- /* Straddle a page boundrary */
- {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
- /* Straddle a 2K boundrary */
- {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true},
+ {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 4K boundary */
+ {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 2K boundary */
+ {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
	/* Valid packet for sync so that something is received */
- {0x4000, PKT_SIZE, 0, true}};
+ {0, MIN_PKT_SIZE, 0, true}};
- if (test->ifobj_tx->umem->unaligned_mode) {
- /* Crossing a page boundrary allowed */
+ if (umem->unaligned_mode) {
+ /* Crossing a page boundary allowed */
pkts[7].valid = true;
}
- if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
- /* Crossing a 2K frame size boundrary not allowed */
+ if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+ /* Crossing a 2K frame size boundary not allowed */
pkts[8].valid = false;
}
if (test->ifobj_tx->shared_umem) {
- pkts[4].addr += umem_size;
- pkts[5].addr += umem_size;
- pkts[6].addr += umem_size;
+ pkts[4].offset += umem_size;
+ pkts[5].offset += umem_size;
+ pkts[6].offset += umem_size;
}
pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_xdp_drop(struct test_spec *test)
+static int testapp_xdp_drop(struct test_spec *test)
{
struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
@@ -1719,10 +1679,10 @@ static void testapp_xdp_drop(struct test_spec *test)
skel_rx->maps.xsk, skel_tx->maps.xsk);
pkt_stream_receive_half(test);
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_xdp_metadata_count(struct test_spec *test)
+static int testapp_xdp_metadata_count(struct test_spec *test)
{
struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
@@ -1743,10 +1703,10 @@ static void testapp_xdp_metadata_count(struct test_spec *test)
if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY))
exit_with_error(errno);
- testapp_validate_traffic(test);
+ return testapp_validate_traffic(test);
}
-static void testapp_poll_txq_tmout(struct test_spec *test)
+static int testapp_poll_txq_tmout(struct test_spec *test)
{
test_spec_set_name(test, "POLL_TXQ_FULL");
@@ -1754,14 +1714,14 @@ static void testapp_poll_txq_tmout(struct test_spec *test)
	/* create invalid frame by setting umem frame_size and pkt length to 2048 */
test->ifobj_tx->umem->frame_size = 2048;
pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
- testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+ return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
}
-static void testapp_poll_rxq_tmout(struct test_spec *test)
+static int testapp_poll_rxq_tmout(struct test_spec *test)
{
test_spec_set_name(test, "POLL_RXQ_EMPTY");
test->ifobj_rx->use_poll = true;
- testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+ return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
}
static int xsk_load_xdp_programs(struct ifobject *ifobj)
@@ -1778,25 +1738,30 @@ static void xsk_unload_xdp_programs(struct ifobject *ifobj)
xsk_xdp_progs__destroy(ifobj->xdp_progs);
}
+/* Simple test */
+static bool hugepages_present(void)
+{
+ size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ void *bufs;
+
+	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
+ if (bufs == MAP_FAILED)
+ return false;
+
+ mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ munmap(bufs, mmap_sz);
+ return true;
+}
+
static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
- const char *dst_ip, const char *src_ip, const u16 dst_port,
- const u16 src_port, thread_func_t func_ptr)
+ thread_func_t func_ptr)
{
- struct in_addr ip;
int err;
memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
- inet_aton(dst_ip, &ip);
- ifobj->dst_ip = ip.s_addr;
-
- inet_aton(src_ip, &ip);
- ifobj->src_ip = ip.s_addr;
-
- ifobj->dst_port = dst_port;
- ifobj->src_port = src_port;
-
ifobj->func_ptr = func_ptr;
err = xsk_load_xdp_programs(ifobj);
@@ -1804,94 +1769,87 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
printf("Error loading XDP program\n");
exit_with_error(err);
}
+
+ if (hugepages_present())
+ ifobj->unaligned_supp = true;
}
static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
{
+ int ret = TEST_SKIP;
+
switch (type) {
case TEST_TYPE_STATS_RX_DROPPED:
- if (mode == TEST_MODE_ZC) {
- ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
- return;
- }
- testapp_stats_rx_dropped(test);
+ ret = testapp_stats_rx_dropped(test);
break;
case TEST_TYPE_STATS_TX_INVALID_DESCS:
- testapp_stats_tx_invalid_descs(test);
+ ret = testapp_stats_tx_invalid_descs(test);
break;
case TEST_TYPE_STATS_RX_FULL:
- testapp_stats_rx_full(test);
+ ret = testapp_stats_rx_full(test);
break;
case TEST_TYPE_STATS_FILL_EMPTY:
- testapp_stats_fill_empty(test);
+ ret = testapp_stats_fill_empty(test);
break;
case TEST_TYPE_TEARDOWN:
- testapp_teardown(test);
+ ret = testapp_teardown(test);
break;
case TEST_TYPE_BIDI:
- testapp_bidi(test);
+ ret = testapp_bidi(test);
break;
case TEST_TYPE_BPF_RES:
- testapp_bpf_res(test);
+ ret = testapp_bpf_res(test);
break;
case TEST_TYPE_RUN_TO_COMPLETION:
test_spec_set_name(test, "RUN_TO_COMPLETION");
- testapp_validate_traffic(test);
+ ret = testapp_validate_traffic(test);
break;
case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
- testapp_single_pkt(test);
+ ret = testapp_single_pkt(test);
break;
case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
test->ifobj_tx->umem->frame_size = 2048;
test->ifobj_rx->umem->frame_size = 2048;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
- testapp_validate_traffic(test);
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ ret = testapp_validate_traffic(test);
break;
case TEST_TYPE_RX_POLL:
test->ifobj_rx->use_poll = true;
test_spec_set_name(test, "POLL_RX");
- testapp_validate_traffic(test);
+ ret = testapp_validate_traffic(test);
break;
case TEST_TYPE_TX_POLL:
test->ifobj_tx->use_poll = true;
test_spec_set_name(test, "POLL_TX");
- testapp_validate_traffic(test);
+ ret = testapp_validate_traffic(test);
break;
case TEST_TYPE_POLL_TXQ_TMOUT:
- testapp_poll_txq_tmout(test);
+ ret = testapp_poll_txq_tmout(test);
break;
case TEST_TYPE_POLL_RXQ_TMOUT:
- testapp_poll_rxq_tmout(test);
+ ret = testapp_poll_rxq_tmout(test);
break;
case TEST_TYPE_ALIGNED_INV_DESC:
test_spec_set_name(test, "ALIGNED_INV_DESC");
- testapp_invalid_desc(test);
+ ret = testapp_invalid_desc(test);
break;
case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
test->ifobj_tx->umem->frame_size = 2048;
test->ifobj_rx->umem->frame_size = 2048;
- testapp_invalid_desc(test);
+ ret = testapp_invalid_desc(test);
break;
case TEST_TYPE_UNALIGNED_INV_DESC:
- if (!hugepages_present(test->ifobj_tx)) {
- ksft_test_result_skip("No 2M huge pages present.\n");
- return;
- }
test_spec_set_name(test, "UNALIGNED_INV_DESC");
test->ifobj_tx->umem->unaligned_mode = true;
test->ifobj_rx->umem->unaligned_mode = true;
- testapp_invalid_desc(test);
+ ret = testapp_invalid_desc(test);
break;
case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: {
u64 page_size, umem_size;
- if (!hugepages_present(test->ifobj_tx)) {
- ksft_test_result_skip("No 2M huge pages present.\n");
- return;
- }
test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE");
/* Odd frame size so the UMEM doesn't end near a page boundary. */
test->ifobj_tx->umem->frame_size = 4001;
@@ -1903,29 +1861,28 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
*/
page_size = sysconf(_SC_PAGESIZE);
umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
- assert(umem_size % page_size > PKT_SIZE);
- assert(umem_size % page_size < page_size - PKT_SIZE);
- testapp_invalid_desc(test);
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+ ret = testapp_invalid_desc(test);
break;
}
case TEST_TYPE_UNALIGNED:
- if (!testapp_unaligned(test))
- return;
+ ret = testapp_unaligned(test);
break;
case TEST_TYPE_HEADROOM:
- testapp_headroom(test);
+ ret = testapp_headroom(test);
break;
case TEST_TYPE_XDP_DROP_HALF:
- testapp_xdp_drop(test);
+ ret = testapp_xdp_drop(test);
break;
case TEST_TYPE_XDP_METADATA_COUNT:
- testapp_xdp_metadata_count(test);
+ ret = testapp_xdp_metadata_count(test);
break;
default:
break;
}
- if (!test->fail)
+ if (ret == TEST_PASS)
ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
test->name);
pkt_stream_restore_default(test);
@@ -2030,14 +1987,12 @@ int main(int argc, char **argv)
modes++;
}
- init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
- worker_testapp_validate_rx);
- init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
- worker_testapp_validate_tx);
+ init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx);
+ init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx);
test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
- tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
- rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+ tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
if (!tx_pkt_stream_default || !rx_pkt_stream_default)
exit_with_error(ENOMEM);
test.tx_pkt_stream_default = tx_pkt_stream_default;
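
The refactor above changes every testapp_*() helper to return a result code instead of flagging test->fail, which is what lets the hugepage probes move out of run_pkt_test() and into the tests themselves as skips. A minimal sketch of the new convention follows; testapp_foo() is a hypothetical example reusing helpers visible in this patch, not code from it, and since only the TEST_PASS branch of the runner is visible in the hunk above, the skip reporting is assumed to follow the same constants.

/* Hypothetical test following the new convention: environment checks
 * return TEST_SKIP, the real work propagates TEST_PASS/TEST_FAILURE. */
static int testapp_foo(struct test_spec *test)
{
	if (!hugepages_present(test->ifobj_tx))
		return TEST_SKIP;	/* reported as a skip, not a failure */

	test->ifobj_tx->umem->unaligned_mode = true;
	test->ifobj_rx->umem->unaligned_mode = true;
	return testapp_validate_traffic(test);	/* TEST_PASS or TEST_FAILURE */
}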
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index c535aeab2ca3..aaf27e067640 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -30,22 +30,14 @@
#define TEST_PASS 0
#define TEST_FAILURE -1
#define TEST_CONTINUE 1
+#define TEST_SKIP 2
#define MAX_INTERFACES 2
#define MAX_INTERFACE_NAME_CHARS 16
#define MAX_SOCKETS 2
#define MAX_TEST_NAME_SIZE 32
#define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
- sizeof(struct udphdr))
-#define MIN_ETH_PKT_SIZE 64
-#define ETH_FCS_SIZE 4
-#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE)
-#define PKT_SIZE (MIN_PKT_SIZE)
-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
-#define IP_PKT_VER 0x4
-#define IP_PKT_TOS 0x9
-#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
+#define MIN_PKT_SIZE 64
#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
#define BATCH_SIZE 64
@@ -57,6 +49,7 @@
#define UMEM_HEADROOM_TEST_SIZE 128
#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define PKT_DUMP_NB_TO_PRINT 16
#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
@@ -93,13 +86,13 @@ enum test_type {
TEST_TYPE_MAX
};
-static bool opt_pkt_dump;
static bool opt_verbose;
struct xsk_umem_info {
struct xsk_ring_prod fq;
struct xsk_ring_cons cq;
struct xsk_umem *umem;
+ u64 next_buffer;
u32 num_frames;
u32 frame_headroom;
void *buffer;
@@ -118,17 +111,17 @@ struct xsk_socket_info {
};
struct pkt {
- u64 addr;
+ int offset;
u32 len;
- u32 payload;
+ u32 pkt_nb;
bool valid;
};
struct pkt_stream {
u32 nb_pkts;
- u32 rx_pkt_nb;
+ u32 current_pkt_nb;
struct pkt *pkts;
- bool use_addr_for_fill;
+ u32 max_pkt_len;
};
struct ifobject;
@@ -148,11 +141,7 @@ struct ifobject {
struct bpf_program *xdp_prog;
enum test_mode mode;
int ifindex;
- u32 dst_ip;
- u32 src_ip;
u32 bind_flags;
- u16 src_port;
- u16 dst_port;
bool tx_on;
bool rx_on;
bool use_poll;
@@ -161,6 +150,7 @@ struct ifobject {
bool release_rx;
bool shared_umem;
bool use_metadata;
+ bool unaligned_supp;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
};
@@ -184,7 +174,6 @@ struct test_spec {
pthread_barrier_t barr;
pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
int pkts_in_flight;
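
The header rework above replaces the absolute UMEM address in struct pkt with a relative offset plus a packet sequence number, and adds a next_buffer cursor to struct xsk_umem_info. Below is a sketch of how a descriptor address could be derived under that layout; pkt_get_addr() is an assumption about the intended use, not the patch's actual helper.

/* Assumed address derivation: hand out UMEM frames in order via
 * next_buffer, placing each packet at its relative offset. */
static u64 pkt_get_addr(struct xsk_umem_info *umem, const struct pkt *pkt)
{
	u64 addr = umem->next_buffer + pkt->offset;

	umem->next_buffer += umem->frame_size;	/* advance to the next frame */
	return addr;
}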
diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore
new file mode 100644
index 000000000000..d6c30b43a4bb
--- /dev/null
+++ b/tools/testing/selftests/cachestat/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_cachestat
diff --git a/tools/testing/selftests/cachestat/Makefile b/tools/testing/selftests/cachestat/Makefile
new file mode 100644
index 000000000000..fca73aaa7d14
--- /dev/null
+++ b/tools/testing/selftests/cachestat/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := test_cachestat
+
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall
+CFLAGS += -lrt
+
+include ../lib.mk
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
new file mode 100644
index 000000000000..c037553cfd92
--- /dev/null
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/magic.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+#define NR_TESTS 9
+
+static const char * const dev_files[] = {
+ "/dev/zero", "/dev/null", "/dev/urandom",
+ "/proc/version", "/proc"
+};
+static const int cachestat_nr = 451;
+
+void print_cachestat(struct cachestat *cs)
+{
+ ksft_print_msg(
+ "Using cachestat: Cached: %lu, Dirty: %lu, Writeback: %lu, Evicted: %lu, Recently Evicted: %lu\n",
+ cs->nr_cache, cs->nr_dirty, cs->nr_writeback,
+ cs->nr_evicted, cs->nr_recently_evicted);
+}
+
+bool write_exactly(int fd, size_t filesize)
+{
+ int random_fd = open("/dev/urandom", O_RDONLY);
+ char *cursor, *data;
+ int remained;
+ bool ret;
+
+ if (random_fd < 0) {
+ ksft_print_msg("Unable to access urandom.\n");
+ ret = false;
+ goto out;
+ }
+
+ data = malloc(filesize);
+ if (!data) {
+ ksft_print_msg("Unable to allocate data.\n");
+ ret = false;
+ goto close_random_fd;
+ }
+
+ remained = filesize;
+ cursor = data;
+
+ while (remained) {
+ ssize_t read_len = read(random_fd, cursor, remained);
+
+ if (read_len <= 0) {
+ ksft_print_msg("Unable to read from urandom.\n");
+ ret = false;
+ goto out_free_data;
+ }
+
+ remained -= read_len;
+ cursor += read_len;
+ }
+
+ /* write random data to fd */
+ remained = filesize;
+ cursor = data;
+ while (remained) {
+ ssize_t write_len = write(fd, cursor, remained);
+
+ if (write_len <= 0) {
+ ksft_print_msg("Unable write random data to file.\n");
+ ret = false;
+ goto out_free_data;
+ }
+
+ remained -= write_len;
+ cursor += write_len;
+ }
+
+ ret = true;
+out_free_data:
+ free(data);
+close_random_fd:
+ close(random_fd);
+out:
+ return ret;
+}
+
+/*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check for test file living on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+ struct statfs statfs_buf;
+
+ if (fstatfs(fd, &statfs_buf))
+ return false;
+
+ return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
+/*
+ * Open/create the file at filename, (optionally) write random data to it
+ * (exactly num_pages), then test the cachestat syscall on this file.
+ *
+ * If test_fsync == true, fsync the file, then check the number of dirty
+ * pages.
+ */
+static int test_cachestat(const char *filename, bool write_random, bool create,
+ bool test_fsync, unsigned long num_pages,
+ int open_flags, mode_t open_mode)
+{
+ size_t PS = sysconf(_SC_PAGESIZE);
+ int filesize = num_pages * PS;
+ int ret = KSFT_PASS;
+ long syscall_ret;
+ struct cachestat cs;
+ struct cachestat_range cs_range = { 0, filesize };
+
+ int fd = open(filename, open_flags, open_mode);
+
+ if (fd == -1) {
+ ksft_print_msg("Unable to create/open file.\n");
+ ret = KSFT_FAIL;
+ goto out;
+ } else {
+ ksft_print_msg("Create/open %s\n", filename);
+ }
+
+ if (write_random) {
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to access urandom.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+ }
+ }
+
+ syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
+
+ ksft_print_msg("Cachestat call returned %ld\n", syscall_ret);
+
+ if (syscall_ret) {
+ ksft_print_msg("Cachestat returned non-zero.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+
+ } else {
+ print_cachestat(&cs);
+
+ if (write_random) {
+ if (cs.nr_cache + cs.nr_evicted != num_pages) {
+ ksft_print_msg(
+ "Total number of cached and evicted pages is off.\n");
+ ret = KSFT_FAIL;
+ }
+ }
+ }
+
+ if (test_fsync) {
+ if (is_on_tmpfs(fd)) {
+ ret = KSFT_SKIP;
+ } else if (fsync(fd)) {
+ ksft_print_msg("fsync fails.\n");
+ ret = KSFT_FAIL;
+ } else {
+ syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
+
+ ksft_print_msg("Cachestat call (after fsync) returned %ld\n",
+ syscall_ret);
+
+ if (!syscall_ret) {
+ print_cachestat(&cs);
+
+ if (cs.nr_dirty) {
+ ret = KSFT_FAIL;
+ ksft_print_msg(
+ "Number of dirty should be zero after fsync.\n");
+ }
+ } else {
+ ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+ }
+ }
+ }
+
+out1:
+ close(fd);
+
+ if (create)
+ remove(filename);
+out:
+ return ret;
+}
+
+bool test_cachestat_shmem(void)
+{
+ size_t PS = sysconf(_SC_PAGESIZE);
+ size_t filesize = PS * 512 * 2; /* two 2MB huge pages */
+ int syscall_ret;
+ size_t compute_len = PS * 512;
+ struct cachestat_range cs_range = { PS, compute_len };
+ char *filename = "tmpshmcstat";
+ struct cachestat cs;
+ bool ret = true;
+ unsigned long num_pages = compute_len / PS;
+ int fd = shm_open(filename, O_CREAT | O_RDWR, 0600);
+
+ if (fd < 0) {
+ ksft_print_msg("Unable to create shmem file.\n");
+ ret = false;
+ goto out;
+ }
+
+ if (ftruncate(fd, filesize)) {
+ ksft_print_msg("Unable to truncate shmem file.\n");
+ ret = false;
+ goto close_fd;
+ }
+
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to write to shmem file.\n");
+ ret = false;
+ goto close_fd;
+ }
+
+ syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
+
+ if (syscall_ret) {
+ ksft_print_msg("Cachestat returned non-zero.\n");
+ ret = false;
+ goto close_fd;
+ } else {
+ print_cachestat(&cs);
+ if (cs.nr_cache + cs.nr_evicted != num_pages) {
+ ksft_print_msg(
+ "Total number of cached and evicted pages is off.\n");
+ ret = false;
+ }
+ }
+
+close_fd:
+ shm_unlink(filename);
+out:
+ return ret;
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+
+ ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+ if (ret == -1 && errno == ENOSYS)
+ ksft_exit_skip("cachestat syscall not available\n");
+
+ ksft_set_plan(NR_TESTS);
+
+ if (ret == -1 && errno == EBADF) {
+ ksft_test_result_pass("bad file descriptor recognized\n");
+ ret = 0;
+ } else {
+ ksft_test_result_fail("bad file descriptor ignored\n");
+ ret = 1;
+ }
+
+ for (int i = 0; i < 5; i++) {
+ const char *dev_filename = dev_files[i];
+
+ if (test_cachestat(dev_filename, false, false, false,
+ 4, O_RDONLY, 0400) == KSFT_PASS)
+ ksft_test_result_pass("cachestat works with %s\n", dev_filename);
+ else {
+ ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
+ ret = 1;
+ }
+ }
+
+ if (test_cachestat("tmpfilecachestat", true, true,
+ false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
+ ksft_test_result_pass("cachestat works with a normal file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with normal file\n");
+ ret = 1;
+ }
+
+ switch (test_cachestat("tmpfilecachestat", true, true,
+ true, 4, O_CREAT | O_RDWR, 0600)) {
+ case KSFT_FAIL:
+ ksft_test_result_fail("cachestat fsync fails with normal file\n");
+ ret = KSFT_FAIL;
+ break;
+ case KSFT_PASS:
+ ksft_test_result_pass("cachestat fsync works with a normal file\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+ break;
+ }
+
+ if (test_cachestat_shmem())
+ ksft_test_result_pass("cachestat works with a shmem file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with a shmem file\n");
+ ret = 1;
+ }
+
+ return ret;
+}
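
test_cachestat.c takes struct cachestat and struct cachestat_range from the kernel uapi headers rather than defining them. For reference, here is a stand-alone sketch of a raw cachestat() call; the struct layouts mirror the fields the test reads and are believed to match the Linux 6.5 uapi, and 451 is the syscall number the test itself hard-codes as cachestat_nr.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/types.h>

struct cachestat_range { __u64 off; __u64 len; };
struct cachestat {
	__u64 nr_cache, nr_dirty, nr_writeback;
	__u64 nr_evicted, nr_recently_evicted;
};

int main(void)
{
	struct cachestat_range range = { 0, 4096 };	/* first page only */
	struct cachestat cs;
	int fd = open("/etc/hostname", O_RDONLY);

	if (fd < 0 || syscall(451, fd, &range, &cs, 0))
		return 1;
	printf("cached: %llu dirty: %llu\n",
	       (unsigned long long)cs.nr_cache,
	       (unsigned long long)cs.nr_dirty);
	close(fd);
	return 0;
}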
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 258ddc565deb..ed2e50bb1e76 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root)
goto cleanup;
cg_write(cg, "memory.high", "1M");
+
+ /* wait for RCU freeing */
+ sleep(1);
+
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
- if (slab1 <= 0)
+ if (slab1 < 0)
goto cleanup;
current = cg_read_long(cg, "memory.current");
- if (current <= 0)
+ if (current < 0)
goto cleanup;
if (slab1 < slab0 / 2 && current < slab0 / 2)
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index f4f7c0aef702..c7c9572003a8 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -292,6 +292,7 @@ static int test_memcg_protection(const char *root, bool min)
char *children[4] = {NULL};
const char *attribute = min ? "memory.min" : "memory.low";
long c[4];
+ long current;
int i, attempts;
int fd;
@@ -400,7 +401,8 @@ static int test_memcg_protection(const char *root, bool min)
goto cleanup;
}
- if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ current = min ? MB(50) : MB(30);
+ if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
goto cleanup;
if (!reclaim_until(children[0], MB(10)))
@@ -987,7 +989,9 @@ static int tcp_client(const char *cgroup, unsigned short port)
char servport[6];
int retries = 0x10; /* nice round number */
int sk, ret;
+ long allocated;
+ allocated = cg_read_long(cgroup, "memory.current");
snprintf(servport, sizeof(servport), "%hd", port);
ret = getaddrinfo(server, servport, NULL, &ai);
if (ret)
@@ -1015,7 +1019,8 @@ static int tcp_client(const char *cgroup, unsigned short port)
if (current < 0 || sock < 0)
goto close_sk;
- if (values_close(current, sock, 10)) {
+ /* exclude memory not related to the socket connection */
+ if (values_close(current - allocated, sock, 10)) {
ret = KSFT_PASS;
break;
}
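
tcp_client() now snapshots memory.current before the connection is established and compares only the growth against the socket counter, so memory the cgroup had already charged for unrelated reasons no longer skews the 10% tolerance. Below is a sketch of the pattern, reusing the helpers this file already includes; the function name and the "sock " memory.stat key are assumptions for illustration.

/* Baseline-delta check: only memory that appeared since 'baseline'
 * (taken before connecting) is attributed to the socket. */
static int check_sock_accounting(const char *cgroup, long baseline)
{
	long current = cg_read_long(cgroup, "memory.current");
	long sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

	if (current < 0 || sock < 0)
		return -1;
	return values_close(current - baseline, sock, 10) ? 0 : -1;
}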
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index e495f895a2cd..e60cf4da8fb0 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -129,7 +129,7 @@ int main(int argc, char *argv[])
uid_t uid = getuid();
ksft_print_header();
- ksft_set_plan(18);
+ ksft_set_plan(19);
test_clone3_supported();
/* Just a simple clone3() should return 0.*/
@@ -198,5 +198,8 @@ int main(int argc, char *argv[])
/* Do a clone3() in a new time namespace */
test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with exit signal (SIGCHLD) in flags */
+ test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST);
+
ksft_finished();
}
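
The extra clone3() case pins down an API rule: unlike legacy clone(), the exit signal goes in struct clone_args.exit_signal, and signal bits in .flags are rejected with EINVAL, which is exactly what the new test asserts. A minimal sketch of the correct usage, a fork() equivalent built on the uapi struct clone_args:

#define _GNU_SOURCE
#include <linux/sched.h>	/* struct clone_args */
#include <signal.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* fork() equivalent via clone3(): SIGCHLD belongs in exit_signal, never
 * in flags; flags with signal bits set fail with EINVAL. */
static pid_t fork_via_clone3(void)
{
	struct clone_args args;

	memset(&args, 0, sizeof(args));
	args.exit_signal = SIGCHLD;
	return syscall(__NR_clone3, &args, sizeof(args));
}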
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
index 75e900793e8a..ce5068f5a6a2 100644
--- a/tools/testing/selftests/cpufreq/config
+++ b/tools/testing/selftests/cpufreq/config
@@ -5,11 +5,3 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_PLIST=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config
new file mode 100644
index 000000000000..0daf38974eb0
--- /dev/null
+++ b/tools/testing/selftests/damon/config
@@ -0,0 +1,7 @@
+CONFIG_DAMON=y
+CONFIG_DAMON_SYSFS=y
+CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_PADDR=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_RECLAIM=y
+CONFIG_DAMON_LRU_SORT=y
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 03f92d7aeb19..8a72bb7de70f 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -9,10 +9,12 @@ TEST_PROGS := \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh \
bond_options.sh \
- bond-eth-type-change.sh
+ bond-eth-type-change.sh \
+ bond_macvlan.sh
TEST_FILES := \
lag_lib.sh \
+ bond_topo_2d1c.sh \
bond_topo_3d1c.sh \
net_forwarding_lib.sh
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
index 47ab90596acb..6358df5752f9 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end
# add ports
ip link set fbond master fab-br0
-ip link set veth1-bond down master fbond
-ip link set veth2-bond down master fbond
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
# bring up
ip link set veth1-end up
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
index 5cdd22048ba7..862e947e17c7 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -53,7 +53,6 @@ bond_test_enslave_type_change()
# restore ARPHRD_ETHER type by enslaving such device
ip link set dev "$devbond2" master "$devbond0"
check_err $? "could not enslave $devbond2 to $devbond0"
- ip link set dev "$devbond1" nomaster
bond_check_flags "$devbond0"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
new file mode 100755
index 000000000000..b609fb6231f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan over balance-alb
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+m1_ns="m1-$(mktemp -u XXXXXX)"
+m2_ns="m1-$(mktemp -u XXXXXX)"
+m1_ip4="192.0.2.11"
+m1_ip6="2001:db8::11"
+m2_ip4="192.0.2.12"
+m2_ip6="2001:db8::12"
+
+cleanup()
+{
+ ip -n ${m1_ns} link del macv0
+ ip netns del ${m1_ns}
+ ip -n ${m2_ns} link del macv0
+ ip netns del ${m2_ns}
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+check_connection()
+{
+ local ns=${1}
+ local target=${2}
+ local message=${3:-"macvlan_over_bond"}
+ RET=0
+
+ ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+ check_err $? "ping failed"
+ log_test "$mode: $message"
+}
+
+macvlan_over_bond()
+{
+ local param="$1"
+ RET=0
+
+ # setup new bond mode
+ bond_reset "${param}"
+
+ ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+ ip -n ${s_ns} link set macv0 netns ${m1_ns}
+ ip -n ${m1_ns} link set dev macv0 up
+ ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
+ ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
+
+ ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+ ip -n ${s_ns} link set macv0 netns ${m2_ns}
+ ip -n ${m2_ns} link set dev macv0 up
+ ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
+ ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
+
+ sleep 2
+
+ check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+ check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+ check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
+ check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
+ check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
+ check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
+ check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
+ check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
+
+ sleep 5
+
+ check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+ check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+ check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
+ check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
+ check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
+ check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
+ check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2"
+ check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2"
+
+ ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${m1_ns}
+ip netns add ${m2_ns}
+
+modes="active-backup balance-tlb balance-alb"
+
+for mode in $modes; do
+ macvlan_over_bond "mode $mode"
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 607ba5c38977..c54d1697f439 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -9,10 +9,7 @@ ALL_TESTS="
num_grat_arp
"
-REQUIRE_MZ=no
-NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source ${lib_dir}/net_forwarding_lib.sh
source ${lib_dir}/bond_topo_3d1c.sh
skip_prio()
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644
index 000000000000..a509ef949dcf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+# +-------------------------+
+# | bond0 | Server
+# | + | 192.0.2.1/24
+# | eth0 | eth1 | 2001:db8::1/24
+# | +---+---+ |
+# | | | |
+# +-------------------------+
+# | |
+# +-------------------------+
+# | | | |
+# | +---+-------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------+-------+ | 2001:db8::254/24
+# | | |
+# +-------------------------+
+# |
+# +-------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source ${lib_dir}/net_forwarding_lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+ ip netns add ${g_ns}
+ ip -n ${g_ns} link add br0 type bridge
+ ip -n ${g_ns} link set br0 up
+ ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+ ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+ ip -n ${g_ns} link del br0
+ ip netns del ${g_ns}
+}
+
+server_create()
+{
+ ip netns add ${s_ns}
+ ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+ for i in $(seq 0 1); do
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+
+ tc -n ${g_ns} qdisc add dev s${i} clsact
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ sleep 2
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ local param="$1"
+ link_num=$((link_num -1))
+
+ ip -n ${s_ns} link set bond0 down
+ ip -n ${s_ns} link del bond0
+
+ ip -n ${s_ns} link add bond0 type bond $param
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link set eth$i master bond0
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ sleep 2
+}
+
+server_destroy()
+{
+ # Count the eth links in real time, as this function
+ # may be called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ link_num=$((link_num -1))
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link del eth${i}
+ done
+ ip netns del ${s_ns}
+}
+
+client_create()
+{
+ ip netns add ${c_ns}
+ ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+ ip -n ${g_ns} link set c0 up
+ ip -n ${g_ns} link set c0 master br0
+
+ ip -n ${c_ns} link set eth0 up
+ ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+ ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+ ip -n ${c_ns} link del eth0
+ ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+bond_check_connection()
+{
+ local msg=${1:-"check connection"}
+
+ sleep 2
+ ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping failed"
+ ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 69ab99a56043..3a1333d9a85b 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -25,121 +25,19 @@
# | eth0 | 2001:db8::10/24
# +-------------------------------------+
-s_ns="s-$(mktemp -u XXXXXX)"
-c_ns="c-$(mktemp -u XXXXXX)"
-g_ns="g-$(mktemp -u XXXXXX)"
-s_ip4="192.0.2.1"
-c_ip4="192.0.2.10"
-g_ip4="192.0.2.254"
-s_ip6="2001:db8::1"
-c_ip6="2001:db8::10"
-g_ip6="2001:db8::254"
-
-gateway_create()
-{
- ip netns add ${g_ns}
- ip -n ${g_ns} link add br0 type bridge
- ip -n ${g_ns} link set br0 up
- ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
- ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
-}
-
-gateway_destroy()
-{
- ip -n ${g_ns} link del br0
- ip netns del ${g_ns}
-}
-
-server_create()
-{
- ip netns add ${s_ns}
- ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
-
- for i in $(seq 0 2); do
- ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
-
- ip -n ${g_ns} link set s${i} up
- ip -n ${g_ns} link set s${i} master br0
- ip -n ${s_ns} link set eth${i} master bond0
-
- tc -n ${g_ns} qdisc add dev s${i} clsact
- done
-
- ip -n ${s_ns} link set bond0 up
- ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
- ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
-}
-
-# Reset bond with new mode and options
-bond_reset()
-{
- local param="$1"
-
- ip -n ${s_ns} link set bond0 down
- ip -n ${s_ns} link del bond0
-
- ip -n ${s_ns} link add bond0 type bond $param
- for i in $(seq 0 2); do
- ip -n ${s_ns} link set eth$i master bond0
- done
-
- ip -n ${s_ns} link set bond0 up
- ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
- ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
-}
-
-server_destroy()
-{
- for i in $(seq 0 2); do
- ip -n ${s_ns} link del eth${i}
- done
- ip netns del ${s_ns}
-}
-
-client_create()
-{
- ip netns add ${c_ns}
- ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
-
- ip -n ${g_ns} link set c0 up
- ip -n ${g_ns} link set c0 master br0
-
- ip -n ${c_ns} link set eth0 up
- ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
- ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
-}
-
-client_destroy()
-{
- ip -n ${c_ns} link del eth0
- ip netns del ${c_ns}
-}
+source bond_topo_2d1c.sh
setup_prepare()
{
gateway_create
server_create
client_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- client_destroy
- server_destroy
- gateway_destroy
-}
-
-bond_check_connection()
-{
- local msg=${1:-"check connection"}
- sleep 2
- ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
- check_err $? "${msg}: ping failed"
- ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
- check_err $? "${msg}: ping6 failed"
+ # Add the extra device, as this topology uses three downlinks for bond0
+ local i=2
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+ tc -n ${g_ns} qdisc add dev s${i} clsact
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
index 0cf9e47e3209..a5c2aec52898 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
@@ -16,10 +16,9 @@
# +----------------|--+ +--|-----------------+
# | |
# +----------------|-------------------------|-----------------+
-# | SW | | |
+# | SW $swp1 + + $swp2 |
+# | | | |
# | +--------------|-------------------------|---------------+ |
-# | | $swp1 + + $swp2 | |
-# | | | | | |
# | | $swp1.10 + + $swp2.10 | |
# | | | |
# | | br0 | |
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
index 7a0a99c1d22f..6fd422d38fe8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -35,7 +35,9 @@ netdev_pre_up_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link add name vx1 up type vxlan id 1000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 nolearning noudpcsum tos inherit ttl 100
@@ -46,7 +48,9 @@ netdev_pre_up_test()
ip link set dev $swp1 master br1
check_err $?
- ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0
+ ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip link set dev br2 addrgenmode none
+ ip link set dev br2 up
ip link add name vx2 up type vxlan id 2000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 nolearning noudpcsum tos inherit ttl 100
@@ -81,7 +85,9 @@ vxlan_vlan_add_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
ip link add name vx1 up type vxlan id 1000 \
@@ -117,7 +123,9 @@ vxlan_bridge_create_test()
dstport 4789 tos inherit ttl 100
# Test with VLAN-aware bridge.
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev vx1 master br1
@@ -142,8 +150,12 @@ bridge_create_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1
- ip link add name br2 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
+ ip link add name br2 type bridge vlan_filtering 1
+ ip link set dev br2 addrgenmode none
+ ip link set dev br2 up
ip link set dev $swp1 master br1
check_err $?
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
index df2b09966886..7d7f862c809c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
@@ -15,10 +15,9 @@
# +----------------|--+ +--|-----------------+
# | |
# +----------------|-------------------------|-----------------+
-# | SW | | |
+# | SW $swp1 + + $swp2 |
+# | | | |
# | +--------------|-------------------------|---------------+ |
-# | | $swp1 + + $swp2 | |
-# | | | | | |
# | | $swp1.10 + + $swp2.10 | |
# | | | |
# | | br0 | |
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index e00435753008..e5589e2fca85 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -165,6 +165,7 @@ mirror_gre_setup_prepare()
simple_if_init $h3
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
index f02d83e94576..fca0e1e642c6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
@@ -83,7 +83,8 @@ h2_destroy()
switch_create()
{
- ip link add name br0 type bridge mcast_snooping 0
+ ip link add name br0 address $(mac_get $swp1) \
+ type bridge mcast_snooping 0
ip link set dev br0 up
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
index 7edaed8eb86a..00d55b0e98c1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
@@ -48,6 +48,7 @@ create_vlan_upper_on_top_of_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -88,6 +89,7 @@ create_8021ad_vlan_upper_on_top_bridge_port()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev $swp1 master br0
ip link set dev br0 up
@@ -155,6 +157,7 @@ create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -177,6 +180,7 @@ create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name bond1 type bond mode 802.3ad
@@ -203,6 +207,7 @@ enslave_front_panel_with_vlan_upper_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name $swp1.100 link $swp1 type vlan id 100
@@ -225,6 +230,7 @@ enslave_lag_with_vlan_upper_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name bond1 type bond mode 802.3ad
@@ -252,6 +258,7 @@ add_ip_address_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -273,6 +280,7 @@ switch_bridge_protocol_from_8021q_to_8021ad()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 87c41f5727c9..914c63d6318a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -65,6 +65,7 @@ h2_destroy()
switch_create()
{
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index 690d8daa71b4..fee74f215cec 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -138,11 +138,15 @@ switch_create()
vlan_create $swp3 111
vlan_create $swp3 222
- ip link add name br111 up type bridge vlan_filtering 0
+ ip link add name br111 type bridge vlan_filtering 0
+ ip link set dev br111 addrgenmode none
+ ip link set dev br111 up
ip link set dev $swp1.111 master br111
ip link set dev $swp3.111 master br111
- ip link add name br222 up type bridge vlan_filtering 0
+ ip link add name br222 type bridge vlan_filtering 0
+ ip link set dev br222 addrgenmode none
+ ip link set dev br222 up
ip link set dev $swp2.222 master br222
ip link set dev $swp3.222 master br222
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index c8e55fa91660..6d892de43fa8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -135,11 +135,13 @@ switch_create()
prio bands 8 priomap 7 7 7 7 7 7 7 7
ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp3 master br1
ip link add name br111 type bridge vlan_filtering 0
+ ip link set dev br111 addrgenmode none
ip link set dev br111 up
ip link set dev $swp2.111 master br111
ip link set dev $swp3.111 master br111
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 7d9e73a43a49..0c47faff9274 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -98,12 +98,12 @@ sb_occ_etc_check()
port_pool_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -126,12 +126,12 @@ port_pool_test()
port_tc_ip_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -154,16 +154,12 @@ port_tc_ip_test()
port_tc_arp_test()
{
- local exp_max_occ=96
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
- if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
- exp_max_occ=144
- fi
-
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+ $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
devlink sb occupancy snapshot $DEVLINK_DEV
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
index f0443b1b05b9..60753d46a2d4 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
@@ -34,6 +34,7 @@ create_vxlan_on_top_of_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 99a332b712f0..4687b0a7dffb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -444,8 +444,12 @@ offload_indication_setup_create()
{
# Create a simple setup with two bridges, each with a VxLAN device
# and one local port
- ip link add name br0 up type bridge mcast_snooping 0
- ip link add name br1 up type bridge mcast_snooping 0
+ ip link add name br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name br1 type bridge mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
@@ -646,8 +650,12 @@ offload_indication_decap_route_test()
RET=0
- ip link add name br0 up type bridge mcast_snooping 0
- ip link add name br1 up type bridge mcast_snooping 0
+ ip link add name br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name br1 type bridge mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
ip link set dev vxlan0 master br0
@@ -780,7 +788,9 @@ __offload_indication_join_vxlan_first()
offload_indication_join_vxlan_first()
{
- ip link add dev br0 up type bridge mcast_snooping 0
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
@@ -815,7 +825,9 @@ __offload_indication_join_vxlan_last()
offload_indication_join_vxlan_last()
{
- ip link add dev br0 up type bridge mcast_snooping 0
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
@@ -842,6 +854,7 @@ sanitization_vlan_aware_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+ ip link set dev br0 addrgenmode none
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
@@ -915,8 +928,10 @@ offload_indication_vlan_aware_setup_create()
{
# Create a simple setup with two VxLAN devices and a single VLAN-aware
# bridge
- ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \
+ ip link add name br0 type bridge mcast_snooping 0 vlan_filtering 1 \
vlan_default_pvid 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -1060,8 +1075,10 @@ offload_indication_vlan_aware_decap_route_test()
offload_indication_vlan_aware_join_vxlan_first()
{
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
@@ -1073,8 +1090,10 @@ offload_indication_vlan_aware_join_vxlan_first()
offload_indication_vlan_aware_join_vxlan_last()
{
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
@@ -1091,8 +1110,10 @@ offload_indication_vlan_aware_l3vni_test()
RET=0
sysctl_set net.ipv6.conf.default.disable_ipv6 1
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 2506621e75df..d68264a5f3f0 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -31,6 +31,9 @@ err_ret=1
# kselftest skip code is 4
err_skip=4
+# Tracing directory to unmount during cleanup, if we mounted it ourselves
+UMOUNT_DIR=""
+
# cgroup RT scheduling prevents chrt commands from succeeding, which
# induces failures in test wakeup tests. Disable for the duration of
# the tests.
@@ -45,6 +48,9 @@ setup() {
cleanup() {
echo $sched_rt_runtime_orig > $sched_rt_runtime
+ if [ -n "${UMOUNT_DIR}" ]; then
+ umount ${UMOUNT_DIR} ||:
+ fi
}
errexit() { # message
@@ -160,11 +166,13 @@ if [ -z "$TRACING_DIR" ]; then
mount -t tracefs nodev /sys/kernel/tracing ||
errexit "Failed to mount /sys/kernel/tracing"
TRACING_DIR="/sys/kernel/tracing"
+ UMOUNT_DIR=${TRACING_DIR}
# If debugfs exists, then so does /sys/kernel/debug
elif [ -d "/sys/kernel/debug" ]; then
mount -t debugfs nodev /sys/kernel/debug ||
errexit "Failed to mount /sys/kernel/debug"
TRACING_DIR="/sys/kernel/debug/tracing"
+ UMOUNT_DIR=${TRACING_DIR}
else
err_ret=$err_skip
errexit "debugfs and tracefs are not configured in this kernel"
@@ -301,7 +309,7 @@ ktaptest() { # result comment
comment="# $comment"
fi
- echo $CASENO $result $INSTANCE$CASENAME $comment
+ echo $result $CASENO $INSTANCE$CASENAME $comment
}
eval_result() { # sigval
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644
index 000000000000..63b76cf2a360
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is contrived to reproduce a problem where the cpu buffers
+# become unavailable, due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store the original cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing on all cpus
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore the original cpumask; note that some cpus should now be traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log was written, i.e. the cpu buffers are still available
+grep -q "test input 1" trace
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
new file mode 100644
index 000000000000..f34b14ef9781
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove probes with BTF arguments
+# requires: dynamic_events "<argname>":README
+
+KPROBES=
+FPROBES=
+
+if grep -qF "p[:[<group>/][<event>]] <place> [<args>]" README ; then
+ KPROBES=yes
+fi
+if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; then
+ FPROBES=yes
+fi
+
+if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then
+ exit_unsupported
+fi
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TP=kfree
+
+if [ "$FPROBES" ] ; then
+echo "f:fpevent $TP object" >> dynamic_events
+echo "t:tpevent $TP ptr" >> dynamic_events
+
+grep -q "fpevent.*object=object" dynamic_events
+grep -q "tpevent.*ptr=ptr" dynamic_events
+
+echo > dynamic_events
+
+echo "f:fpevent $TP "'$arg1' >> dynamic_events
+grep -q "fpevent.*object=object" dynamic_events
+echo > dynamic_events
+
+echo "f:fpevent $TP "'$arg*' >> dynamic_events
+echo "t:tpevent $TP "'$arg*' >> dynamic_events
+
+grep -q "fpevent.*object=object" dynamic_events
+grep -q "tpevent.*ptr=ptr" dynamic_events
+! grep -q "tpevent.*_data" dynamic_events
+fi
+
+echo > dynamic_events
+
+if [ "$KPROBES" ] ; then
+echo "p:kpevent $TP object" >> dynamic_events
+grep -q "kpevent.*object=object" dynamic_events
+
+echo > dynamic_events
+
+echo "p:kpevent $TP "'$arg*' >> dynamic_events
+grep -q "kpevent.*object=object" dynamic_events
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
new file mode 100644
index 000000000000..dc25bcf4f9e2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove fprobe events
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+
+echo "f:myevent1 $PLACE" >> dynamic_events
+echo "f:myevent2 $PLACE%return" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/fprobes/myevent1
+test -d events/fprobes/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
new file mode 100644
index 000000000000..155792eaeee5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT1=kmem_cache_alloc
+TRACEPOINT2=kmem_cache_free
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
new file mode 100644
index 000000000000..812f5b3f6055
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -0,0 +1,111 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Fprobe event parser error log check
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events'
+}
+
+case `uname -m` in
+x86_64|i[3456]86)
+ REG=%ax ;;
+aarch64)
+ REG=%x0 ;;
+*)
+ REG=%r0 ;;
+esac
+
+check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE
+check_error 'f^1a111 vfs_read' # BAD_MAXACT
+check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG
+
+check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent)
+check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR
+check_error 'f:^/bar vfs_read' # NO_GROUP_NAME
+check_error 'f:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG
+
+check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME
+check_error 'f:^ vfs_read' # NO_EVENT_NAME
+check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
+check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
+
+check_error 'f vfs_read ^$retval' # RETVAL_ON_PROBE
+check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM
+
+check_error 'f vfs_read ^$arg10000' # BAD_ARG_NUM
+
+check_error 'f vfs_read ^$none_var' # BAD_VAR
+check_error 'f vfs_read ^'$REG # BAD_VAR
+
+check_error 'f vfs_read ^@12345678abcde' # BAD_MEM_ADDR
+check_error 'f vfs_read ^@+10' # FILE_ON_KPROBE
+
+grep -q "imm-value" README && \
+check_error 'f vfs_read arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 'f vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
+check_error 'f vfs_read ^+0@0)' # DEREF_NEED_BRACE
+check_error 'f vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS
+check_error 'f vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE
+
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+check_error 'f vfs_read +0(^$comm)' # COMM_CANT_DEREF
+fi
+
+check_error 'f vfs_read ^&1' # BAD_FETCH_ARG
+
+
+# We've introduced this limitation with array support
+if grep -q ' <type>\\\[<array-size>\\\]' README; then
+check_error 'f vfs_read +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS?
+check_error 'f vfs_read +0(@11):u8[10^' # ARRAY_NO_CLOSE
+check_error 'f vfs_read +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX
+check_error 'f vfs_read +0(@11):u8[^10a]' # BAD_ARRAY_NUM
+check_error 'f vfs_read +0(@11):u8[^256]' # ARRAY_TOO_BIG
+fi
+
+check_error 'f vfs_read @11:^unknown_type' # BAD_TYPE
+check_error 'f vfs_read $stack0:^string' # BAD_STRING
+check_error 'f vfs_read @11:^b10@a/16' # BAD_BITFIELD
+
+check_error 'f vfs_read ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LONG
+check_error 'f vfs_read ^=@11' # NO_ARG_NAME
+check_error 'f vfs_read ^var.1=@11' # BAD_ARG_NAME
+check_error 'f vfs_read var1=@11 ^var1=@12' # USED_ARG_NAME
+check_error 'f vfs_read ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG
+check_error 'f vfs_read arg1=^' # NO_ARG_BODY
+
+
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo "f:fprobes/testevent $FUNCTION_FORK" > dynamic_events
+check_error '^f:fprobes/testevent do_exit%return' # DIFF_PROBE_TYPE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" "f:fprobes/testevent $FUNCTION_FORK abcd=\\1" > dynamic_events
+check_error "f:fprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^f:fprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE
+fi
+
+# %return suffix errors
+check_error 'f vfs_read^%hoge' # BAD_ADDR_SUFFIX
+
+# BTF arguments errors
+if grep -q "<argname>" README; then
+check_error 'f vfs_read args=^$arg*' # BAD_VAR_ARGS
+check_error 'f vfs_read +0(^$arg*)' # BAD_VAR_ARGS
+check_error 'f vfs_read $arg* ^$arg*' # DOUBLE_ARGS
+check_error 'f vfs_read%return ^$arg*' # NOFENTRY_ARGS
+check_error 'f vfs_read ^hoge' # NO_BTFARG
+check_error 'f kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
+check_error 'f kfree%return ^$retval' # NO_RETVAL
+else
+check_error 'f vfs_read ^$arg*' # NOSUP_BTFARG
+check_error 't kfree ^$arg*' # NOSUP_BTFARG
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc
new file mode 100644
index 000000000000..da117b8f1d12
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Tracepoint probe event parser error log check
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events'
+}
+
+check_error 't^100 kfree' # BAD_MAXACT_TYPE
+
+check_error 't ^non_exist_tracepoint' # NO_TRACEPOINT
+check_error 't:^/bar kfree' # NO_GROUP_NAME
+check_error 't:^12345678901234567890123456789012345678901234567890123456789012345/bar kfree' # GROUP_TOO_LONG
+
+check_error 't:^foo.1/bar kfree' # BAD_GROUP_NAME
+check_error 't:^ kfree' # NO_EVENT_NAME
+check_error 't:foo/^12345678901234567890123456789012345678901234567890123456789012345 kfree' # EVENT_TOO_LONG
+check_error 't:foo/^bar.1 kfree' # BAD_EVENT_NAME
+
+check_error 't kfree ^$retval' # RETVAL_ON_PROBE
+check_error 't kfree ^$stack10000' # BAD_STACK_NUM
+
+check_error 't kfree ^$arg10000' # BAD_ARG_NUM
+
+check_error 't kfree ^$none_var' # BAD_VAR
+check_error 't kfree ^%rax' # BAD_VAR
+
+check_error 't kfree ^@12345678abcde' # BAD_MEM_ADDR
+check_error 't kfree ^@+10' # FILE_ON_KPROBE
+
+grep -q "imm-value" README && \
+check_error 't kfree arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 't kfree arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
+check_error 't kfree ^+0@0)' # DEREF_NEED_BRACE
+check_error 't kfree ^+0ab1(@0)' # BAD_DEREF_OFFS
+check_error 't kfree +0(+0(@0^)' # DEREF_OPEN_BRACE
+
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+check_error 't kfree +0(^$comm)' # COMM_CANT_DEREF
+fi
+
+check_error 't kfree ^&1' # BAD_FETCH_ARG
+
+
+# We've introduced this limitation with array support
+if grep -q ' <type>\\\[<array-size>\\\]' README; then
+check_error 't kfree +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS?
+check_error 't kfree +0(@11):u8[10^' # ARRAY_NO_CLOSE
+check_error 't kfree +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX
+check_error 't kfree +0(@11):u8[^10a]' # BAD_ARRAY_NUM
+check_error 't kfree +0(@11):u8[^256]' # ARRAY_TOO_BIG
+fi
+
+check_error 't kfree @11:^unknown_type' # BAD_TYPE
+check_error 't kfree $stack0:^string' # BAD_STRING
+check_error 't kfree @11:^b10@a/16' # BAD_BITFIELD
+
+check_error 't kfree ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LONG
+check_error 't kfree ^=@11' # NO_ARG_NAME
+check_error 't kfree ^var.1=@11' # BAD_ARG_NAME
+check_error 't kfree var1=@11 ^var1=@12' # USED_ARG_NAME
+check_error 't kfree ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG
+check_error 't kfree arg1=^' # NO_ARG_BODY
+
+
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo "t:tracepoint/testevent kfree" > dynamic_events
+check_error '^f:tracepoint/testevent kfree' # DIFF_PROBE_TYPE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" "t:tracepoints/testevent kfree abcd=\\1" > dynamic_events
+check_error "t:tracepoints/testevent kfree ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "t:tracepoints/testevent kfree ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "t:tracepoints/testevent kfree ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^t:tracepoints/testevent kfree abcd=\\1" # SAME_PROBE
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc
new file mode 100644
index 000000000000..e34c0bdef3ed
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph print function return value
+# requires: options/funcgraph-retval options/funcgraph-retval-hex function_graph:tracer
+
+# Make sure that funcgraph-retval works
+
+fail() { # msg
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+# Get our own PID; $$ cannot be used here because it reports the parent's PID.
+read PID _ < /proc/self/stat
+
+[ -f set_ftrace_filter ] && echo proc_reg_write > set_ftrace_filter
+[ -f set_ftrace_pid ] && echo ${PID} > set_ftrace_pid
+echo function_graph > current_tracer
+echo 1 > options/funcgraph-retval
+
+set +e
+enable_tracing
+echo > /proc/interrupts
+disable_tracing
+set -e
+
+: "Test printing the error code in signed decimal format"
+echo 0 > options/funcgraph-retval-hex
+count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l`
+if [ $count -eq 0 ]; then
+    fail "Return value was not printed in signed decimal format"
+fi
+
+: "Test printing the error code in hexadecimal format"
+echo 1 > options/funcgraph-retval-hex
+count=`cat trace | grep 'proc_reg_write' | grep 'fffffffb' | wc -l`
+if [ $count -eq 0 ]; then
+    fail "Return value was not printed in hexadecimal format"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc
new file mode 100644
index 000000000000..4f7cc318f331
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 Akanksha J N, IBM Corporation
+# description: Register multiple kprobe events in a function
+# requires: kprobe_events
+
+for i in `seq 0 255`; do
+ echo p $FUNCTION_FORK+${i} >> kprobe_events || continue
+done
+
+cat kprobe_events >> $testlog
+
+echo 1 > events/kprobes/enable
+( echo "forked" )
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc
new file mode 100644
index 000000000000..9f5d99328086
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc
@@ -0,0 +1,34 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 Akanksha J N, IBM Corporation
+# description: Register/unregister optimized probe
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+;;
+arm*)
+;;
+ppc*)
+;;
+*)
+	echo "Please add support for this architecture"
+ exit_unsupported
+esac
+
+DEFAULT=$(cat /proc/sys/debug/kprobes-optimization)
+echo 1 > /proc/sys/debug/kprobes-optimization
+for i in `seq 0 255`; do
+ echo "p:testprobe $FUNCTION_FORK+${i}" > kprobe_events || continue
+ echo 1 > events/kprobes/enable || continue
+ (echo "forked")
+ PROBE=$(grep $FUNCTION_FORK /sys/kernel/debug/kprobes/list)
+ echo 0 > events/kprobes/enable
+ echo > kprobe_events
+ if echo $PROBE | grep -q OPTIMIZED; then
+ echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization
+ exit_pass
+ fi
+done
+echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization
+exit_unresolved
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index 9e85d3019ff0..65fbb26fd58c 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -8,7 +8,7 @@ check_error() { # command-with-error-pos-by-^
}
if grep -q 'r\[maxactive\]' README; then
-check_error 'p^100 vfs_read' # MAXACT_NO_KPROBE
+check_error 'p^100 vfs_read' # BAD_MAXACT_TYPE
check_error 'r^1a111 vfs_read' # BAD_MAXACT
check_error 'r^100000 vfs_read' # MAXACT_TOO_BIG
fi
@@ -103,4 +103,18 @@ check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX
check_error 'p ^vfs_read+10%return' # BAD_RETPROBE
fi
+# BTF arguments errors
+if grep -q "<argname>" README; then
+check_error 'p vfs_read args=^$arg*' # BAD_VAR_ARGS
+check_error 'p vfs_read +0(^$arg*)' # BAD_VAR_ARGS
+check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS
+check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS
+check_error 'p vfs_read+8 ^$arg*' # NOFENTRY_ARGS
+check_error 'p vfs_read ^hoge' # NO_BTFARG
+check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
+check_error 'r kfree ^$retval' # NO_RETVAL
+else
+check_error 'p vfs_read ^$arg*' # NOSUP_BTFARG
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
index 213d890ed188..174376ddbc6c 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
-# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README ping:program
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "' >> synthetic_events":README ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
index 955e3ceea44b..b927ee54c02d 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test synthetic_events syntax parser errors
-# requires: synthetic_events error_log "char name[]' >> synthetic_events":README
+# requires: synthetic_events error_log "' >> synthetic_events":README
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index 3651ce17beeb..d183f878360b 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -24,6 +24,7 @@
static long timeout_ns = 100000; /* 100us default timeout */
static futex_t futex_pi;
+static pthread_barrier_t barrier;
void usage(char *prog)
{
@@ -48,6 +49,8 @@ void *get_pi_lock(void *arg)
if (ret != 0)
error("futex_lock_pi failed\n", ret);
+ pthread_barrier_wait(&barrier);
+
/* Blocks forever */
ret = futex_wait(&lock, 0, NULL, 0);
error("futex_wait failed\n", ret);
@@ -130,6 +133,7 @@ int main(int argc, char *argv[])
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
+ pthread_barrier_init(&barrier, NULL, 2);
pthread_create(&thread, NULL, get_pi_lock, NULL);
/* initialize relative timeout */
@@ -163,6 +167,9 @@ int main(int argc, char *argv[])
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+ /* Wait until the other thread calls futex_lock_pi() */
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_destroy(&barrier);
/*
* FUTEX_LOCK_PI with CLOCK_REALTIME
* Due to historical reasons, FUTEX_LOCK_PI supports only realtime
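The barrier makes the timeout tests and get_pi_lock() rendezvous explicitly instead of racing on whether the helper thread has actually taken the PI lock yet. A minimal sketch of the same two-party handshake, assuming only POSIX threads; owner_thread() and hold_resource() are hypothetical names, not part of the test:

/*
 * Hedged sketch of the rendezvous the patch adds; hold_resource() stands
 * in for get_pi_lock()'s futex_lock_pi() call.
 */
#include <pthread.h>

static pthread_barrier_t barrier;

static void hold_resource(void) { /* e.g. acquire the PI futex */ }

static void *owner_thread(void *arg)
{
	hold_resource();
	pthread_barrier_wait(&barrier);	/* publish: lock is held */
	/* the real thread blocks forever here */
	return NULL;
}

int main(void)
{
	pthread_t thread;

	pthread_barrier_init(&barrier, NULL, 2);	/* two participants */
	pthread_create(&thread, NULL, owner_thread, NULL);

	/* ... tests that do not depend on the lock being held ... */

	pthread_barrier_wait(&barrier);	/* lock is now guaranteed held */
	pthread_barrier_destroy(&barrier);

	/* ... tests that require a blocked lock owner ... */
	pthread_join(thread, NULL);
	return 0;
}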
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
index fa2ce2b9dd5f..6fb66a687f17 100755
--- a/tools/testing/selftests/gpio/gpio-sim.sh
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -152,9 +152,9 @@ sysfs_set_pull() {
local PULL=$4
local DEVNAME=`configfs_dev_name $DEV`
local CHIPNAME=`configfs_chip_name $DEV $BANK`
- local SYSFSPATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull"
+ local SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull"
- echo $PULL > $SYSFSPATH || fail "Unable to set line pull in sysfs"
+ echo $PULL > $SYSFS_PATH || fail "Unable to set line pull in sysfs"
}
# Load the gpio-sim module. This will pull in configfs if needed too.
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py
index b1eb2bc787fc..f92fe8e02c1b 100644
--- a/tools/testing/selftests/hid/tests/test_wacom_generic.py
+++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -31,6 +31,7 @@ from enum import Enum
from hidtools.hut import HUT
from hidtools.hid import HidUnit
from . import base
+from . import test_multitouch
import libevdev
import pytest
@@ -517,7 +518,7 @@ class BaseTest:
for usage in get_report_usages(report):
yield usage
- def assertName(self, uhdev):
+ def assertName(self, uhdev, type):
"""
Assert that the name is as we expect.
@@ -526,7 +527,7 @@ class BaseTest:
this assertion from the base class to work properly.
"""
evdev = uhdev.get_evdev()
- expected_name = uhdev.name + " Pen"
+ expected_name = uhdev.name + type
if "wacom" not in expected_name.lower():
expected_name = "Wacom " + expected_name
assert evdev.name == expected_name
@@ -549,6 +550,12 @@ class BaseTest:
usage_id("Generic Desktop", "Y"): PhysRange(
PhysRange.CENTIMETER, 5, 150
),
+ usage_id("Digitizers", "Width"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "Height"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
usage_id("Digitizers", "X Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
usage_id("Digitizers", "Y Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
usage_id("Digitizers", "Twist"): PhysRange(PhysRange.DEGREE, 358, 360),
@@ -603,7 +610,17 @@ class BaseTest:
pass
-class TestOpaqueTablet(BaseTest.TestTablet):
+class PenTabletTest(BaseTest.TestTablet):
+ def assertName(self, uhdev):
+ super().assertName(uhdev, " Pen")
+
+
+class TouchTabletTest(BaseTest.TestTablet):
+ def assertName(self, uhdev):
+ super().assertName(uhdev, " Finger")
+
+
+class TestOpaqueTablet(PenTabletTest):
def create_device(self):
return OpaqueTablet()
@@ -842,3 +859,64 @@ class TestPTHX60_Pen(TestOpaqueCTLTablet):
libevdev.InputEvent(libevdev.EV_KEY.BTN_0, 0),
],
)
+
+
+class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest):
+ def create_device(self):
+ return test_multitouch.Digitizer(
+ "DTH 2452",
+ rdesc="05 0d 09 04 a1 01 85 0c 95 01 75 08 15 00 26 ff 00 81 03 09 54 81 02 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 75 08 95 0e 81 03 09 55 26 ff 00 75 08 b1 02 85 0a 06 00 ff 09 c5 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 13 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ input_info=(0x3, 0x056A, 0x0383),
+ )
+
+ def test_contact_id_0(self):
+ """
+ Bring a finger in contact with the tablet, then hold it down and remove it.
+
+        Most tablets report contact ID = 0 as an invalid touch, but a few use
+        it as a valid value. Ensure that, as long as the confidence bit is set
+        to 1, a contact with ID 0 is processed as a valid touch, so that those
+        few tablets keep working.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = test_multitouch.Touch(0, 50, 100)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot = self.get_slot(uhdev, t0, 0)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_confidence_false(self):
+ """
+ Bring a finger in contact with the tablet with confidence set to false.
+
+        Ensure that a contact whose confidence bit is set to false does not result in a touch event.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = test_multitouch.Touch(1, 50, 100)
+ t0.confidence = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot = self.get_slot(uhdev, t0, 0)
+
+        assert not events
\ No newline at end of file
diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh
index 681b906b4853..4da48bf6b328 100755
--- a/tools/testing/selftests/hid/vmtest.sh
+++ b/tools/testing/selftests/hid/vmtest.sh
@@ -79,6 +79,7 @@ recompile_kernel()
cd "${kernel_checkout}"
${make_command} olddefconfig
+ ${make_command} headers
${make_command}
}
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 294619ade49f..70e0a465e30d 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -8,7 +8,8 @@ export logfile=/dev/stdout
export per_test_logging=
# Defaults for "settings" file fields:
-# "timeout" how many seconds to let each test run before failing.
+# "timeout" how many seconds to let each test run before running
+# over our soft timeout limit.
export kselftest_default_timeout=45
# There isn't a shell-agnostic way to find the path of a sourced file,
@@ -35,7 +36,8 @@ tap_timeout()
{
# Make sure tests will time out if utility is available.
if [ -x /usr/bin/timeout ] ; then
- /usr/bin/timeout --foreground "$kselftest_timeout" $1
+ /usr/bin/timeout --foreground "$kselftest_timeout" \
+ /usr/bin/timeout "$kselftest_timeout" $1
else
$1
fi
@@ -90,6 +92,14 @@ run_one()
done < "$settings"
fi
+ # Command line timeout overrides the settings file
+ if [ -n "$kselftest_override_timeout" ]; then
+ kselftest_timeout="$kselftest_override_timeout"
+ echo "# overriding timeout to $kselftest_timeout" >> "$logfile"
+ else
+ echo "# timeout set to $kselftest_timeout" >> "$logfile"
+ fi
+
TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
echo "# $TEST_HDR_MSG"
if [ ! -e "$TEST" ]; then
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 5fd49ad0c696..e05ac8261046 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -938,7 +938,11 @@ void __wait_for_test(struct __test_metadata *t)
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- if (t->termsig != -1) {
+ if (WEXITSTATUS(status) == 255) {
+ /* SKIP */
+ t->passed = 1;
+ t->skip = 1;
+ } else if (t->termsig != -1) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
"# %s: Test exited normally instead of by signal (code: %d)\n",
@@ -950,11 +954,6 @@ void __wait_for_test(struct __test_metadata *t)
case 0:
t->passed = 1;
break;
- /* SKIP */
- case 255:
- t->passed = 1;
- t->skip = 1;
- break;
/* Other failure, assume step report. */
default:
t->passed = 0;
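Moving the 255 check ahead of the termsig handling means an exit status of 255 is always treated as SKIP, even for tests that were expected to die by a signal. A hedged sketch of the wait-side convention; report() is a hypothetical helper, not harness API:

/*
 * Sketch of the waitpid-side convention this hunk enforces: exit status
 * 255 means SKIP and takes precedence over termsig checks.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>

static void report(pid_t pid)
{
	int status;

	waitpid(pid, &status, 0);
	if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) == 255)		/* checked first */
			printf("SKIP\n");
		else if (WEXITSTATUS(status) == 0)
			printf("PASS\n");
		else
			printf("FAIL at step %d\n", WEXITSTATUS(status));
	} else if (WIFSIGNALED(status)) {
		printf("terminated by signal %d\n", WTERMSIG(status));
	}
}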
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4761b768b773..c692cc86e7da 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -61,6 +61,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
# Compiled test targets
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
@@ -164,6 +165,7 @@ TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
+TEST_GEN_PROGS_s390x += s390x/cmma_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
@@ -184,6 +186,8 @@ TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
LIBKVM += $(LIBKVM_$(ARCH_DIR))
+OVERRIDE_TARGETS = 1
+
# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
# which causes the environment variable to override the makefile).
@@ -198,7 +202,7 @@ else
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
- -Wno-gnu-variable-sized-type-not-at-end \
+	-Wno-gnu-variable-sized-type-not-at-end -MD \
-fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
@@ -225,7 +229,18 @@ LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
-EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
+TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+-include $(TEST_DEP_FILES)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
+$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index d4e1f4af29d6..4f10055af2aa 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -48,6 +48,34 @@ struct reg_sublist {
__u64 rejects_set_n;
};
+struct feature_id_reg {
+ __u64 reg;
+ __u64 id_reg;
+ __u64 feat_shift;
+ __u64 feat_min;
+};
+
+static struct feature_id_reg feat_id_regs[] = {
+ {
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 0,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 4,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 4,
+ 1
+ }
+};
+
struct vcpu_config {
char *name;
struct reg_sublist sublists[];
@@ -68,7 +96,8 @@ static int vcpu_configs_n;
#define for_each_missing_reg(i) \
for ((i) = 0; (i) < blessed_n; ++(i)) \
- if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
+ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \
+ if (check_supported_feat_reg(vcpu, blessed_reg[i]))
#define for_each_new_reg(i) \
for_each_reg_filtered(i) \
@@ -132,6 +161,25 @@ static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
return false;
}
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ int i, ret;
+ __u64 data, feat_val;
+
+ for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+ if (feat_id_regs[i].reg == reg) {
+ ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+ if (ret < 0)
+ return false;
+
+ feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+ return feat_val >= feat_id_regs[i].feat_min;
+ }
+ }
+
+ return true;
+}
+
static const char *str_with_index(const char *template, __u64 index)
{
char *str, *p;
@@ -843,12 +891,15 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */
ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */
ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
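check_supported_feat_reg() above gates each optional register on a 4-bit feature field in an ID register. The extraction it performs reduces to a sketch like the following; feat_present() is a hypothetical name, not selftest API:

/* Hedged sketch of the 4-bit ID-register feature gate used above. */
#include <linux/types.h>
#include <stdbool.h>

static bool feat_present(__u64 id_reg_val, __u64 shift, __u64 min)
{
	return ((id_reg_val >> shift) & 0xf) >= min;
}

Per the feat_id_regs table, for example, PIR_EL1 is reported only when ((ID_AA64MMFR3_EL1 >> 4) & 0xf) >= 1.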
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 2439c4043fed..09c116a82a84 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -128,6 +128,7 @@ static void prefault_mem(void *alias, uint64_t len)
static void run_test(enum vm_guest_mode mode, void *arg)
{
+ struct memstress_vcpu_args *vcpu_args;
struct test_params *p = arg;
struct uffd_desc **uffd_descs = NULL;
struct timespec start;
@@ -145,24 +146,24 @@ static void run_test(enum vm_guest_mode mode, void *arg)
"Failed to allocate buffer for guest data pattern");
memset(guest_data_prototype, 0xAB, demand_paging_size);
+ if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_args = &memstress_args.vcpu_args[i];
+ prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
+ vcpu_args->pages * memstress_args.guest_page_size);
+ }
+ }
+
if (p->uffd_mode) {
uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
TEST_ASSERT(uffd_descs, "Memory allocation failed");
-
for (i = 0; i < nr_vcpus; i++) {
- struct memstress_vcpu_args *vcpu_args;
void *vcpu_hva;
- void *vcpu_alias;
vcpu_args = &memstress_args.vcpu_args[i];
/* Cache the host addresses of the region */
vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
- vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
-
- prefault_mem(vcpu_alias,
- vcpu_args->pages * memstress_args.guest_page_size);
-
/*
* Set up user fault fd to handle demand paging
* requests.
@@ -207,10 +208,11 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
- " [-b memory] [-s type] [-v vcpus] [-o]\n", name);
+ " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
+ kvm_print_vcpu_pinning_help();
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
@@ -228,6 +230,7 @@ static void help(char *name)
int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ const char *cpulist = NULL;
struct test_params p = {
.src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
@@ -236,7 +239,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -263,6 +266,9 @@ int main(int argc, char *argv[])
TEST_ASSERT(nr_vcpus <= max_vcpus,
"Invalid number of vcpus, must be between 1 and %d", max_vcpus);
break;
+ case 'c':
+ cpulist = optarg;
+ break;
case 'o':
p.partition_vcpu_memory_access = false;
break;
@@ -278,6 +284,12 @@ int main(int argc, char *argv[])
TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
}
+ if (cpulist) {
+ kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu,
+ nr_vcpus);
+ memstress_args.pin_vcpus = true;
+ }
+
for_each_guest_mode(run_test, &p);
return 0;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index e9d6d1aecf89..d374dbcf9a53 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -136,77 +136,6 @@ struct test_params {
bool random_access;
};
-static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
-{
- int i;
-
- for (i = 0; i < slots; i++) {
- int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
- int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
-
- vm_mem_region_set_flags(vm, slot, flags);
- }
-}
-
-static inline void enable_dirty_logging(struct kvm_vm *vm, int slots)
-{
- toggle_dirty_logging(vm, slots, true);
-}
-
-static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
-{
- toggle_dirty_logging(vm, slots, false);
-}
-
-static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
-{
- int i;
-
- for (i = 0; i < slots; i++) {
- int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
-
- kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
- }
-}
-
-static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
- int slots, uint64_t pages_per_slot)
-{
- int i;
-
- for (i = 0; i < slots; i++) {
- int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
-
- kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
- }
-}
-
-static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
-{
- unsigned long **bitmaps;
- int i;
-
- bitmaps = malloc(slots * sizeof(bitmaps[0]));
- TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
-
- for (i = 0; i < slots; i++) {
- bitmaps[i] = bitmap_zalloc(pages_per_slot);
- TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
- }
-
- return bitmaps;
-}
-
-static void free_bitmaps(unsigned long *bitmaps[], int slots)
-{
- int i;
-
- for (i = 0; i < slots; i++)
- free(bitmaps[i]);
-
- free(bitmaps);
-}
-
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
@@ -236,7 +165,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
pages_per_slot = host_num_pages / p->slots;
- bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
+ bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);
if (dirty_log_manual_caps)
vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
@@ -277,7 +206,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Enable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
- enable_dirty_logging(vm, p->slots);
+ memstress_enable_dirty_logging(vm, p->slots);
ts_diff = timespec_elapsed(start);
pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -306,7 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start);
- get_dirty_log(vm, bitmaps, p->slots);
+ memstress_get_dirty_log(vm, bitmaps, p->slots);
ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff);
@@ -315,7 +244,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (dirty_log_manual_caps) {
clock_gettime(CLOCK_MONOTONIC, &start);
- clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
+ memstress_clear_dirty_log(vm, bitmaps, p->slots,
+ pages_per_slot);
ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff);
@@ -334,7 +264,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Disable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
- disable_dirty_logging(vm, p->slots);
+ memstress_disable_dirty_logging(vm, p->slots);
ts_diff = timespec_elapsed(start);
pr_info("Disabling dirty logging time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -359,7 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
- free_bitmaps(bitmaps, p->slots);
+ memstress_free_bitmaps(bitmaps, p->slots);
arch_cleanup_vm(vm);
memstress_destroy_vm(vm);
}
@@ -402,17 +332,7 @@ static void help(char *name)
" so -w X means each page has an X%% chance of writing\n"
" and a (100-X)%% chance of reading.\n"
" (default: 100 i.e. all pages are written to.)\n");
- printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n"
- " values (target pCPU), one for each vCPU, plus an optional\n"
- " entry for the main application task (specified via entry\n"
- " <nr_vcpus + 1>). If used, entries must be provided for all\n"
- " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
- " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
- " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
- " ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n"
- " To leave the application task unpinned, drop the final entry:\n\n"
- " ./dirty_log_perf_test -v 3 -c 22,23,24\n\n"
- " (default: no pinning)\n");
+ kvm_print_vcpu_pinning_help();
puts("");
exit(0);
}
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index a089c356f354..eb1ff597bcca 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -362,8 +362,10 @@ static inline void read_stats_header(int stats_fd, struct kvm_stats_header *head
{
ssize_t ret;
- ret = read(stats_fd, header, sizeof(*header));
- TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+ ret = pread(stats_fd, header, sizeof(*header), 0);
+ TEST_ASSERT(ret == sizeof(*header),
+ "Failed to read '%lu' header bytes, ret = '%ld'",
+ sizeof(*header), ret);
}
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
@@ -733,6 +735,7 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
int nr_vcpus);
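Both stats hunks switch from read() to pread() with an explicit offset, so validation no longer depends on the file position and the same stats fd (or a dup() of it) can be checked repeatedly. A hedged sketch of the resulting access pattern, assuming only the binary stats layout from the KVM API (header at offset 0, name string immediately after it); read_stats_id() is a hypothetical helper:

/* Hedged sketch: position-independent reads of a KVM binary stats fd. */
#include <linux/kvm.h>
#include <unistd.h>

static int read_stats_id(int stats_fd, char *id, size_t id_len)
{
	struct kvm_stats_header header;

	/* Offset 0: fixed-size header; the file position is never used. */
	if (pread(stats_fd, &header, sizeof(header), 0) != sizeof(header))
		return -1;
	if (header.name_size > id_len)
		return -1;
	/* The id string starts immediately after the header. */
	if (pread(stats_fd, id, header.name_size, sizeof(header)) !=
	    (ssize_t)header.name_size)
		return -1;
	return 0;
}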
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
index 72e3e358ef7b..ce4e603050ea 100644
--- a/tools/testing/selftests/kvm/include/memstress.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -72,4 +72,12 @@ void memstress_guest_code(uint32_t vcpu_id);
uint64_t memstress_nested_pages(int nr_vcpus);
void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots);
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot);
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot);
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots);
+
#endif /* SELFTEST_KVM_MEMSTRESS_H */
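The moved helpers give any memstress user a one-call-per-phase dirty-logging cycle. A hedged usage sketch, assuming vm, slots and pages_per_slot are set up as in dirty_log_perf_test:

/* Hedged usage sketch for the new memstress dirty-logging helpers. */
#include "kvm_util.h"
#include "memstress.h"

static void dirty_logging_cycle(struct kvm_vm *vm, int slots,
				uint64_t pages_per_slot)
{
	unsigned long **bitmaps;

	bitmaps = memstress_alloc_bitmaps(slots, pages_per_slot);

	memstress_enable_dirty_logging(vm, slots);
	/* ... let vCPUs dirty guest memory here ... */
	memstress_get_dirty_log(vm, bitmaps, slots);
	memstress_clear_dirty_log(vm, bitmaps, slots, pages_per_slot);
	memstress_disable_dirty_logging(vm, slots);

	memstress_free_bitmaps(bitmaps, slots);
}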
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index a7001e29dc06..698c1cfa3111 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -43,8 +43,10 @@ static void stats_test(int stats_fd)
id = malloc(header.name_size);
TEST_ASSERT(id, "Allocate memory for id string");
- ret = read(stats_fd, id, header.name_size);
- TEST_ASSERT(ret == header.name_size, "Read id string");
+ ret = pread(stats_fd, id, header.name_size, sizeof(header));
+ TEST_ASSERT(ret == header.name_size,
+	TEST_ASSERT(ret == header.name_size,
+		    "Expected name size '%u', read '%lu' bytes",
/* Check id string, that should start with "kvm" */
TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
@@ -165,23 +167,7 @@ static void stats_test(int stats_fd)
free(stats_data);
free(stats_desc);
free(id);
-}
-
-
-static void vm_stats_test(struct kvm_vm *vm)
-{
- int stats_fd = vm_get_stats_fd(vm);
-
- stats_test(stats_fd);
- close(stats_fd);
- TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
-}
-
-static void vcpu_stats_test(struct kvm_vcpu *vcpu)
-{
- int stats_fd = vcpu_get_stats_fd(vcpu);
- stats_test(stats_fd);
close(stats_fd);
TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
}
@@ -199,6 +185,7 @@ static void vcpu_stats_test(struct kvm_vcpu *vcpu)
int main(int argc, char *argv[])
{
+ int vm_stats_fds, *vcpu_stats_fds;
int i, j;
struct kvm_vcpu **vcpus;
struct kvm_vm **vms;
@@ -231,23 +218,58 @@ int main(int argc, char *argv[])
vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
+ /*
+ * Not per-VM as the array is populated, used, and invalidated within a
+ * single for-loop iteration.
+ */
+	vcpu_stats_fds = calloc(max_vcpu, sizeof(*vcpu_stats_fds));
+	TEST_ASSERT(vcpu_stats_fds, "Allocate memory for vCPU stats fds");
+
for (i = 0; i < max_vm; ++i) {
vms[i] = vm_create_barebones();
for (j = 0; j < max_vcpu; ++j)
vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
}
- /* Check stats read for every VM and VCPU */
+ /*
+ * Check stats read for every VM and vCPU, with a variety of flavors.
+ * Note, stats_test() closes the passed in stats fd.
+ */
for (i = 0; i < max_vm; ++i) {
- vm_stats_test(vms[i]);
+ /*
+ * Verify that creating multiple userspace references to a
+ * single stats file works and doesn't cause explosions.
+ */
+ vm_stats_fds = vm_get_stats_fd(vms[i]);
+ stats_test(dup(vm_stats_fds));
+
+ /* Verify userspace can instantiate multiple stats files. */
+ stats_test(vm_get_stats_fd(vms[i]));
+
+ for (j = 0; j < max_vcpu; ++j) {
+ vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
+ stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
+ }
+
+ /*
+ * Close the VM fd and redo the stats tests. KVM should gift a
+ * reference (to the VM) to each stats fd, i.e. stats should
+ * still be accessible even after userspace has put its last
+ * _direct_ reference to the VM.
+ */
+ kvm_vm_free(vms[i]);
+
+ stats_test(vm_stats_fds);
for (j = 0; j < max_vcpu; ++j)
- vcpu_stats_test(vcpus[i * max_vcpu + j]);
+ stats_test(vcpu_stats_fds[j]);
+
ksft_test_result_pass("vm%i\n", i);
}
- for (i = 0; i < max_vm; ++i)
- kvm_vm_free(vms[i]);
free(vms);
+ free(vcpus);
+ free(vcpu_stats_fds);
ksft_finished(); /* Print results and exit() accordingly */
}
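The reworked loop relies on two properties: dup()ed descriptors share the underlying stats file, and each stats fd holds its own reference to the VM, keeping stats readable after kvm_vm_free() drops userspace's last direct VM reference. A reduced sketch of that second property; check_stats_outlive_vm() is a hypothetical helper, not part of the test:

/* Hedged sketch: a stats fd keeps the VM alive after its fd is closed. */
#include <unistd.h>

#include "kvm_util.h"
#include "test_util.h"

static void check_stats_outlive_vm(struct kvm_vm *vm)
{
	int stats_fd = vm_get_stats_fd(vm);
	char byte;

	kvm_vm_free(vm);	/* drop the last direct VM reference */

	/* The stats fd pins the VM on its own and stays readable. */
	TEST_ASSERT(pread(stats_fd, &byte, 1, 0) == 1,
		    "stats fd should outlive the VM fd");
	close(stats_fd);
}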
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 298c4372fb1a..9741a7ff6380 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -494,6 +494,23 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
return pcpu;
}
+void kvm_print_vcpu_pinning_help(void)
+{
+ const char *name = program_invocation_name;
+
+ printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n"
+ " values (target pCPU), one for each vCPU, plus an optional\n"
+ " entry for the main application task (specified via entry\n"
+ " <nr_vcpus + 1>). If used, entries must be provided for all\n"
+ " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
+ " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
+ " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
+ " %s -v 3 -c 22,23,24,50\n\n"
+ " To leave the application task unpinned, drop the final entry:\n\n"
+ " %s -v 3 -c 22,23,24\n\n"
+ " (default: no pinning)\n", name, name);
+}
+
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
int nr_vcpus)
{
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 5f1d3173c238..df457452d146 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -5,6 +5,7 @@
#define _GNU_SOURCE
#include <inttypes.h>
+#include <linux/bitmap.h>
#include "kvm_util.h"
#include "memstress.h"
@@ -64,6 +65,9 @@ void memstress_guest_code(uint32_t vcpu_idx)
GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx);
while (true) {
+ for (i = 0; i < sizeof(memstress_args); i += args->guest_page_size)
+ (void) *((volatile char *)args + i);
+
for (i = 0; i < pages; i++) {
if (args->random_access)
page = guest_random_u32(&rand_state) % pages;
@@ -320,3 +324,74 @@ void memstress_join_vcpu_threads(int nr_vcpus)
for (i = 0; i < nr_vcpus; i++)
pthread_join(vcpu_threads[i].thread, NULL);
}
+
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+ int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+ vm_mem_region_set_flags(vm, slot, flags);
+ }
+}
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, true);
+}
+
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, false);
+}
+
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
+ }
+}
+
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
+ }
+}
+
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 92cef20902f1..271f63891581 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -70,7 +70,7 @@ static void *uffd_handler_thread_fn(void *arg)
r = read(pollfd[1].fd, &tmp_chr, 1);
TEST_ASSERT(r == 1,
"Error reading pipefd in UFFD thread\n");
- return NULL;
+ break;
}
if (!(pollfd[0].revents & POLLIN))
@@ -103,7 +103,7 @@ static void *uffd_handler_thread_fn(void *arg)
ts_diff = timespec_elapsed(start);
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
- pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+ pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
return NULL;
}
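The NSEC_PER_SEC change fixes a units bug: dividing tv_nsec by 1e8 instead of 1e9 overstated the fractional seconds and skewed the reported fault rate. The corrected computation in isolation, as a hedged sketch:

/* Hedged sketch of the corrected pages-per-second computation. */
#include <time.h>

#ifndef NSEC_PER_SEC
#define NSEC_PER_SEC 1000000000L
#endif

static double pages_per_sec(long pages, struct timespec d)
{
	/* tv_nsec is scaled by 1e9, not 1e8, to convert it to seconds. */
	return pages / ((double)d.tv_sec + (double)d.tv_nsec / NSEC_PER_SEC);
}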
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
new file mode 100644
index 000000000000..1d73e78e8fa7
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ * Nico Boehr <nrb@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN 4096
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN 8192
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so that use_cmma is enabled and the CMMA-related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+ asm volatile(
+ /* load TEST_DATA_START_GFN into r1 */
+ " llilf 1,%[start_gfn]\n"
+ /* calculate the address from the gfn */
+ " sllg 1,1,12(0)\n"
+ /* set the first page in TEST_DATA memslot to STABLE */
+ " .insn rrf,0xb9ab0000,2,1,1,0\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN)
+ : "r1", "r2", "memory", "cc"
+ );
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+ asm volatile(
+ /* r1 = TEST_DATA_START_GFN */
+ " xgr 1,1\n"
+ " llilf 1,%[start_gfn]\n"
+ /* r5 = TEST_DATA_PAGE_COUNT */
+ " lghi 5,%[page_count]\n"
+ /* r5 += r1 */
+ "2: agfr 5,1\n"
+ /* r2 = r1 << 12 */
+ "1: sllg 2,1,12(0)\n"
+ /* essa(r4, r2, SET_STABLE) */
+ " .insn rrf,0xb9ab0000,4,2,1,0\n"
+ /* i++ */
+ " agfi 1,1\n"
+ /* if r1 < r5 goto 1 */
+ " cgrjl 1,5,1b\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN),
+ [page_count] "L"(TEST_DATA_PAGE_COUNT)
+ :
+ /* the counter in our loop over the pages */
+ "r1",
+ /* the calculated page physical address */
+ "r2",
+ /* ESSA output register */
+ "r4",
+ /* last page */
+ "r5",
+ "cc", "memory"
+ );
+}
+
+static struct kvm_vm *create_vm(void)
+{
+ return ____vm_create(VM_MODE_DEFAULT);
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+ int i;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+ /* set the array of memslots to zero like __vm_create does */
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_START_GFN << vm->page_shift,
+ TEST_DATA_MEMSLOT,
+ TEST_DATA_PAGE_COUNT,
+ 0
+ );
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+ /*
+ * Our VM has the following memory layout:
+ * +------+---------------------------+
+ * | GFN | Memslot |
+ * +------+---------------------------+
+ * | 0 | |
+ * | ... | MAIN (Code, Stack, ...) |
+ * | 511 | |
+ * +------+---------------------------+
+ * | 4096 | |
+ * | ... | TEST_DATA |
+ * | 4607 | |
+ * +------+---------------------------+
+ */
+ create_main_memslot(vm);
+ create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *slot0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+ struct kvm_vm *vm;
+
+ vm = create_vm();
+
+ create_memslots(vm);
+
+ finish_vm_setup(vm);
+
+ return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+ TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+ vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+ vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+ return __kvm_device_attr_set(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_START,
+ NULL
+ );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+ int r = __enable_migration_mode(vm);
+
+ TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+ u64 out;
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_STATUS,
+ &out
+ );
+ TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+ return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+ struct kvm_s390_cmma_log args;
+ int rc;
+
+ errno = 0;
+
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = flags,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+ *errno_out = errno;
+ return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+ int rc, errno_out;
+
+ /* GET_CMMA_BITS without CMMA enabled should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_out, ENXIO);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ vcpu_run(vcpu);
+
+ /* GET_CMMA_BITS without migration mode and without peeking should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_out, EINVAL);
+
+ /* GET_CMMA_BITS without migration mode and with peeking should work */
+ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+ ASSERT_EQ(rc, 0);
+ ASSERT_EQ(errno_out, 0);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* GET_CMMA_BITS with invalid flags */
+ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_out, EINVAL);
+
+ kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+	ASSERT_EQ(vcpu->run->exit_reason, 13);		/* KVM_EXIT_S390_SIEIC */
+	ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);	/* instruction interception */
+	ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);	/* DIAG opcode */
+	ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);	/* DIAG 0x501 */
+}
+
+static void test_migration_mode(void)
+{
+ struct kvm_vm *vm = create_vm();
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+ int rc;
+
+ /* enabling migration mode on a VM without memory should fail */
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ create_memslots(vm);
+ finish_vm_setup(vm);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+	 * not have use_cmma enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /* migration mode when memslots have dirty tracking off should fail */
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ /* enable dirty tracking */
+ enable_dirty_tracking(vm);
+
+ /* enabling migration mode should work now */
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /* execute another ESSA instruction to see this goes fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * With migration mode on, create a new memslot with dirty tracking off.
+ * This should turn off migration mode.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_TWO_START_GFN << vm->page_shift,
+ TEST_DATA_TWO_MEMSLOT,
+ TEST_DATA_TWO_PAGE_COUNT,
+ 0
+ );
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "creating memslot without dirty tracking turns off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * Turn on dirty tracking on the new memslot.
+ * It should be possible to turn migration mode back on again.
+ */
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+ rc = __enable_migration_mode(vm);
+ ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /*
+ * Turn off dirty tracking again, this time with just a flag change.
+ * Again, migration mode should turn off.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "disabling dirty tracking should turn off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslots, assert that the CMMA
+ * attributes of all pages in both memslots are dirty, and nothing else is.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /*
+ * First iteration - everything should be dirty.
+ * Start at the main memslot...
+ */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+ ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+ ASSERT_EQ(args.start_gfn, 0);
+
+ /* ...and then - after a hole - the TEST_DATA memslot should follow */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = MAIN_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+ ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+ ASSERT_EQ(args.remaining, 0);
+
+ /* ...and nothing else should be there */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ ASSERT_EQ(args.count, 0);
+ ASSERT_EQ(args.start_gfn, 0);
+ ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /* If we start from GFN 0 again, nothing should be dirty. */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ if (args.count || args.remaining || args.start_gfn)
+ TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+ args.start_gfn,
+ args.count,
+ args.remaining
+ );
+}
+
+static void test_get_initial_dirty(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+	 * not have use_cmma enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Start from the beginning again and make sure nothing else is dirty */
+ assert_no_pages_cmma_dirty(vm);
+
+ kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+ u64 start_gfn, u64 gfn_count,
+ struct kvm_s390_cmma_log *res_out)
+{
+ *res_out = (struct kvm_s390_cmma_log){
+ .start_gfn = start_gfn,
+ .count = gfn_count,
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert that the cmma_log struct filled in by query_cmma_range() reports
+ * the first dirty gfn at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+ u64 dirty_gfn_count,
+ const struct kvm_s390_cmma_log *res)
+{
+ ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+ ASSERT_EQ(res->count, dirty_gfn_count);
+ for (size_t i = 0; i < dirty_gfn_count; i++)
+		ASSERT_EQ(cmma_value_buf[i], 0x0);	/* stable state */
+ ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+ size_t gfn_offset;
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_s390_cmma_log log;
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute some essa instructions in the guest. Otherwise the guest will
+	 * not have use_cmma enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* un-dirty all pages */
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Then, dirty just the TEST_DATA memslot */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+
+ gfn_offset = TEST_DATA_START_GFN;
+ /**
+ * Query CMMA attributes of one page, starting at page 0. Since the
+ * main memslot was not touched by the VM, this should yield the first
+ * page of the TEST_DATA memslot.
+ * The dirty bitmap should now look like this:
+ * 0: not dirty
+ * [0x1, 0x200): dirty
+ */
+ query_cmma_range(vm, 0, 1, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+ * memslot. This should wrap back to the beginning of the TEST_DATA
+ * memslot, page 1.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /* Skip 32 pages */
+ gfn_offset += 0x20;
+
+ /**
+ * After skipping 32 pages, query the next 32 (0x20) pages.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, gfn_offset, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /**
+ * Query 1 page from the beginning of the TEST_DATA memslot. This should
+ * yield page 0x21.
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * [0x22, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+ * This should yield pages [0x23, 0x33).
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * 0x22: dirty
+ * [0x23, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x23;
+ query_cmma_range(vm, gfn_offset, 15, &log);
+ assert_cmma_dirty(gfn_offset, 15, &log);
+
+ /**
+ * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+ * This should yield pages [0x22, 0x33).
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x22;
+ query_cmma_range(vm, gfn_offset, 17, &log);
+ assert_cmma_dirty(gfn_offset, 17, &log);
+
+ /**
+ * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+ * This should yield page 0x40 and nothing more, since there are more
+ * than 16 non-dirty pages after page 0x40.
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x40): dirty
+ * [0x40, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x40;
+ query_cmma_range(vm, gfn_offset, 25, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+
+ /**
+ * Query pages [0x33, 0x40).
+ * The dirty bitmap should now look like this:
+ * [0, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x33;
+ query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+ assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+ /**
+ * Query the remaining pages [0x61, 0x200).
+ */
+ gfn_offset = TEST_DATA_START_GFN;
+ query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+ assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "migration mode and dirty tracking", test_migration_mode },
+ { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+ { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+ { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+ struct kvm_vm *vm = create_vm();
+ int r;
+
+ r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+ kvm_vm_free(vm);
+
+ return r;
+}
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+ TEST_REQUIRE(machine_has_cmma());
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
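A migration-style consumer of this interface typically drains the CMMA log in a loop until the kernel reports no pages remaining. A minimal sketch of that pattern, assuming a raw VM file descriptor vm_fd and with error handling elided (the selftest above goes through the vm_ioctl() wrapper instead):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static __u8 values[4096];

    static void drain_cmma_log(int vm_fd)
    {
            struct kvm_s390_cmma_log log = {
                    .start_gfn = 0,
                    .count = sizeof(values),
                    .flags = 0, /* 0 consumes state; KVM_S390_CMMA_PEEK leaves it intact */
                    .values = (__u64)&values[0],
            };

            do {
                    memset(values, 0xff, sizeof(values));
                    if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
                            break;
                    /* On return, log.start_gfn/log.count describe the dirty range found. */
                    log.start_gfn += log.count;
                    log.count = sizeof(values);
            } while (log.remaining);
    }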
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index 2fc3ad9c887e..d3c3aa93f090 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -163,6 +163,25 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
ent->eax = eax;
}
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+ int i, r;
+
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+ "KVM didn't update nent on success, wanted %u, got %u\n",
+ vcpu->cpuid->nent, cpuid->nent);
+
+ for (i = 0; i < vcpu->cpuid->nent; i++) {
+ cpuid->nent = i;
+ r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+ TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+ }
+ free(cpuid);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -183,5 +202,7 @@ int main(void)
set_cpuid_after_run(vcpu);
+ test_get_cpuid2(vcpu);
+
kvm_vm_free(vm);
}
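The nent contract verified above implies the usual userspace sizing pattern for this ioctl: grow the buffer and retry on E2BIG. A sketch of that pattern against a hypothetical raw vcpu_fd (the selftest allocates via allocate_kvm_cpuid2() instead):

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static struct kvm_cpuid2 *get_cpuid2(int vcpu_fd)
    {
            struct kvm_cpuid2 *cpuid;
            __u32 nent = 8;

            for (;;) {
                    cpuid = calloc(1, sizeof(*cpuid) +
                                      nent * sizeof(struct kvm_cpuid_entry2));
                    cpuid->nent = nent;
                    if (!ioctl(vcpu_fd, KVM_GET_CPUID2, cpuid))
                            return cpuid; /* success: nent holds the real count */
                    if (errno != E2BIG) {
                            free(cpuid);
                            return NULL;
                    }
                    /* Too small; KVM leaves nent untouched, so just double it. */
                    free(cpuid);
                    nent *= 2;
            }
    }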
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
new file mode 100644
index 000000000000..beb7e2c10211
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+
+#define VCPUS 2
+#define SLOTS 2
+#define ITERATIONS 2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+ uint64_t pages_4k;
+ uint64_t pages_2m;
+ uint64_t pages_1g;
+ uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+ stats->pages_4k = vm_get_stat(vm, "pages_4k");
+ stats->pages_2m = vm_get_stat(vm, "pages_2m");
+ stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+ pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+ stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+ stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+ int i;
+
+ iteration++;
+ for (i = 0; i < VCPUS; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+ iteration)
+ ;
+ }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+
+ while (!READ_ONCE(host_quit)) {
+ int current_iteration = READ_ONCE(iteration);
+
+ vcpu_run(vcpu);
+
+ ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+ /* Wait for the start of the next iteration to be signaled. */
+ while (current_iteration == READ_ONCE(iteration) &&
+ READ_ONCE(iteration) >= 0 &&
+ !READ_ONCE(host_quit))
+ ;
+ }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+ struct kvm_vm *vm;
+ unsigned long **bitmaps;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ uint64_t pages_per_slot;
+ int i;
+ uint64_t total_4k_pages;
+ struct kvm_page_stats stats_populated;
+ struct kvm_page_stats stats_dirty_logging_enabled;
+ struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+ struct kvm_page_stats stats_clear_pass[ITERATIONS];
+ struct kvm_page_stats stats_dirty_logging_disabled;
+ struct kvm_page_stats stats_repopulated;
+
+ vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+ SLOTS, backing_src, false);
+
+ guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ pages_per_slot = host_num_pages / SLOTS;
+
+ bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
+
+ /* Start the iterations */
+ iteration = -1;
+ host_quit = false;
+
+ for (i = 0; i < VCPUS; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_populated, "populating memory");
+
+ /* Enable dirty logging */
+ memstress_enable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+ while (iteration < ITERATIONS) {
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+ "dirtying memory");
+
+ memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+ if (dirty_log_manual_caps) {
+ memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+ get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+ }
+ }
+
+ /* Disable dirty logging */
+ memstress_disable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+ /* Run vCPUs again to fault pages back in. */
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+ /*
+ * Tell the vCPU threads to quit. There is no need to manually check that
+ * the vCPUs have stopped running after disabling dirty logging; the join
+ * will wait for them to exit.
+ */
+ host_quit = true;
+ memstress_join_vcpu_threads(VCPUS);
+
+ memstress_free_bitmaps(bitmaps, SLOTS);
+ memstress_destroy_vm(vm);
+
+ /* Make assertions about the page counts. */
+ total_4k_pages = stats_populated.pages_4k;
+ total_4k_pages += stats_populated.pages_2m * 512;
+ total_4k_pages += stats_populated.pages_1g * 512 * 512;
+
+ /*
+ * Check that all huge pages were split. Since large pages can only
+ * exist in the data slot, and the vCPUs should have dirtied all pages
+ * in the data slot, there should be no huge pages left after splitting.
+ * Splitting happens at dirty log enable time without
+ * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+ * with that capability.
+ */
+ if (dirty_log_manual_caps) {
+ ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+ ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+ ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+ } else {
+ ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+ ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+ }
+
+ /*
+ * Once dirty logging is disabled and the vCPUs have touched all their
+ * memory again, the page counts should be the same as they were
+ * right after initial population of memory.
+ */
+ ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
+ ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+ ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+ name);
+ puts("");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int opt;
+
+ TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+ TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+ while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+ switch (opt) {
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ case 's':
+ backing_src = parse_backing_src_type(optarg);
+ break;
+ default:
+ help(argv[0]);
+ exit(1);
+ }
+ }
+
+ if (!is_backing_src_hugetlb(backing_src)) {
+ pr_info("This test will only work reliably with HugeTLB memory. "
+ "It can work with THP, but that is best effort.\n");
+ }
+
+ guest_modes_append_default();
+
+ dirty_log_manual_caps = 0;
+ for_each_guest_mode(run_test, NULL);
+
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+ if (dirty_log_manual_caps) {
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ for_each_guest_mode(run_test, NULL);
+ } else {
+ pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+ }
+
+ return 0;
+}
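The two configurations exercised by this test differ only in a small amount of userspace setup. A sketch of the MANUAL_PROTECT path with a hypothetical raw vm_fd, slot, and bitmap (the test drives this through the memstress helpers instead):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void manual_protect_and_clear(int vm_fd, __u32 slot,
                                         __u64 num_pages, void *bitmap)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
                    .args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
                               KVM_DIRTY_LOG_INITIALLY_SET,
            };
            struct kvm_clear_dirty_log clear = {
                    .slot = slot,
                    .num_pages = num_pages,
                    .first_page = 0,
                    .dirty_bitmap = bitmap,
            };

            /* With INITIALLY_SET, pages are not write-protected (and hugepages
             * are not split) until the first clear pass below. */
            ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
            /* ... run the guest, harvest KVM_GET_DIRTY_LOG into bitmap ... */
            ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
    }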
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 251794f83719..7f36c32fa760 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -226,7 +226,7 @@ static void help(char *name)
puts("");
printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
puts("");
- printf(" -p: The NX reclaim period in miliseconds.\n");
+ printf(" -p: The NX reclaim period in milliseconds.\n");
printf(" -t: The magic token to indicate environment setup is done.\n");
printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
puts("");
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index a284fcef6ed7..3610981d9162 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,26 +22,25 @@
#include "kvm_util.h"
#include "processor.h"
-static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig,
- uint64_t feature_bit)
-{
- struct kvm_sregs sregs;
- int rc;
-
- /* Skip the sub-test, the feature is supported. */
- if (orig->cr4 & feature_bit)
- return;
-
- memcpy(&sregs, orig, sizeof(sregs));
- sregs.cr4 |= feature_bit;
-
- rc = _vcpu_sregs_set(vcpu, &sregs);
- TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
-
- /* Sanity check that KVM didn't change anything. */
- vcpu_sregs_get(vcpu, &sregs);
- TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
-}
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
+do { \
+ struct kvm_sregs new; \
+ int rc; \
+ \
+ /* Skip the sub-test, the feature/bit is supported. */ \
+ if (orig.cr & bit) \
+ break; \
+ \
+ memcpy(&new, &orig, sizeof(new)); \
+ new.cr |= bit; \
+ \
+ rc = _vcpu_sregs_set(vcpu, &new); \
+ TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
+ \
+ /* Sanity check that KVM didn't change anything. */ \
+ vcpu_sregs_get(vcpu, &new); \
+ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
+} while (0)
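For reference, with the sizeof(new) fix applied, a call such as TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE) expands to roughly:

    do {
            struct kvm_sregs new;
            int rc;

            if (sregs.cr4 & X86_CR4_VMXE) /* supported, skip the sub-test */
                    break;

            memcpy(&new, &sregs, sizeof(new));
            new.cr4 |= X86_CR4_VMXE;

            rc = _vcpu_sregs_set(vcpu, &new);
            TEST_ASSERT(rc, "KVM allowed invalid cr4 bit (0x%lx)", X86_CR4_VMXE);

            vcpu_sregs_get(vcpu, &new);
            TEST_ASSERT(!memcmp(&new, &sregs, sizeof(new)), "KVM modified sregs");
    } while (0);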
static uint64_t calc_supported_cr4_feature_bits(void)
{
@@ -80,7 +79,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t cr4;
- int rc;
+ int rc, i;
/*
* Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
@@ -92,6 +91,7 @@ int main(int argc, char *argv[])
vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr0 = 0;
sregs.cr4 |= calc_supported_cr4_feature_bits();
cr4 = sregs.cr4;
@@ -103,16 +103,24 @@ int main(int argc, char *argv[])
sregs.cr4, cr4);
/* Verify all unsupported features are rejected by KVM. */
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+ for (i = 32; i < 64; i++)
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+ /* NW without CD is illegal, as is PG without PE. */
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
kvm_vm_free(vm);
/* Create a "real" VM and verify APIC_BASE can be set. */
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
index fa03c8d1ce4e..e710b6e7fb38 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -116,29 +116,21 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
-static void stable_tsc_check_supported(void)
+static bool system_has_stable_tsc(void)
{
+ bool tsc_is_stable;
FILE *fp;
char buf[4];
fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
if (fp == NULL)
- goto skip_test;
+ return false;
- if (fgets(buf, sizeof(buf), fp) == NULL)
- goto close_fp;
+ tsc_is_stable = fgets(buf, sizeof(buf), fp) &&
+ !strncmp(buf, "tsc", sizeof(buf));
- if (strncmp(buf, "tsc", sizeof(buf)))
- goto close_fp;
-
- fclose(fp);
- return;
-
-close_fp:
fclose(fp);
-skip_test:
- print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
- exit(KSFT_SKIP);
+ return tsc_is_stable;
}
int main(int argc, char *argv[])
@@ -156,7 +148,7 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
- stable_tsc_check_supported();
+ TEST_REQUIRE(system_has_stable_tsc());
/*
* We set L1's scale factor to be a random number from 2 to 10.
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 0f0a65287bac..3dc9e438eab1 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,7 +1,10 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
CONFIG_OVERLAY_FS=y
-CONFIG_SECURITY_LANDLOCK=y
-CONFIG_SECURITY_PATH=y
+CONFIG_PROC_FS=y
CONFIG_SECURITY=y
+CONFIG_SECURITY_LANDLOCK=y
CONFIG_SHMEM=y
-CONFIG_TMPFS_XATTR=y
+CONFIG_SYSFS=y
CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
diff --git a/tools/testing/selftests/landlock/config.um b/tools/testing/selftests/landlock/config.um
new file mode 100644
index 000000000000..40937c0395d6
--- /dev/null
+++ b/tools/testing/selftests/landlock/config.um
@@ -0,0 +1 @@
+CONFIG_HOSTFS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index b6c4be3faf7a..251594306d40 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -10,6 +10,7 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/landlock.h>
+#include <linux/magic.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
@@ -19,6 +20,7 @@
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/vfs.h>
#include <unistd.h>
#include "common.h"
@@ -107,9 +109,11 @@ static bool fgrep(FILE *const inf, const char *const str)
return false;
}
-static bool supports_overlayfs(void)
+static bool supports_filesystem(const char *const filesystem)
{
- bool res;
+ char str[32];
+ int len;
+ bool res = true;
FILE *const inf = fopen("/proc/filesystems", "r");
/*
@@ -119,11 +123,35 @@ static bool supports_overlayfs(void)
if (!inf)
return true;
- res = fgrep(inf, "nodev\toverlay\n");
+ /* filesystem can be null for bind mounts. */
+ if (!filesystem)
+ goto out;
+
+ len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
+ if (len >= sizeof(str))
+ /* Ignores too-long filesystem names. */
+ goto out;
+
+ res = fgrep(inf, str);
+
+out:
fclose(inf);
return res;
}
+static bool cwd_matches_fs(unsigned int fs_magic)
+{
+ struct statfs statfs_buf;
+
+ if (!fs_magic)
+ return true;
+
+ if (statfs(".", &statfs_buf))
+ return true;
+
+ return statfs_buf.f_type == fs_magic;
+}
+
static void mkdir_parents(struct __test_metadata *const _metadata,
const char *const path)
{
@@ -206,7 +234,26 @@ out:
return err;
}
-static void prepare_layout(struct __test_metadata *const _metadata)
+struct mnt_opt {
+ const char *const source;
+ const char *const type;
+ const unsigned long flags;
+ const char *const data;
+};
+
+const struct mnt_opt mnt_tmp = {
+ .type = "tmpfs",
+ .data = "size=4m,mode=700",
+};
+
+static int mount_opt(const struct mnt_opt *const mnt, const char *const target)
+{
+ return mount(mnt->source ?: mnt->type, target, mnt->type, mnt->flags,
+ mnt->data);
+}
+
+static void prepare_layout_opt(struct __test_metadata *const _metadata,
+ const struct mnt_opt *const mnt)
{
disable_caps(_metadata);
umask(0077);
@@ -217,12 +264,28 @@ static void prepare_layout(struct __test_metadata *const _metadata)
* for tests relying on pivot_root(2) and move_mount(2).
*/
set_cap(_metadata, CAP_SYS_ADMIN);
- ASSERT_EQ(0, unshare(CLONE_NEWNS));
- ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700"));
+ ASSERT_EQ(0, unshare(CLONE_NEWNS | CLONE_NEWCGROUP));
+ ASSERT_EQ(0, mount_opt(mnt, TMP_DIR))
+ {
+ TH_LOG("Failed to mount the %s filesystem: %s", mnt->type,
+ strerror(errno));
+ /*
+ * FIXTURE_TEARDOWN() is not called when FIXTURE_SETUP()
+ * failed, so we need to explicitly do a minimal cleanup to
+ * avoid cascading errors with other tests that don't depend on
+ * the same filesystem.
+ */
+ remove_path(TMP_DIR);
+ }
ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL));
clear_cap(_metadata, CAP_SYS_ADMIN);
}
+static void prepare_layout(struct __test_metadata *const _metadata)
+{
+ prepare_layout_opt(_metadata, &mnt_tmp);
+}
+
static void cleanup_layout(struct __test_metadata *const _metadata)
{
set_cap(_metadata, CAP_SYS_ADMIN);
@@ -231,6 +294,20 @@ static void cleanup_layout(struct __test_metadata *const _metadata)
EXPECT_EQ(0, remove_path(TMP_DIR));
}
+/* clang-format off */
+FIXTURE(layout0) {};
+/* clang-format on */
+
+FIXTURE_SETUP(layout0)
+{
+ prepare_layout(_metadata);
+}
+
+FIXTURE_TEARDOWN(layout0)
+{
+ cleanup_layout(_metadata);
+}
+
static void create_layout1(struct __test_metadata *const _metadata)
{
create_file(_metadata, file1_s1d1);
@@ -248,7 +325,7 @@ static void create_layout1(struct __test_metadata *const _metadata)
create_file(_metadata, file1_s3d1);
create_directory(_metadata, dir_s3d2);
set_cap(_metadata, CAP_SYS_ADMIN);
- ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700"));
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
clear_cap(_metadata, CAP_SYS_ADMIN);
ASSERT_EQ(0, mkdir(dir_s3d3, 0700));
@@ -262,11 +339,13 @@ static void remove_layout1(struct __test_metadata *const _metadata)
EXPECT_EQ(0, remove_path(file1_s1d3));
EXPECT_EQ(0, remove_path(file1_s1d2));
EXPECT_EQ(0, remove_path(file1_s1d1));
+ EXPECT_EQ(0, remove_path(dir_s1d3));
EXPECT_EQ(0, remove_path(file2_s2d3));
EXPECT_EQ(0, remove_path(file1_s2d3));
EXPECT_EQ(0, remove_path(file1_s2d2));
EXPECT_EQ(0, remove_path(file1_s2d1));
+ EXPECT_EQ(0, remove_path(dir_s2d2));
EXPECT_EQ(0, remove_path(file1_s3d1));
EXPECT_EQ(0, remove_path(dir_s3d3));
@@ -510,7 +589,7 @@ TEST_F_FORK(layout1, file_and_dir_access_rights)
ASSERT_EQ(0, close(ruleset_fd));
}
-TEST_F_FORK(layout1, unknown_access_rights)
+TEST_F_FORK(layout0, unknown_access_rights)
{
__u64 access_mask;
@@ -608,7 +687,7 @@ static void enforce_ruleset(struct __test_metadata *const _metadata,
}
}
-TEST_F_FORK(layout1, proc_nsfs)
+TEST_F_FORK(layout0, proc_nsfs)
{
const struct rule rules[] = {
{
@@ -657,11 +736,11 @@ TEST_F_FORK(layout1, proc_nsfs)
ASSERT_EQ(0, close(path_beneath.parent_fd));
}
-TEST_F_FORK(layout1, unpriv)
+TEST_F_FORK(layout0, unpriv)
{
const struct rule rules[] = {
{
- .path = dir_s1d2,
+ .path = TMP_DIR,
.access = ACCESS_RO,
},
{},
@@ -1301,12 +1380,12 @@ TEST_F_FORK(layout1, inherit_superset)
ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
}
-TEST_F_FORK(layout1, max_layers)
+TEST_F_FORK(layout0, max_layers)
{
int i, err;
const struct rule rules[] = {
{
- .path = dir_s1d2,
+ .path = TMP_DIR,
.access = ACCESS_RO,
},
{},
@@ -4030,21 +4109,24 @@ static const char (*merge_sub_files[])[] = {
* └── work
*/
-/* clang-format off */
-FIXTURE(layout2_overlay) {};
-/* clang-format on */
+FIXTURE(layout2_overlay)
+{
+ bool skip_test;
+};
FIXTURE_SETUP(layout2_overlay)
{
- if (!supports_overlayfs())
- SKIP(return, "overlayfs is not supported");
+ if (!supports_filesystem("overlay")) {
+ self->skip_test = true;
+ SKIP(return, "overlayfs is not supported (setup)");
+ }
prepare_layout(_metadata);
create_directory(_metadata, LOWER_BASE);
set_cap(_metadata, CAP_SYS_ADMIN);
/* Creates tmpfs mount points to get deterministic overlayfs. */
- ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700"));
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, LOWER_BASE));
clear_cap(_metadata, CAP_SYS_ADMIN);
create_file(_metadata, lower_fl1);
create_file(_metadata, lower_dl1_fl2);
@@ -4054,7 +4136,7 @@ FIXTURE_SETUP(layout2_overlay)
create_directory(_metadata, UPPER_BASE);
set_cap(_metadata, CAP_SYS_ADMIN);
- ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700"));
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, UPPER_BASE));
clear_cap(_metadata, CAP_SYS_ADMIN);
create_file(_metadata, upper_fu1);
create_file(_metadata, upper_du1_fu2);
@@ -4075,8 +4157,8 @@ FIXTURE_SETUP(layout2_overlay)
FIXTURE_TEARDOWN(layout2_overlay)
{
- if (!supports_overlayfs())
- SKIP(return, "overlayfs is not supported");
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (teardown)");
EXPECT_EQ(0, remove_path(lower_do1_fl3));
EXPECT_EQ(0, remove_path(lower_dl1_fl2));
@@ -4109,8 +4191,8 @@ FIXTURE_TEARDOWN(layout2_overlay)
TEST_F_FORK(layout2_overlay, no_restriction)
{
- if (!supports_overlayfs())
- SKIP(return, "overlayfs is not supported");
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (test)");
ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
@@ -4275,8 +4357,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
size_t i;
const char *path_entry;
- if (!supports_overlayfs())
- SKIP(return, "overlayfs is not supported");
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (test)");
/* Sets rules on base directories (i.e. outside overlay scope). */
ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
@@ -4423,4 +4505,261 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
}
}
+FIXTURE(layout3_fs)
+{
+ bool has_created_dir;
+ bool has_created_file;
+ char *dir_path;
+ bool skip_test;
+};
+
+FIXTURE_VARIANT(layout3_fs)
+{
+ const struct mnt_opt mnt;
+ const char *const file_path;
+ unsigned int cwd_fs_magic;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) {
+ /* clang-format on */
+ .mnt = mnt_tmp,
+ .file_path = file1_s1d1,
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, ramfs) {
+ .mnt = {
+ .type = "ramfs",
+ .data = "mode=700",
+ },
+ .file_path = TMP_DIR "/dir/file",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, cgroup2) {
+ .mnt = {
+ .type = "cgroup2",
+ },
+ .file_path = TMP_DIR "/test/cgroup.procs",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, proc) {
+ .mnt = {
+ .type = "proc",
+ },
+ .file_path = TMP_DIR "/self/status",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, sysfs) {
+ .mnt = {
+ .type = "sysfs",
+ },
+ .file_path = TMP_DIR "/kernel/notes",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, hostfs) {
+ .mnt = {
+ .source = TMP_DIR,
+ .flags = MS_BIND,
+ },
+ .file_path = TMP_DIR "/dir/file",
+ .cwd_fs_magic = HOSTFS_SUPER_MAGIC,
+};
+
+FIXTURE_SETUP(layout3_fs)
+{
+ struct stat statbuf;
+ const char *slash;
+ size_t dir_len;
+
+ if (!supports_filesystem(variant->mnt.type) ||
+ !cwd_matches_fs(variant->cwd_fs_magic)) {
+ self->skip_test = true;
+ SKIP(return, "this filesystem is not supported (setup)");
+ }
+
+ slash = strrchr(variant->file_path, '/');
+ ASSERT_NE(slash, NULL);
+ dir_len = (size_t)slash - (size_t)variant->file_path;
+ ASSERT_LT(0, dir_len);
+ self->dir_path = malloc(dir_len + 1);
+ self->dir_path[dir_len] = '\0';
+ strncpy(self->dir_path, variant->file_path, dir_len);
+
+ prepare_layout_opt(_metadata, &variant->mnt);
+
+ /* Creates directory when required. */
+ if (stat(self->dir_path, &statbuf)) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ EXPECT_EQ(0, mkdir(self->dir_path, 0700))
+ {
+ TH_LOG("Failed to create directory \"%s\": %s",
+ self->dir_path, strerror(errno));
+ free(self->dir_path);
+ self->dir_path = NULL;
+ }
+ self->has_created_dir = true;
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ /* Creates file when required. */
+ if (stat(variant->file_path, &statbuf)) {
+ int fd;
+
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ fd = creat(variant->file_path, 0600);
+ EXPECT_LE(0, fd)
+ {
+ TH_LOG("Failed to create file \"%s\": %s",
+ variant->file_path, strerror(errno));
+ }
+ EXPECT_EQ(0, close(fd));
+ self->has_created_file = true;
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+}
+
+FIXTURE_TEARDOWN(layout3_fs)
+{
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (teardown)");
+
+ if (self->has_created_file) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ /*
+ * Don't check for error because the file might already
+ * have been removed (cf. release_inode test).
+ */
+ unlink(variant->file_path);
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ if (self->has_created_dir) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ /*
+ * Don't check for error because the directory might already
+ * have been removed (cf. release_inode test).
+ */
+ rmdir(self->dir_path);
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+ free(self->dir_path);
+ self->dir_path = NULL;
+
+ cleanup_layout(_metadata);
+}
+
+static void layer3_fs_tag_inode(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(layout3_fs) * self,
+ const FIXTURE_VARIANT(layout3_fs) * variant,
+ const char *const rule_path)
+{
+ const struct rule layer1_allow_read_file[] = {
+ {
+ .path = rule_path,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct landlock_ruleset_attr layer2_deny_everything_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ const char *const dev_null_path = "/dev/null";
+ int ruleset_fd;
+
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (test)");
+
+ /* Checks without Landlock. */
+ EXPECT_EQ(0, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer1_allow_read_file);
+ EXPECT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+
+ /* Forbids directory reading. */
+ ruleset_fd =
+ landlock_create_ruleset(&layer2_deny_everything_attr,
+ sizeof(layer2_deny_everything_attr), 0);
+ EXPECT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Checks with Landlock and forbidden access. */
+ EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+}
+
+/* Matrix of tests to check file hierarchy evaluation. */
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_parent)
+{
+ /* The current directory must not be the root for this test. */
+ layer3_fs_tag_inode(_metadata, self, variant, ".");
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_mnt)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, TMP_DIR);
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_child)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, self->dir_path);
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_file)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, variant->file_path);
+}
+
+/* Light version of layout1.release_inodes */
+TEST_F_FORK(layout3_fs, release_inodes)
+{
+ const struct rule layer1[] = {
+ {
+ .path = TMP_DIR,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (test)");
+
+ /* Clean up for the teardown to not fail. */
+ if (self->has_created_file)
+ EXPECT_EQ(0, remove_path(variant->file_path));
+
+ if (self->has_created_dir)
+ /* Don't check for error because of cgroup specificities. */
+ remove_path(self->dir_path);
+
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Unmount the filesystem while it is being used by a ruleset. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Replaces with a new mount point to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY));
+}
+
TEST_HARNESS_MAIN
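The fixtures above funnel through the create_ruleset() and enforce_ruleset() helpers from common.h; the underlying sequence is roughly the following sketch, assuming the landlock_* syscall wrappers those helpers rely on:

    struct landlock_ruleset_attr ruleset_attr = {
            .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
    };
    struct landlock_path_beneath_attr path_beneath = {
            .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE,
            .parent_fd = open("/tmp", O_PATH | O_CLOEXEC),
    };
    int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
                                             sizeof(ruleset_attr), 0);

    landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, &path_beneath, 0);
    close(path_beneath.parent_fd);
    /* Dropping new privileges is a precondition for restricting the task. */
    prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
    landlock_restrict_self(ruleset_fd, 0);
    close(ruleset_fd);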
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 05400462c779..118e0964bda9 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -44,10 +44,26 @@ endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
top_srcdir = $(selfdir)/../../..
-ifeq ($(KHDR_INCLUDES),)
-KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
+ifeq ("$(origin O)", "command line")
+ KBUILD_OUTPUT := $(O)
endif
+ifneq ($(KBUILD_OUTPUT),)
+ # Make's built-in functions such as $(abspath ...) and $(realpath ...) cannot
+ # expand the shell special character '~', so resolve the path via the shell instead.
+ abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
+ $(if $(abs_objtree),, \
+ $(error failed to create output directory "$(KBUILD_OUTPUT)"))
+ # $(realpath ...) resolves symlinks
+ abs_objtree := $(realpath $(abs_objtree))
+ KHDR_DIR := ${abs_objtree}/usr/include
+else
+ abs_srctree := $(shell cd $(top_srcdir) && pwd)
+ KHDR_DIR := ${abs_srctree}/usr/include
+endif
+
+KHDR_INCLUDES := -isystem $(KHDR_DIR)
+
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
# custom build rule and prevent common build rule use.
@@ -58,7 +74,25 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \
+ $(TEST_GEN_FILES)
+
+kernel_header_files:
+ @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \
+ if [ $$? -ne 0 ]; then \
+ RED='\033[1;31m'; \
+ NOCOLOR='\033[0m'; \
+ echo; \
+ echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \
+ echo "Please run this and try again:"; \
+ echo; \
+ echo " cd $(top_srcdir)"; \
+ echo " make headers"; \
+ echo; \
+ exit 1; \
+ fi
+
+.PHONY: kernel_header_files
define RUN_TESTS
BASE_DIR="$(selfdir)"; \
@@ -72,7 +106,7 @@ endef
run_tests: all
ifdef building_out_of_srctree
@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
- rsync -aLq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
+ rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
fi
@if [ "X$(TEST_PROGS)" != "X" ]; then \
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
@@ -86,7 +120,7 @@ endif
define INSTALL_SINGLE_RULE
$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
- $(if $(INSTALL_LIST),rsync -aL $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST) $(INSTALL_PATH)/)
endef
define INSTALL_RULE
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
index 0f6aef2e2593..2c44e115f2f0 100644
--- a/tools/testing/selftests/media_tests/video_device_test.c
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -37,45 +37,58 @@
#include <time.h>
#include <linux/videodev2.h>
-int main(int argc, char **argv)
+#define PRIORITY_MAX 4
+
+int priority_test(int fd)
{
- int opt;
- char video_dev[256];
- int count;
- struct v4l2_tuner vtuner;
- struct v4l2_capability vcap;
+ /* This test will try to update the priority associated with a file descriptor */
+
+ enum v4l2_priority old_priority, new_priority, priority_to_compare;
int ret;
- int fd;
+ int result = 0;
- if (argc < 2) {
- printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
- exit(-1);
+ ret = ioctl(fd, VIDIOC_G_PRIORITY, &old_priority);
+ if (ret < 0) {
+ printf("Failed to get priority: %s\n", strerror(errno));
+ return -1;
+ }
+ new_priority = (old_priority + 1) % PRIORITY_MAX;
+ ret = ioctl(fd, VIDIOC_S_PRIORITY, &new_priority);
+ if (ret < 0) {
+ printf("Failed to set priority: %s\n", strerror(errno));
+ return -1;
+ }
+ ret = ioctl(fd, VIDIOC_G_PRIORITY, &priority_to_compare);
+ if (ret < 0) {
+ printf("Failed to get new priority: %s\n", strerror(errno));
+ result = -1;
+ goto cleanup;
+ }
+ if (priority_to_compare != new_priority) {
+ printf("Priority wasn't set - test failed\n");
+ result = -1;
}
- /* Process arguments */
- while ((opt = getopt(argc, argv, "d:")) != -1) {
- switch (opt) {
- case 'd':
- strncpy(video_dev, optarg, sizeof(video_dev) - 1);
- video_dev[sizeof(video_dev)-1] = '\0';
- break;
- default:
- printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
- exit(-1);
- }
+cleanup:
+ ret = ioctl(fd, VIDIOC_S_PRIORITY, &old_priority);
+ if (ret < 0) {
+ printf("Failed to restore priority: %s\n", strerror(errno));
+ return -1;
}
+ return result;
+}
+
+int loop_test(int fd)
+{
+ int count;
+ struct v4l2_tuner vtuner;
+ struct v4l2_capability vcap;
+ int ret;
/* Generate a random number of iterations */
srand((unsigned int) time(NULL));
count = rand();
- /* Open Video device and keep it open */
- fd = open(video_dev, O_RDWR);
- if (fd == -1) {
- printf("Video Device open errno %s\n", strerror(errno));
- exit(-1);
- }
-
printf("\nNote:\n"
"While test is running, remove the device or unbind\n"
"driver and ensure there are no use after free errors\n"
@@ -98,4 +111,46 @@ int main(int argc, char **argv)
sleep(10);
count--;
}
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char video_dev[256];
+ int fd;
+ int test_result;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(video_dev, optarg, sizeof(video_dev) - 1);
+ video_dev[sizeof(video_dev)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ /* Open Video device and keep it open */
+ fd = open(video_dev, O_RDWR);
+ if (fd == -1) {
+ printf("Video Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ test_result = priority_test(fd);
+ if (!test_result)
+ printf("Priority test - PASSED\n");
+ else
+ printf("Priority test - FAILED\n");
+
+ loop_test(fd);
}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index dba0e8ba002f..8b7390ad81d1 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1145,8 +1145,25 @@ static void test_sysctl_child(void)
printf("%s sysctl 2\n", memfd_str);
sysctl_assert_write("2");
- mfd_fail_new("kern_memfd_sysctl_2",
- MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_fail_new("kern_memfd_sysctl_2_exec",
+ MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec_seal",
+ mfd_def_size,
+ MFD_NOEXEC_SEAL | MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
sysctl_fail_write("0");
sysctl_fail_write("1");
}
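With vm.memfd_noexec=2 the exec'ability of a memfd is decided at creation time, which is what the new assertions pin down. A standalone sketch of the MFD_NOEXEC_SEAL behavior (the fallback defines are assumptions for pre-6.3 userspace headers):

    #define _GNU_SOURCE
    #include <assert.h>
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>

    #ifndef MFD_NOEXEC_SEAL
    #define MFD_NOEXEC_SEAL 0x0008U /* linux/memfd.h, kernel >= 6.3 */
    #endif
    #ifndef F_SEAL_EXEC
    #define F_SEAL_EXEC 0x0020 /* linux/fcntl.h, kernel >= 6.3 */
    #endif

    int main(void)
    {
            int fd = memfd_create("demo", MFD_NOEXEC_SEAL | MFD_CLOEXEC |
                                          MFD_ALLOW_SEALING);

            /* The memfd is created 0666 with F_SEAL_EXEC already applied... */
            assert(fcntl(fd, F_GET_SEALS) & F_SEAL_EXEC);
            /* ...so making it executable afterwards is rejected. */
            assert(fchmod(fd, 0777) == -1);
            return 0;
    }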
@@ -1202,7 +1219,24 @@ static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *))
static void join_newpid_thread(pid_t pid)
{
- waitpid(pid, NULL, 0);
+ int wstatus;
+
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ printf("newpid thread: waitpid() failed: %m\n");
+ abort();
+ }
+
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
+ printf("newpid thread: exited with non-zero error code %d\n",
+ WEXITSTATUS(wstatus));
+ abort();
+ }
+
+ if (WIFSIGNALED(wstatus)) {
+ printf("newpid thread: killed by signal %d\n",
+ WTERMSIG(wstatus));
+ abort();
+ }
}
/*
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index 4c88238fc8f0..e949a43a6145 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -150,8 +150,8 @@ TEST(check_huge_pages)
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
if (addr == MAP_FAILED) {
- if (errno == ENOMEM)
- SKIP(return, "No huge pages available.");
+ if (errno == ENOMEM || errno == EINVAL)
+ SKIP(return, "No huge pages available or CONFIG_HUGETLB_PAGE disabled.");
else
TH_LOG("mmap error: %s", strerror(errno));
}
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index 8917455f4f51..7e2a982383c0 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -39,3 +39,6 @@ local_config.h
local_config.mk
ksm_functional_tests
mdwe_test
+gup_longterm
+mkdirty
+va_high_addr_switch
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 4f0c50c33ba7..66d7c07dc177 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -32,11 +32,12 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/tools/include/uapi $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
LDLIBS = -lrt -lpthread
TEST_GEN_PROGS = cow
TEST_GEN_PROGS += compaction_test
+TEST_GEN_PROGS += gup_longterm
TEST_GEN_PROGS += gup_test
TEST_GEN_PROGS += hmm-tests
TEST_GEN_PROGS += hugetlb-madvise
@@ -167,6 +168,8 @@ endif
# IOURING_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
$(OUTPUT)/cow: LDLIBS += $(IOURING_EXTRA_LIBS)
+$(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
+
$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index a5cb4b09a46c..a5cb4b09a46c 100644..100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
diff --git a/tools/testing/selftests/mm/check_config.sh b/tools/testing/selftests/mm/check_config.sh
index 3954f4746161..3954f4746161 100644..100755
--- a/tools/testing/selftests/mm/check_config.sh
+++ b/tools/testing/selftests/mm/check_config.sh
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index dc9d6fe86028..7324ce5363c0 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -14,8 +14,8 @@
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
-#include <dirent.h>
#include <assert.h>
+#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
@@ -30,13 +30,6 @@
#include "../kselftest.h"
#include "vm_util.h"
-#ifndef MADV_PAGEOUT
-#define MADV_PAGEOUT 21
-#endif
-#ifndef MADV_COLLAPSE
-#define MADV_COLLAPSE 25
-#endif
-
static size_t pagesize;
static int pagemap_fd;
static size_t thpsize;
@@ -70,31 +63,6 @@ static void detect_huge_zeropage(void)
close(fd);
}
-static void detect_hugetlbsizes(void)
-{
- DIR *dir = opendir("/sys/kernel/mm/hugepages/");
-
- if (!dir)
- return;
-
- while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) {
- struct dirent *entry = readdir(dir);
- size_t kb;
-
- if (!entry)
- break;
- if (entry->d_type != DT_DIR)
- continue;
- if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
- continue;
- hugetlbsizes[nr_hugetlbsizes] = kb * 1024;
- nr_hugetlbsizes++;
- ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n",
- kb);
- }
- closedir(dir);
-}
-
static bool range_is_swapped(void *addr, size_t size)
{
for (; size; addr += pagesize, size -= pagesize)
@@ -1717,7 +1685,8 @@ int main(int argc, char **argv)
if (thpsize)
ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
thpsize / 1024);
- detect_hugetlbsizes();
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
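detect_hugetlb_page_sizes() is the shared vm_util replacement for the per-test loop deleted above; its logic is presumably equivalent to this sketch, which scans sysfs the same way the removed code did (signature inferred from this hunk):

    #include <dirent.h>
    #include <stdio.h>

    static int detect_hugetlb_page_sizes(size_t sizes[], int max)
    {
            DIR *dir = opendir("/sys/kernel/mm/hugepages/");
            struct dirent *entry;
            size_t kb;
            int n = 0;

            if (!dir)
                    return 0;
            while (n < max && (entry = readdir(dir))) {
                    if (entry->d_type == DT_DIR &&
                        sscanf(entry->d_name, "hugepages-%zukB", &kb) == 1)
                            sizes[n++] = kb * 1024;
            }
            closedir(dir);
            return n;
    }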
detect_huge_zeropage();
ksft_print_header();
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
new file mode 100644
index 000000000000..d33d3e68ffab
--- /dev/null
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * GUP long-term page pinning tests.
+ *
+ * Copyright 2023, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <linux/memfd.h>
+
+#include "local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+#include <liburing.h>
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+#include "../../../../mm/gup_test.h"
+#include "../kselftest.h"
+#include "vm_util.h"
+
+static size_t pagesize;
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+static int gup_fd;
+
+static __fsword_t get_fs_type(int fd)
+{
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = fstatfs(fd, &fs);
+ } while (ret && errno == EINTR);
+
+ return ret ? 0 : fs.f_type;
+}
+
+static bool fs_is_unknown(__fsword_t fs_type)
+{
+ /*
+ * We only support some filesystems in our tests when dealing with
+ * R/W long-term pinning. For these filesystems, we can be fairly sure
+ * whether they support it or not.
+ */
+ switch (fs_type) {
+ case TMPFS_MAGIC:
+ case HUGETLBFS_MAGIC:
+ case BTRFS_SUPER_MAGIC:
+ case EXT4_SUPER_MAGIC:
+ case XFS_SUPER_MAGIC:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
+{
+ assert(!fs_is_unknown(fs_type));
+ switch (fs_type) {
+ case TMPFS_MAGIC:
+ case HUGETLBFS_MAGIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+enum test_type {
+ TEST_TYPE_RO,
+ TEST_TYPE_RO_FAST,
+ TEST_TYPE_RW,
+ TEST_TYPE_RW_FAST,
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ TEST_TYPE_IOURING,
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+};
+
+static void do_test(int fd, size_t size, enum test_type type, bool shared)
+{
+ __fsword_t fs_type = get_fs_type(fd);
+ bool should_work;
+ char *mem;
+ int ret;
+
+ if (ftruncate(fd, size)) {
+ ksft_test_result_fail("ftruncate() failed\n");
+ return;
+ }
+
+ if (fallocate(fd, 0, 0, size)) {
+ if (size == pagesize)
+ ksft_test_result_fail("fallocate() failed\n");
+ else
+ ksft_test_result_skip("need more free huge pages\n");
+ return;
+ }
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ if (size == pagesize || shared)
+ ksft_test_result_fail("mmap() failed\n");
+ else
+ ksft_test_result_skip("need more free huge pages\n");
+ return;
+ }
+
+ /*
+ * Fault in the page writable so that GUP-fast can pin it without
+ * having to trigger a fault first.
+ */
+ memset(mem, 0, size);
+
+ switch (type) {
+ case TEST_TYPE_RO:
+ case TEST_TYPE_RO_FAST:
+ case TEST_TYPE_RW:
+ case TEST_TYPE_RW_FAST: {
+ struct pin_longterm_test args;
+ const bool fast = type == TEST_TYPE_RO_FAST ||
+ type == TEST_TYPE_RW_FAST;
+ const bool rw = type == TEST_TYPE_RW ||
+ type == TEST_TYPE_RW_FAST;
+
+ if (gup_fd < 0) {
+ ksft_test_result_skip("gup_test not available\n");
+ break;
+ }
+
+ if (rw && shared && fs_is_unknown(fs_type)) {
+ ksft_test_result_skip("Unknown filesystem\n");
+ return;
+ }
+ /*
+ * R/O pinning or pinning in a private mapping is always
+ * expected to work. Otherwise, we expect long-term R/W pinning
+ * to only succeed for special filesystems.
+ */
+ should_work = !shared || !rw ||
+ fs_supports_writable_longterm_pinning(fs_type);
+
+ args.addr = (__u64)(uintptr_t)mem;
+ args.size = size;
+ args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+ args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+ if (ret && errno == EINVAL) {
+ ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ break;
+ } else if (ret && errno == EFAULT) {
+ ksft_test_result(!should_work, "Should have failed\n");
+ break;
+ } else if (ret) {
+ ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ break;
+ }
+
+ if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
+ ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+
+ /*
+ * TODO: if the kernel ever supports long-term R/W pinning on
+ * some previously unsupported filesystems, we might want to
+ * perform some additional tests for possible data corruptions.
+ */
+ ksft_test_result(should_work, "Should have worked\n");
+ break;
+ }
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ case TEST_TYPE_IOURING: {
+ struct io_uring ring;
+ struct iovec iov;
+
+ /* io_uring always pins pages writable. */
+ if (shared && fs_is_unknown(fs_type)) {
+ ksft_test_result_skip("Unknown filesystem\n");
+ return;
+ }
+ should_work = !shared ||
+ fs_supports_writable_longterm_pinning(fs_type);
+
+ /* Skip on errors, as we might just lack kernel support. */
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret < 0) {
+ ksft_test_result_skip("io_uring_queue_init() failed\n");
+ break;
+ }
+ /*
+ * Register the range as a fixed buffer. This will FOLL_WRITE |
+ * FOLL_PIN | FOLL_LONGTERM the range.
+ */
+ iov.iov_base = mem;
+ iov.iov_len = size;
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ /* Only new kernels return EFAULT. */
+ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
+ errno == EFAULT)) {
+ ksft_test_result(!should_work, "Should have failed\n");
+ } else if (ret) {
+ /*
+ * We might just lack support or have insufficient
+ * MEMLOCK limits.
+ */
+ ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ } else {
+ ksft_test_result(should_work, "Should have worked\n");
+ io_uring_unregister_buffers(&ring);
+ }
+
+ io_uring_queue_exit(&ring);
+ break;
+ }
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+ default:
+ assert(false);
+ }
+
+ munmap(mem, size);
+}
+
+typedef void (*test_fn)(int fd, size_t size);
+
+static void run_with_memfd(test_fn fn, const char *desc)
+{
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+
+ fd = memfd_create("test", 0);
+ if (fd < 0) {
+ ksft_test_result_fail("memfd_create() failed\n");
+ return;
+ }
+
+ fn(fd, pagesize);
+ close(fd);
+}
+
+static void run_with_tmpfile(test_fn fn, const char *desc)
+{
+ FILE *file;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+
+ file = tmpfile();
+ if (!file) {
+ ksft_test_result_fail("tmpfile() failed\n");
+ return;
+ }
+
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_test_result_fail("fileno() failed\n");
+ return;
+ }
+
+ fn(fd, pagesize);
+ fclose(file);
+}
+
+static void run_with_local_tmpfile(test_fn fn, const char *desc)
+{
+ char filename[] = __FILE__"_tmpfile_XXXXXX";
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ ksft_test_result_fail("mkstemp() failed\n");
+ return;
+ }
+
+ if (unlink(filename)) {
+ ksft_test_result_fail("unlink() failed\n");
+ goto close;
+ }
+
+ fn(fd, pagesize);
+close:
+ close(fd);
+}
+
+static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
+ size_t hugetlbsize)
+{
+ int flags = MFD_HUGETLB;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
+
+ fd = memfd_create("test", flags);
+ if (fd < 0) {
+ ksft_test_result_skip("memfd_create() failed\n");
+ return;
+ }
+
+ fn(fd, hugetlbsize);
+ close(fd);
+}
+
+struct test_case {
+ const char *desc;
+ test_fn fn;
+};
+
+static void test_shared_rw_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW, true);
+}
+
+static void test_shared_rw_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW_FAST, true);
+}
+
+static void test_shared_ro_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO, true);
+}
+
+static void test_shared_ro_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO_FAST, true);
+}
+
+static void test_private_rw_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW, false);
+}
+
+static void test_private_rw_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW_FAST, false);
+}
+
+static void test_private_ro_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO, false);
+}
+
+static void test_private_ro_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO_FAST, false);
+}
+
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+static void test_shared_iouring(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_IOURING, true);
+}
+
+static void test_private_iouring(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_IOURING, false);
+}
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+static const struct test_case test_cases[] = {
+ {
+ "R/W longterm GUP pin in MAP_SHARED file mapping",
+ test_shared_rw_pin,
+ },
+ {
+ "R/W longterm GUP-fast pin in MAP_SHARED file mapping",
+ test_shared_rw_fast_pin,
+ },
+ {
+ "R/O longterm GUP pin in MAP_SHARED file mapping",
+ test_shared_ro_pin,
+ },
+ {
+ "R/O longterm GUP-fast pin in MAP_SHARED file mapping",
+ test_shared_ro_fast_pin,
+ },
+ {
+ "R/W longterm GUP pin in MAP_PRIVATE file mapping",
+ test_private_rw_pin,
+ },
+ {
+ "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping",
+ test_private_rw_fast_pin,
+ },
+ {
+ "R/O longterm GUP pin in MAP_PRIVATE file mapping",
+ test_private_ro_pin,
+ },
+ {
+ "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping",
+ test_private_ro_fast_pin,
+ },
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ {
+ "io_uring fixed buffer with MAP_SHARED file mapping",
+ test_shared_iouring,
+ },
+ {
+ "io_uring fixed buffer with MAP_PRIVATE file mapping",
+ test_private_iouring,
+ },
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+};
+
+static void run_test_case(struct test_case const *test_case)
+{
+ int i;
+
+ run_with_memfd(test_case->fn, test_case->desc);
+ run_with_tmpfile(test_case->fn, test_case->desc);
+ run_with_local_tmpfile(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_memfd_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static int tests_per_test_case(void)
+{
+ return 3 + nr_hugetlbsizes;
+}
+
+int main(int argc, char **argv)
+{
+ int i, err;
+
+ pagesize = getpagesize();
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case());
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ run_test_case(&test_cases[i]);
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 4adaad1b822f..20294553a5dd 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -57,9 +57,14 @@ enum {
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
/* Just the flags we need, copied from mm.h: */
+
+#ifndef FOLL_WRITE
#define FOLL_WRITE 0x01 /* check pte is writable */
-#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */
+#endif
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */
+#endif
FIXTURE(hmm)
{
int fd;
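
The guard pattern lets the test build against both old and new toolchain headers: supply a fallback definition only when the uapi header does not already provide the flag, and keep the fallback value synchronized with the kernel (FOLL_LONGTERM is 0x100 in current mm.h; the old hardcoded 0x10000 had gone stale). The idiom in isolation:

	#ifndef FOLL_LONGTERM
	#define FOLL_LONGTERM 0x100	/* must match the kernel's mm.h */
	#endif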
diff --git a/tools/testing/selftests/mm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c
index e2527f32005b..478bb1e989e9 100644
--- a/tools/testing/selftests/mm/hugepage-shm.c
+++ b/tools/testing/selftests/mm/hugepage-shm.c
@@ -35,10 +35,6 @@
#include <sys/shm.h>
#include <sys/mman.h>
-#ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
-#endif
-
#define LENGTH (256UL*1024*1024)
#define dprintf(x) printf(x)
diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c
index 557bdbd4f87e..5b354c209e93 100644
--- a/tools/testing/selftests/mm/hugepage-vmemmap.c
+++ b/tools/testing/selftests/mm/hugepage-vmemmap.c
@@ -13,10 +13,6 @@
#define MAP_LENGTH (2UL * 1024 * 1024)
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000 /* arch specific */
-#endif
-
#define PAGE_SIZE 4096
#define PAGE_COMPOUND_HEAD (1UL << 15)
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index 28426e30d9bc..d55322df4b73 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -65,11 +65,15 @@ void write_fault_pages(void *addr, unsigned long nr_pages)
void read_fault_pages(void *addr, unsigned long nr_pages)
{
- unsigned long dummy = 0;
+ volatile unsigned long dummy = 0;
unsigned long i;
- for (i = 0; i < nr_pages; i++)
+ for (i = 0; i < nr_pages; i++) {
dummy += *((unsigned long *)(addr + (i * huge_page_size)));
+
+ /* Prevent the compiler from optimizing out the entire loop: */
+ asm volatile("" : "+r" (dummy));
+ }
}
int main(int argc, char **argv)
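
Marking the accumulator volatile forces its own accesses, and the empty asm with a "+r" (read-write) operand additionally tells the compiler the value is consumed and possibly modified after every iteration, so neither the faulting loads nor the loop itself can be elided. The same idiom is applied to migration.c further down. A self-contained sketch of the barrier:

	#include <stdio.h>

	int main(void)
	{
		unsigned long sum = 0;
		int i;

		for (i = 0; i < 1000; i++) {
			sum += i;
			/* The empty asm consumes and may modify sum, so the
			 * loop cannot be folded into a closed-form result. */
			asm volatile("" : "+r" (sum));
		}
		printf("%lu\n", sum);
		return 0;
	}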
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index bf2d2a684edf..bf2d2a684edf 100644..100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c
index 97adc0f34f9c..030667cb5533 100644
--- a/tools/testing/selftests/mm/khugepaged.c
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -11,6 +11,7 @@
#include <string.h>
#include <unistd.h>
+#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
@@ -22,16 +23,6 @@
#include "vm_util.h"
-#ifndef MADV_PAGEOUT
-#define MADV_PAGEOUT 21
-#endif
-#ifndef MADV_POPULATE_READ
-#define MADV_POPULATE_READ 22
-#endif
-#ifndef MADV_COLLAPSE
-#define MADV_COLLAPSE 25
-#endif
-
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
static unsigned long page_size;
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index 435acebdc325..380b691d3eb9 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
printf("Size must be greater than 0\n");
return KSFT_FAIL;
}
+ break;
case 't':
{
int tmp = atoi(optarg);
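
The added break fixes an unintended switch fallthrough: after the size check, execution continued straight into the 't' case and reinterpreted optarg as the test type. The shape of the bug, with hypothetical variable names:

	switch (opt) {
	case 's':
		size = atol(optarg);
		break;	/* without this, 's' falls through into 't' */
	case 't':
		test_type = atoi(optarg);
		break;
	}

Compilers flag this class of bug when built with -Wimplicit-fallthrough.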
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index 262eae6b58f2..60547245e479 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -20,13 +20,6 @@
#include "../kselftest.h"
#include "vm_util.h"
-#ifndef MADV_POPULATE_READ
-#define MADV_POPULATE_READ 22
-#endif /* MADV_POPULATE_READ */
-#ifndef MADV_POPULATE_WRITE
-#define MADV_POPULATE_WRITE 23
-#endif /* MADV_POPULATE_WRITE */
-
/*
* For now, we're using 2 MiB of private anonymous memory for all tests.
*/
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index eed44322d1a6..598159f3df1f 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -13,10 +13,6 @@
#include <stdlib.h>
#include <unistd.h>
-#ifndef MAP_FIXED_NOREPLACE
-#define MAP_FIXED_NOREPLACE 0x100000
-#endif
-
static void dump_maps(void)
{
char cmd[32];
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
index 312889edb84a..193281560b61 100644
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -19,18 +19,6 @@
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000 /* arch specific */
-#endif
-
-#ifndef MAP_HUGE_SHIFT
-#define MAP_HUGE_SHIFT 26
-#endif
-
-#ifndef MAP_HUGE_MASK
-#define MAP_HUGE_MASK 0x3f
-#endif
-
/* Only ia64 requires this */
#ifdef __ia64__
#define ADDR (void *)(0x8000000000000000UL)
diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 6b8aeaa0bf7a..240f2d9dae7a 100644
--- a/tools/testing/selftests/mm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -17,9 +17,7 @@
#include <string.h>
#include <unistd.h>
-#ifndef MMAP_SZ
#define MMAP_SZ 4096
-#endif
#define BUG_ON(condition, description) \
do { \
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index 1cec8425e3ca..379581567f27 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -95,12 +95,15 @@ int migrate(uint64_t *ptr, int n1, int n2)
void *access_mem(void *ptr)
{
- uint64_t y = 0;
+ volatile uint64_t y = 0;
volatile uint64_t *x = ptr;
while (1) {
pthread_testcancel();
y += *x;
+
+ /* Prevent the compiler from optimizing out the writes to y: */
+ asm volatile("" : "+r" (y));
}
return NULL;
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index 6d71d972997b..301abb99e027 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -321,8 +321,8 @@ close_uffd:
munmap:
munmap(dst, pagesize);
free(src);
-#endif /* __NR_userfaultfd */
}
+#endif /* __NR_userfaultfd */
int main(void)
{
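
Relocating the #endif past the closing brace makes the #ifdef span the whole function, so a build without __NR_userfaultfd drops the function cleanly instead of leaving an unbalanced brace behind. The intended shape (hypothetical function name):

	#ifdef __NR_userfaultfd
	static void test_uffd_wp(void)
	{
		/* body that requires the userfaultfd syscall */
	}
	#endif /* __NR_userfaultfd */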
diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c
index 782ea94dee2f..1fba77df7f62 100644
--- a/tools/testing/selftests/mm/mlock-random-test.c
+++ b/tools/testing/selftests/mm/mlock-random-test.c
@@ -7,6 +7,7 @@
#include <sys/resource.h>
#include <sys/capability.h>
#include <sys/mman.h>
+#include <linux/mman.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ipc.h>
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 11b2301f3aa3..80cddc0de206 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -50,7 +50,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
printf("cannot parse /proc/self/maps\n");
goto out;
}
- stop = '\0';
sscanf(line, "%lx", &start);
sscanf(end_addr, "%lx", &end);
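
The deleted line assigned the NUL character to the pointer itself, i.e. it nulled stop rather than terminating the string (*stop = '\0' was presumably intended). It was also unnecessary: sscanf("%lx") stops at the first non-hex character on its own. The distinction, with a hypothetical separator lookup:

	char *stop = strchr(line, '-');

	*stop = '\0';	/* terminates the string at the separator */
	stop = '\0';	/* what the removed line did: NULL the pointer */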
diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 2a6e76c226bc..8e02991b313c 100644
--- a/tools/testing/selftests/mm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -4,14 +4,6 @@
#include <stdio.h>
#include <stdlib.h>
-#ifndef MLOCK_ONFAULT
-#define MLOCK_ONFAULT 1
-#endif
-
-#ifndef MCL_ONFAULT
-#define MCL_ONFAULT (MCL_FUTURE << 1)
-#endif
-
static int mlock2_(void *start, size_t len, int flags)
{
#ifdef __NR_mlock2
diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c
index 37b6d33b9e84..dca21042b679 100644
--- a/tools/testing/selftests/mm/mrelease_test.c
+++ b/tools/testing/selftests/mm/mrelease_test.c
@@ -9,18 +9,10 @@
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <asm-generic/unistd.h>
#include "vm_util.h"
-
#include "../kselftest.h"
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open -1
-#endif
-
-#ifndef __NR_process_mrelease
-#define __NR_process_mrelease -1
-#endif
-
#define MB(x) (x << 20)
#define MAX_SIZE_MB 1024
diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c
index f01dc4a85b0b..ca2359835e75 100644
--- a/tools/testing/selftests/mm/mremap_dontunmap.c
+++ b/tools/testing/selftests/mm/mremap_dontunmap.c
@@ -15,10 +15,6 @@
#include "../kselftest.h"
-#ifndef MREMAP_DONTUNMAP
-#define MREMAP_DONTUNMAP 4
-#endif
-
unsigned long page_size;
char *page_buffer;
diff --git a/tools/testing/selftests/mm/on-fault-limit.c b/tools/testing/selftests/mm/on-fault-limit.c
index 634d87dfb2a4..b5888d613f34 100644
--- a/tools/testing/selftests/mm/on-fault-limit.c
+++ b/tools/testing/selftests/mm/on-fault-limit.c
@@ -6,10 +6,6 @@
#include <sys/time.h>
#include <sys/resource.h>
-#ifndef MCL_ONFAULT
-#define MCL_ONFAULT (MCL_FUTURE << 1)
-#endif
-
static int test_limit(void)
{
int ret = 1;
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 1ebb586b2fbc..ae5df26104e5 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -3,9 +3,6 @@
#ifndef _PKEYS_POWERPC_H
#define _PKEYS_POWERPC_H
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 386
-#endif
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 384
# define SYS_pkey_free 385
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 72c14cd3ddc7..814758e109c0 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -5,29 +5,11 @@
#ifdef __i386__
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
#define REG_IP_IDX REG_EIP
#define si_pkey_offset 0x14
#else
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
#define REG_IP_IDX REG_RIP
#define si_pkey_offset 0x20
@@ -132,7 +114,7 @@ int pkey_reg_xstate_offset(void)
unsigned int ecx;
unsigned int edx;
int xstate_offset;
- int xstate_size;
+ int xstate_size = 0;
unsigned long XSTATE_CPUID = 0xd;
int leaf;
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index 0381c34fdd56..48dc151f8fca 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -294,15 +294,6 @@ void pkey_access_deny(int pkey)
pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
static char *si_code_str(int si_code)
{
if (si_code == SEGV_MAPERR)
@@ -476,7 +467,7 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
ptr, size, orig_prot, pkey);
errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
if (errno) {
dprintf2("SYS_mprotect_key sret: %d\n", sret);
dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
@@ -1684,7 +1675,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
return;
}
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey);
pkey_assert(sret < 0);
}
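
With the selftests now built against current uapi headers, the hand-maintained SYS_mprotect_key numbers removed from pkey-powerpc.h and pkey-x86.h above are no longer needed; __NR_pkey_mprotect comes from <asm/unistd.h> on every architecture that implements the syscall. A minimal wrapper, sketched (the test's own sys_mprotect_pkey() adds logging around the same call):

	#include <unistd.h>
	#include <sys/syscall.h>

	static long sys_pkey_mprotect(void *ptr, size_t size, int prot, int pkey)
	{
		return syscall(__NR_pkey_mprotect, ptr, size, prot, pkey);
	}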
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 4893eb60d96d..3f26f6e15b2a 100644..100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -24,7 +24,7 @@ separated by spaces:
- mmap
tests for mmap(2)
- gup_test
- tests for gup using gup_test interface
+ tests for gup
- userfaultfd
tests for userfaultfd(2)
- compaction
@@ -196,6 +196,8 @@ CATEGORY="gup_test" run_test ./gup_test -a
# Dump pages 0, 19, and 4096, using pin_user_pages:
CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
+CATEGORY="gup_test" run_test ./gup_longterm
+
CATEGORY="userfaultfd" run_test ./uffd-unit-tests
uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
@@ -242,18 +244,18 @@ if [ $VADDR64 -ne 0 ]; then
if [ "$ARCH" == "$ARCH_ARM64" ]; then
echo 6 > /proc/sys/vm/nr_hugepages
fi
- CATEGORY="hugevm" run_test ./va_high_addr_switch.sh
+ CATEGORY="hugevm" run_test bash ./va_high_addr_switch.sh
if [ "$ARCH" == "$ARCH_ARM64" ]; then
echo $prev_nr_hugepages > /proc/sys/vm/nr_hugepages
fi
fi # VADDR64
# vmalloc stability smoke test
-CATEGORY="vmalloc" run_test ./test_vmalloc.sh smoke
+CATEGORY="vmalloc" run_test bash ./test_vmalloc.sh smoke
CATEGORY="mremap" run_test ./mremap_dontunmap
-CATEGORY="hmm" run_test ./test_hmm.sh smoke
+CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
diff --git a/tools/testing/selftests/mm/test_hmm.sh b/tools/testing/selftests/mm/test_hmm.sh
index 46e19b5d648d..46e19b5d648d 100644..100755
--- a/tools/testing/selftests/mm/test_hmm.sh
+++ b/tools/testing/selftests/mm/test_hmm.sh
diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh
index d73b846736f1..d73b846736f1 100644..100755
--- a/tools/testing/selftests/mm/test_vmalloc.sh
+++ b/tools/testing/selftests/mm/test_vmalloc.sh
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 61c6250adf93..ba20d7504022 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -616,3 +616,62 @@ int copy_page(int ufd, unsigned long offset, bool wp)
{
return __copy_page(ufd, offset, false, wp);
}
+
+int uffd_open_dev(unsigned int flags)
+{
+ int fd, uffd;
+
+ fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+ uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags);
+ close(fd);
+
+ return uffd;
+}
+
+int uffd_open_sys(unsigned int flags)
+{
+#ifdef __NR_userfaultfd
+ return syscall(__NR_userfaultfd, flags);
+#else
+ return -1;
+#endif
+}
+
+int uffd_open(unsigned int flags)
+{
+ int uffd = uffd_open_sys(flags);
+
+ if (uffd < 0)
+ uffd = uffd_open_dev(flags);
+
+ return uffd;
+}
+
+int uffd_get_features(uint64_t *features)
+{
+ struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 };
+ /*
+ * This should by default work in most kernels; the feature list
+ * will be the same no matter what we pass in here.
+ */
+ int fd = uffd_open(UFFD_USER_MODE_ONLY);
+
+ if (fd < 0)
+ /* Maybe the kernel is older than user-only mode? */
+ fd = uffd_open(0);
+
+ if (fd < 0)
+ return fd;
+
+ if (ioctl(fd, UFFDIO_API, &uffdio_api)) {
+ close(fd);
+ return -errno;
+ }
+
+ *features = uffdio_api.features;
+ close(fd);
+
+ return 0;
+}
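
uffd_open() prefers the userfaultfd(2) syscall and falls back to the /dev/userfaultfd device node, so callers work regardless of which interface the running kernel exposes; uffd_get_features() probes the feature mask on a throwaway fd without committing the caller's own fd to an API handshake. A typical caller, sketched with error handling elided:

	uint64_t features;
	int uffd;

	if (uffd_get_features(&features))
		return;	/* no userfaultfd support at all */
	if (features & UFFD_FEATURE_MINOR_SHMEM)
		/* enable the minor-fault test variants, for example */;
	uffd = uffd_open(UFFD_USER_MODE_ONLY);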
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index 6068f2346b86..197f5262fe0d 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -110,6 +110,11 @@ int __copy_page(int ufd, unsigned long offset, bool retry, bool wp);
int copy_page(int ufd, unsigned long offset, bool wp);
void *uffd_poll_thread(void *arg);
+int uffd_open_dev(unsigned int flags);
+int uffd_open_sys(unsigned int flags);
+int uffd_open(unsigned int flags);
+int uffd_get_features(uint64_t *features);
+
#define TEST_ANON 1
#define TEST_HUGETLB 2
#define TEST_SHMEM 3
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index f1ad9eef1c3a..995ff13e74c7 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -88,16 +88,6 @@ static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus)
}
}
-static inline uint64_t uffd_minor_feature(void)
-{
- if (test_type == TEST_HUGETLB && map_shared)
- return UFFD_FEATURE_MINOR_HUGETLBFS;
- else if (test_type == TEST_SHMEM)
- return UFFD_FEATURE_MINOR_SHMEM;
- else
- return 0;
-}
-
static void *locking_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 269c86768a02..04d91f144d1c 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -109,12 +109,11 @@ static void uffd_test_pass(void)
ksft_inc_fail_cnt(); \
} while (0)
-#define uffd_test_skip(...) do { \
- printf("skipped [reason: "); \
- printf(__VA_ARGS__); \
- printf("]\n"); \
- ksft_inc_xskip_cnt(); \
- } while (0)
+static void uffd_test_skip(const char *message)
+{
+ printf("skipped [reason: %s]\n", message);
+ ksft_inc_xskip_cnt();
+}
/*
* Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll
@@ -1149,7 +1148,6 @@ int main(int argc, char *argv[])
uffd_test_case_t *test;
mem_type_t *mem_type;
uffd_test_args_t args;
- char test_name[128];
const char *errmsg;
int has_uffd, opt;
int i, j;
@@ -1192,10 +1190,8 @@ int main(int argc, char *argv[])
mem_type = &mem_types[j];
if (!(test->mem_targets & mem_type->mem_flag))
continue;
- snprintf(test_name, sizeof(test_name),
- "%s on %s", test->name, mem_type->name);
- uffd_test_start(test_name);
+ uffd_test_start("%s on %s", test->name, mem_type->name);
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 45cae7cab27e..45cae7cab27e 100644..100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 9b06a5034808..558c9cd8901c 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <fcntl.h>
+#include <dirent.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>
@@ -198,6 +199,32 @@ unsigned long default_huge_page_size(void)
return hps;
}
+int detect_hugetlb_page_sizes(size_t sizes[], int max)
+{
+ DIR *dir = opendir("/sys/kernel/mm/hugepages/");
+ int count = 0;
+
+ if (!dir)
+ return 0;
+
+ while (count < max) {
+ struct dirent *entry = readdir(dir);
+ size_t kb;
+
+ if (!entry)
+ break;
+ if (entry->d_type != DT_DIR)
+ continue;
+ if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
+ continue;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
+ kb);
+ }
+ closedir(dir);
+ return count;
+}
+
/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls)
@@ -242,62 +269,3 @@ int uffd_unregister(int uffd, void *addr, uint64_t len)
return ret;
}
-
-int uffd_open_dev(unsigned int flags)
-{
- int fd, uffd;
-
- fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
- if (fd < 0)
- return fd;
- uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags);
- close(fd);
-
- return uffd;
-}
-
-int uffd_open_sys(unsigned int flags)
-{
-#ifdef __NR_userfaultfd
- return syscall(__NR_userfaultfd, flags);
-#else
- return -1;
-#endif
-}
-
-int uffd_open(unsigned int flags)
-{
- int uffd = uffd_open_sys(flags);
-
- if (uffd < 0)
- uffd = uffd_open_dev(flags);
-
- return uffd;
-}
-
-int uffd_get_features(uint64_t *features)
-{
- struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 };
- /*
- * This should by default work in most kernels; the feature list
- * will be the same no matter what we pass in here.
- */
- int fd = uffd_open(UFFD_USER_MODE_ONLY);
-
- if (fd < 0)
- /* Maybe the kernel is older than user-only mode? */
- fd = uffd_open(0);
-
- if (fd < 0)
- return fd;
-
- if (ioctl(fd, UFFDIO_API, &uffdio_api)) {
- close(fd);
- return -errno;
- }
-
- *features = uffdio_api.features;
- close(fd);
-
- return 0;
-}
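
The uffd_open*() and uffd_get_features() bodies removed here are the same ones added to uffd-common.c earlier in this patch (code motion, no behavioral change). The new detect_hugetlb_page_sizes() walks /sys/kernel/mm/hugepages/, parsing each hugepages-<N>kB directory name into a size in bytes. Usage sketch:

	size_t sizes[10];
	int i, nr;

	nr = detect_hugetlb_page_sizes(sizes, 10);
	for (i = 0; i < nr; i++)
		printf("hugetlb page size: %zu bytes\n", sizes[i]);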
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index b950bd16083a..c7fa61f0dff8 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -44,14 +44,11 @@ bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
int64_t allocate_transhuge(void *ptr, int pagemap_fd);
unsigned long default_huge_page_size(void);
+int detect_hugetlb_page_sizes(size_t sizes[], int max);
int uffd_register(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor);
int uffd_unregister(int uffd, void *addr, uint64_t len);
-int uffd_open_dev(unsigned int flags);
-int uffd_open_sys(unsigned int flags);
-int uffd_open(unsigned int flags);
-int uffd_get_features(uint64_t *features);
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh
index 70a02301f4c2..70a02301f4c2 100644..100755
--- a/tools/testing/selftests/mm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index f27a7338b60e..2f9d378edec3 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -15,6 +15,7 @@ ip_local_port_range
ipsec
ipv6_flowlabel
ipv6_flowlabel_mgr
+log.txt
msg_zerocopy
nettest
psock_fanout
@@ -29,6 +30,7 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
+scm_pidfd
sk_bind_sendto_listen
sk_connect_zero_addr
socket
@@ -44,6 +46,7 @@ test_unix_oob
timestamping
tls
toeplitz
+tools
tun
txring_overwrite
txtimestamp
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index c12df57d5539..7f3ab2a93ed6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -84,6 +84,7 @@ TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 1e4b397cece6..221c387a7d7f 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,3 +1,4 @@
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
new file mode 100644
index 000000000000..a86222143d79
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+#define _GNU_SOURCE
+#include <error.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/socket.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../../kselftest_harness.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \
+ clean_errno(), ##__VA_ARGS__)
+
+#ifndef SCM_PIDFD
+#define SCM_PIDFD 0x04
+#endif
+
+static void child_die(void)
+{
+ exit(1);
+}
+
+static int safe_int(const char *numstr, int *converted)
+{
+ char *err = NULL;
+ long sli;
+
+ errno = 0;
+ sli = strtol(numstr, &err, 0);
+ if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN))
+ return -ERANGE;
+
+ if (errno != 0 && sli == 0)
+ return -EINVAL;
+
+ if (err == numstr || *err != '\0')
+ return -EINVAL;
+
+ if (sli > INT_MAX || sli < INT_MIN)
+ return -ERANGE;
+
+ *converted = (int)sli;
+ return 0;
+}
+
+static int char_left_gc(const char *buffer, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (buffer[i] == ' ' || buffer[i] == '\t')
+ continue;
+
+ return i;
+ }
+
+ return 0;
+}
+
+static int char_right_gc(const char *buffer, size_t len)
+{
+ int i;
+
+ for (i = len - 1; i >= 0; i--) {
+ if (buffer[i] == ' ' || buffer[i] == '\t' ||
+ buffer[i] == '\n' || buffer[i] == '\0')
+ continue;
+
+ return i + 1;
+ }
+
+ return 0;
+}
+
+static char *trim_whitespace_in_place(char *buffer)
+{
+ buffer += char_left_gc(buffer, strlen(buffer));
+ buffer[char_right_gc(buffer, strlen(buffer))] = '\0';
+ return buffer;
+}
+
+/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */
+static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
+{
+ int ret;
+ char path[512];
+ FILE *f;
+ size_t n = 0;
+ pid_t result = -1;
+ char *line = NULL;
+
+ snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
+
+ f = fopen(path, "re");
+ if (!f)
+ return -1;
+
+ while (getline(&line, &n, f) != -1) {
+ char *numstr;
+
+ if (strncmp(line, key, keylen))
+ continue;
+
+ numstr = trim_whitespace_in_place(line + keylen);
+ ret = safe_int(numstr, &result);
+ if (ret < 0)
+ goto out;
+
+ break;
+ }
+
+out:
+ free(line);
+ fclose(f);
+ return result;
+}
+
+static int cmsg_check(int fd)
+{
+ struct msghdr msg = { 0 };
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ struct ucred *ucred = NULL;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ int *pidfd = NULL;
+ pid_t parent_pid;
+ int err;
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
+ }
+
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
+
+ if (!pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ return 1;
+ }
+
+ return 0;
+}
+
+struct sock_addr {
+ char sock_name[32];
+ struct sockaddr_un listen_addr;
+ socklen_t addrlen;
+};
+
+FIXTURE(scm_pidfd)
+{
+ int server;
+ pid_t client_pid;
+ int startup_pipe[2];
+ struct sock_addr server_addr;
+ struct sock_addr *client_addr;
+};
+
+FIXTURE_VARIANT(scm_pidfd)
+{
+ int type;
+ bool abstract;
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, stream_pathname)
+{
+ .type = SOCK_STREAM,
+ .abstract = 0,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, stream_abstract)
+{
+ .type = SOCK_STREAM,
+ .abstract = 1,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, dgram_pathname)
+{
+ .type = SOCK_DGRAM,
+ .abstract = 0,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, dgram_abstract)
+{
+ .type = SOCK_DGRAM,
+ .abstract = 1,
+};
+
+FIXTURE_SETUP(scm_pidfd)
+{
+ self->client_addr = mmap(NULL, sizeof(*self->client_addr), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, self->client_addr);
+}
+
+FIXTURE_TEARDOWN(scm_pidfd)
+{
+ close(self->server);
+
+ kill(self->client_pid, SIGKILL);
+ waitpid(self->client_pid, NULL, 0);
+
+ if (!variant->abstract) {
+ unlink(self->server_addr.sock_name);
+ unlink(self->client_addr->sock_name);
+ }
+}
+
+static void fill_sockaddr(struct sock_addr *addr, bool abstract)
+{
+ char *sun_path_buf = (char *)&addr->listen_addr.sun_path;
+
+ addr->listen_addr.sun_family = AF_UNIX;
+ addr->addrlen = offsetof(struct sockaddr_un, sun_path);
+ snprintf(addr->sock_name, sizeof(addr->sock_name), "scm_pidfd_%d", getpid());
+ addr->addrlen += strlen(addr->sock_name);
+ if (abstract) {
+ *sun_path_buf = '\0';
+ addr->addrlen++;
+ sun_path_buf++;
+ } else {
+ unlink(addr->sock_name);
+ }
+ memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
+}
+
+static void client(FIXTURE_DATA(scm_pidfd) *self,
+ const FIXTURE_VARIANT(scm_pidfd) *variant)
+{
+ int err;
+ int cfd;
+ socklen_t len;
+ struct ucred peer_cred;
+ int peer_pidfd;
+ pid_t peer_pid;
+ int on = 0;
+
+ cfd = socket(AF_UNIX, variant->type, 0);
+ if (cfd < 0) {
+ log_err("socket");
+ child_die();
+ }
+
+ if (variant->type == SOCK_DGRAM) {
+ fill_sockaddr(self->client_addr, variant->abstract);
+
+ if (bind(cfd, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen)) {
+ log_err("bind");
+ child_die();
+ }
+ }
+
+ if (connect(cfd, (struct sockaddr *)&self->server_addr.listen_addr,
+ self->server_addr.addrlen) != 0) {
+ log_err("connect");
+ child_die();
+ }
+
+ on = 1;
+ if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ child_die();
+ }
+
+ if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ child_die();
+ }
+
+ close(self->startup_pipe[1]);
+
+ if (cmsg_check(cfd)) {
+ log_err("cmsg_check failed");
+ child_die();
+ }
+
+ /* Skip the SO_PEERCRED/SO_PEERPIDFD checks for SOCK_DGRAM, where they are not applicable. */
+ if (variant->type == SOCK_DGRAM)
+ return;
+
+ len = sizeof(peer_cred);
+ if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) {
+ log_err("Failed to get SO_PEERCRED");
+ child_die();
+ }
+
+ len = sizeof(peer_pidfd);
+ if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) {
+ log_err("Failed to get SO_PEERPIDFD");
+ child_die();
+ }
+
+ /* pid from SO_PEERCRED should point to the parent process PID */
+ if (peer_cred.pid != getppid()) {
+ log_err("peer_cred.pid != getppid(): %d != %d", peer_cred.pid, getppid());
+ child_die();
+ }
+
+ peer_pid = get_pid_from_fdinfo_file(peer_pidfd,
+ "Pid:", sizeof("Pid:") - 1);
+ if (peer_pid != peer_cred.pid) {
+ log_err("peer_pid != peer_cred.pid: %d != %d", peer_pid, peer_cred.pid);
+ child_die();
+ }
+}
+
+TEST_F(scm_pidfd, test)
+{
+ int err;
+ int pfd;
+ int child_status = 0;
+
+ self->server = socket(AF_UNIX, variant->type, 0);
+ ASSERT_NE(-1, self->server);
+
+ fill_sockaddr(&self->server_addr, variant->abstract);
+
+ err = bind(self->server, (struct sockaddr *)&self->server_addr.listen_addr, self->server_addr.addrlen);
+ ASSERT_EQ(0, err);
+
+ if (variant->type == SOCK_STREAM) {
+ err = listen(self->server, 1);
+ ASSERT_EQ(0, err);
+ }
+
+ err = pipe(self->startup_pipe);
+ ASSERT_NE(-1, err);
+
+ self->client_pid = fork();
+ ASSERT_NE(-1, self->client_pid);
+ if (self->client_pid == 0) {
+ close(self->server);
+ close(self->startup_pipe[0]);
+ client(self, variant);
+ exit(0);
+ }
+ close(self->startup_pipe[1]);
+
+ if (variant->type == SOCK_STREAM) {
+ pfd = accept(self->server, NULL, NULL);
+ ASSERT_NE(-1, pfd);
+ } else {
+ pfd = self->server;
+ }
+
+ /* Wait until the child reaches its checkpoint and closes its end of the startup pipe. */
+ read(self->startup_pipe[0], &err, sizeof(int));
+ close(self->startup_pipe[0]);
+
+ if (variant->type == SOCK_DGRAM) {
+ err = sendto(pfd, "x", sizeof(char), 0, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen);
+ ASSERT_NE(-1, err);
+ } else {
+ err = send(pfd, "x", sizeof(char), 0);
+ ASSERT_NE(-1, err);
+ }
+
+ close(pfd);
+ waitpid(self->client_pid, &child_status, 0);
+ ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+}
+
+TEST_HARNESS_MAIN
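
The pidfd extracted from the SCM_PIDFD control message is an ordinary file descriptor owned by the receiver: the test resolves it to a PID through the "Pid:" line of /proc/self/fdinfo, but it is equally usable with pidfd-aware APIs, and must eventually be closed. A sketch, assuming cmsg points at the SCM_PIDFD header as in cmsg_check():

	int pidfd = *(int *)CMSG_DATA(cmsg);

	/* signal 0: existence/permission probe of the target process */
	syscall(SYS_pidfd_send_signal, pidfd, 0, NULL, 0);
	close(pidfd);	/* the receiver owns the fd */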
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index 58edfc15d28b..e7ebe72e879d 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -100,7 +100,7 @@ void bind_sockets(struct __test_metadata *_metadata,
TEST_F(bind_wildcard, v4_v6)
{
bind_sockets(_metadata, self,
- (struct sockaddr *)&self->addr4, sizeof(self->addr6),
+ (struct sockaddr *)&self->addr4, sizeof(self->addr4),
(struct sockaddr *)&self->addr6, sizeof(self->addr6));
}
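
The one-character fix corrects a copy/paste slip: the IPv4 socket was bound with the IPv6 sockaddr length. Address and length must always come from the same object:

	struct sockaddr_in addr4;
	struct sockaddr_in6 addr6;

	bind(fd4, (struct sockaddr *)&addr4, sizeof(addr4));	/* not sizeof(addr6) */
	bind(fd6, (struct sockaddr *)&addr6, sizeof(addr6));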
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index d1d421ec10a3..cd3cc52c59b4 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -50,3 +50,4 @@ CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
CONFIG_VXLAN=m
CONFIG_IP_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index ee6880ac3e5e..d32a14ba069a 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -592,6 +592,20 @@ ipv4_ping_novrf()
done
#
+ # out, but don't use gateway if peer is not on link
+ #
+ a=${NSB_IP}
+ log_start
+ run_cmd ping -c 1 -w 1 -r ${a}
+ log_test_addr ${a} $? 0 "ping out (don't route), peer on link"
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Fails since peer is not on link"
+ run_cmd ping -c 1 -w 1 -r ${a}
+ log_test_addr ${a} $? 1 "ping out (don't route), peer not on link"
+
+ #
# in
#
for a in ${NSA_IP} ${NSA_LO_IP}
@@ -1105,6 +1119,59 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0()
set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept"
}
+ipv4_tcp_dontroute()
+{
+ local syncookies=$1
+ local nsa_syncookies
+ local nsb_syncookies
+ local a
+
+ #
+ # Link local connection tests (SO_DONTROUTE).
+ # Connections should succeed only when the remote IP address is
+ # on link (doesn't need to be routed through a gateway).
+ #
+
+ nsa_syncookies=$(ip netns exec "${NSA}" sysctl -n net.ipv4.tcp_syncookies)
+ nsb_syncookies=$(ip netns exec "${NSB}" sysctl -n net.ipv4.tcp_syncookies)
+ ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+ ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+
+ # Test with eth1 address (on link).
+
+ a=${NSB_IP}
+ log_start
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 0 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+ a=${NSB_IP}
+ log_start
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --server-dontroute
+ log_test_addr ${a} $? 0 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+ # Test with loopback address (routed).
+ #
+ # The client would use the eth1 address as source IP by default.
+ # Therefore, we need to use the -c option here, to force the use of the
+ # routed (loopback) address as source IP (so that the server will try
+ # to respond to a routed address and not a link local one).
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Should fail 'Network is unreachable' since server is not on link"
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 1 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Should timeout since server cannot respond (client is not on link)"
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --server-dontroute
+ log_test_addr ${a} $? 2 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+ ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${nsb_syncookies}
+ ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${nsa_syncookies}
+}
+
ipv4_tcp_novrf()
{
local a
@@ -1224,6 +1291,9 @@ ipv4_tcp_novrf()
log_test_addr ${a} $? 1 "No server, device client, local conn"
[ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf
+
+ ipv4_tcp_dontroute 0
+ ipv4_tcp_dontroute 2
}
ipv4_tcp_vrf()
@@ -1594,6 +1664,23 @@ ipv4_udp_novrf()
log_start
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 2 "No server, device client, local conn"
+
+ #
+ # Link local connection tests (SO_DONTROUTE).
+ # Connections should succeed only when the remote IP address is
+ # on link (doesn't need to be routed through a gateway).
+ #
+
+ a=${NSB_IP}
+ log_start
+ do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 0 "SO_DONTROUTE client"
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Should fail 'Network is unreachable' since server is not on link"
+ do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 1 "SO_DONTROUTE client"
}
ipv4_udp_vrf()
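
SO_DONTROUTE restricts a socket to directly connected destinations: traffic to an on-link peer is sent normally, while anything that would need a gateway fails with ENETUNREACH. That is the pass/fail split the new TCP and UDP cases assert through nettest's --client-dontroute and --server-dontroute flags. On the C side the option is a plain boolean setsockopt:

	int on = 1;

	/* refuse any destination that is not directly reachable */
	if (setsockopt(fd, SOL_SOCKET, SO_DONTROUTE, &on, sizeof(on)) < 0)
		perror("setsockopt(SO_DONTROUTE)");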
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 0f5e88c8f4ff..df8d90b51867 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1981,6 +1981,11 @@ basic()
run_cmd "$IP link set dev lo up"
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+ run_cmd "timeout 5 $IP nexthop"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# groups
#
@@ -2201,6 +2206,11 @@ basic_res()
run_cmd "$IP nexthop bucket list fdb"
log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+ run_cmd "timeout 5 $IP nexthop bucket"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# resilient nexthop buckets get requests
#
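
Nexthop IDs are 32-bit, and both new cases install an entry at the maximum ID (2^32 - 1) before dumping under a 5-second timeout: a dump that resumes from last_id + 1 wraps to 0 on unsigned overflow and would restart from the beginning forever. The arithmetic, sketched (the kernel's actual cursor logic is paraphrased):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t last_id = UINT32_MAX;	/* nexthop id 2^32 - 1 */
		uint32_t next = last_id + 1;	/* wraps to 0 */

		printf("dump would resume at id %u\n", next);	/* prints 0 */
		return 0;
	}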
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index a474c60fe348..770efbe24f0d 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -83,6 +83,8 @@ TEST_PROGS = bridge_igmp.sh \
tc_chains.sh \
tc_flower_router.sh \
tc_flower.sh \
+ tc_flower_l2_miss.sh \
+ tc_flower_cfm.sh \
tc_mpls_l2vpn.sh \
tc_police.sh \
tc_shblocks.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index ae3f9462a2b6..d0c6c499d5da 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -617,7 +617,7 @@ __cfg_test_port_ip_sg()
grep -q "permanent"
check_err $? "Entry not added as \"permanent\" when should"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_err $? "\"permanent\" entry has a pending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -626,7 +626,7 @@ __cfg_test_port_ip_sg()
grep -q "temp"
check_err $? "Entry not added as \"temp\" when should"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_fail $? "\"temp\" entry has an unpending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -659,7 +659,7 @@ __cfg_test_port_ip_sg()
grep -q "permanent"
check_err $? "Entry not marked as \"permanent\" after replace"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_err $? "Entry has a pending group timer after replace"
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
@@ -667,7 +667,7 @@ __cfg_test_port_ip_sg()
grep -q "temp"
check_err $? "Entry not marked as \"temp\" after replace"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_fail $? "Entry has an unpending group timer after replace"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -850,6 +850,7 @@ cfg_test()
__fwd_test_host_ip()
{
local grp=$1; shift
+ local dmac=$1; shift
local src=$1; shift
local mode=$1; shift
local name
@@ -872,27 +873,27 @@ __fwd_test_host_ip()
# Packet should only be flooded to multicast router ports when there is
# no matching MDB entry. The bridge is not configured as a multicast
# router port.
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 0
check_err $? "Packet locally received after flood"
# Install a regular port group entry and expect the packet to not be
# locally received.
bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 0
check_err $? "Packet locally received after installing a regular entry"
# Add a host entry and expect the packet to be locally received.
bridge mdb add dev br0 port br0 grp $grp temp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 1
check_err $? "Packet not locally received after adding a host entry"
# Remove the host entry and expect the packet to not be locally
# received.
bridge mdb del dev br0 port br0 grp $grp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 1
check_err $? "Packet locally received after removing a host entry"
@@ -905,8 +906,8 @@ __fwd_test_host_ip()
fwd_test_host_ip()
{
- __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4"
- __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6"
+ __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+ __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
}
fwd_test_host_l2()
@@ -966,6 +967,7 @@ fwd_test_host()
__fwd_test_port_ip()
{
local grp=$1; shift
+ local dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mode=$1; shift
@@ -999,43 +1001,43 @@ __fwd_test_port_ip()
vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
src_ip $invalid_src action drop
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 0
check_err $? "Packet from valid source received on H2 before adding entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 0
check_err $? "Packet from invalid source received on H2 before adding entry"
bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
filter_mode $filter_mode source_list $src_list
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 1
check_err $? "Packet from valid source not received on H2 after adding entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 0
check_err $? "Packet from invalid source received on H2 after adding entry"
bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
filter_mode exclude
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 2
check_err $? "Packet from valid source not received on H2 after allowing all sources"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 1
check_err $? "Packet from invalid source not received on H2 after allowing all sources"
bridge mdb del dev br0 port $swp2 grp $grp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 2
check_err $? "Packet from valid source received on H2 after deleting entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 1
check_err $? "Packet from invalid source received on H2 after deleting entry"
@@ -1047,11 +1049,11 @@ __fwd_test_port_ip()
fwd_test_port_ip()
{
- __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude"
- __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
"exclude"
- __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include"
- __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
"include"
}
@@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test()
filter_mode include source_list 192.0.2.1
# IS_IN ( 192.0.2.2 )
- $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
@@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test()
filter_mode include source_list 192.0.2.1
# IS_IN ( 192.0.2.2 )
- $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
@@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test()
# IS_IN ( 2001:db8:1::2 )
local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
- $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
@@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test()
filter_mode include source_list 2001:db8:1::1
# IS_IN ( 2001:db8:1::2 )
- $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
@@ -1206,6 +1208,11 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
+if ! bridge mdb help 2>&1 | grep -q "replace"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+ exit $ksft_skip
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
index ae255b662ba3..3da9d93ab36f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -252,7 +252,8 @@ ctl4_entries_add()
local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
local peer=$(locus_dev_peer $locus)
local GRP=239.1.1.${grp}
- $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+ local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
-t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
sleep 1
@@ -272,7 +273,8 @@ ctl4_entries_del()
local peer=$(locus_dev_peer $locus)
local GRP=239.1.1.${grp}
- $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+ local dmac=01:00:5e:00:00:02
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
-t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
sleep 1
! bridge mdb show dev br0 | grep -q $GRP
@@ -289,8 +291,10 @@ ctl6_entries_add()
local peer=$(locus_dev_peer $locus)
local SIP=fe80::1
local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
local p=$(mldv2_is_in_get $SIP $GRP $IPs)
- $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
sleep 1
local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
@@ -310,8 +314,10 @@ ctl6_entries_del()
local peer=$(locus_dev_peer $locus)
local SIP=fe80::1
local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
local p=$(mldv1_done_get $SIP $GRP)
- $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
sleep 1
! bridge mdb show dev br0 | grep -q $GRP
}
@@ -1328,6 +1334,11 @@ test_8021qvs()
switch_destroy
}
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+ echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+ exit $ksft_skip
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
index 5148d97a5df8..68ee92df3e07 100755
--- a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -132,6 +132,7 @@ switch_create()
#### BR1 ####
ip link add name br1 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
# Make sure the bridge uses the MAC address of the local port and not
# that of the VxLAN's device.
ip link set dev br1 address $(mac_get $swp1)
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index dbb9fcf759e0..aa2eafb7b243 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
ethtool -s $h1 autoneg on
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
index 072faa77f53b..17f89c3b7c02 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -108,6 +108,8 @@ no_cable()
ip link set dev $swp3 down
}
+skip_on_veth
+
setup_prepare
tests_run
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
index c580ad623848..39e736f30322 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
@@ -258,11 +258,6 @@ h2_destroy()
setup_prepare()
{
- check_ethtool_mm_support
- check_tc_fp_support
- require_command lldptool
- bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
@@ -278,6 +273,19 @@ cleanup()
h1_destroy
}
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+ ethtool --show-mm $netif 2>&1 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: $netif does not support MAC Merge"
+ exit $ksft_skip
+ fi
+done
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
index eb9ec4a68f84..7594bbb49029 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -99,6 +99,8 @@ test_stats_rx()
test_stats g2a rx
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
index 9f5b3e2e5e95..49fa94b53a1c 100755
--- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -14,6 +14,8 @@ ALL_TESTS="
NUM_NETIFS=4
source lib.sh
+require_command $TROUTE6
+
h1_create()
{
simple_if_init $h1 2001:1:1::2/64
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9ddb68dd6a08..f69015bf2dea 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes}
REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -163,6 +164,17 @@ check_port_mab_support()
fi
}
+skip_on_veth()
+{
+ local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+ jq -r '.[].linkinfo.info_kind')
+
+ if [[ $kind == veth ]]; then
+ echo "SKIP: Test cannot be run with veth pairs"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
exit $ksft_skip
@@ -225,6 +237,11 @@ create_netif_veth()
for ((i = 1; i <= NUM_NETIFS; ++i)); do
local j=$((i+1))
+ if [ -z ${NETIFS[p$i]} ]; then
+ echo "SKIP: Cannot create interface. Name not specified"
+ exit $ksft_skip
+ fi
+
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
ip link add ${NETIFS[p$i]} type veth \
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
index 360ca133bead..6c257ec03756 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -98,6 +98,7 @@ switch_create()
# Bridge between H1 and H2.
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
index aec752a22e9e..04fd14b0a9b7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -65,7 +65,8 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
- ip link add name br2 type bridge vlan_filtering 0
+ ip link add name br2 address $(mac_get $swp3) \
+ type bridge vlan_filtering 0
ip link set dev br2 up
ip link set dev $swp3 master br2
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index 1b27f2b0f196..f35313c76fac 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -35,7 +35,8 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
- ip link add name br2 type bridge vlan_filtering 0
+ ip link add name br2 address $(mac_get $swp3) \
+ type bridge vlan_filtering 0
ip link set dev br2 up
vlan_create $swp3 555
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index 91e431cd919e..c53148b1dc63 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -140,7 +140,8 @@ switch_create()
ip link set dev $swp3 up
ip link set dev $swp4 up
- ip link add name br1 type bridge vlan_filtering 1
+ ip link add name br1 address $(mac_get $swp3) \
+ type bridge vlan_filtering 1
team_create lag loadbalance $swp3 $swp4
ip link set dev lag master br1
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index aff88f78e339..5ea9d63915f7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -72,7 +72,8 @@ test_span_gre_ttl()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ mirror_install $swp1 ingress $tundev \
+ "prot ip flower $tcflags ip_prot icmp"
tc filter add dev $h3 ingress pref 77 prot $prot \
flower skip_hw ip_ttl 50 action pass
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
index 04979e5962e7..bb1adbb7b98a 100644
--- a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -60,6 +60,7 @@ mirror_topo_switch_create()
ip link set dev $swp3 up
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 64fbd211d907..af008fbf2725 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -60,7 +60,9 @@ h2_destroy()
switch_create()
{
- ip link add name br1 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
ip link set dev $swp2 master br1
diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh
index 4c50c0234bce..798b13525c02 100755
--- a/tools/testing/selftests/net/forwarding/q_in_vni.sh
+++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh
@@ -137,6 +137,7 @@ switch_create()
{
ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
# Make sure the bridge uses the MAC address of the local port and not
# that of the VxLAN device.
ip link set dev br1 address $(mac_get $swp1)
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
index ebc596a272f7..8ce0aed54ece 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -38,7 +38,8 @@ h2_destroy()
router_create()
{
- ip link add name br1 type bridge vlan_filtering 1
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
index fa6a88c50750..de2b2d5480dd 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -1,6 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.555 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 |
+# | | | | | |
+# | + $h1 | | | |
+# +----|-------------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|-------------------------------+ + $swp2 |
+# | | + $swp1 | 192.0.2.129/28 |
+# | | vid 555 | 2001:db8:2::1/64 |
+# | | | |
+# | | + BR1 (802.1q) | |
+# | | vid 555 pvid untagged | |
+# | | 192.0.2.2/28 | |
+# | | 2001:db8:1::2/64 | |
+# | +----------------------------------+ |
+# +---------------------------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
@@ -41,7 +63,7 @@ h2_destroy()
router_create()
{
- ip link add name br1 type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index bde11dc27873..3dd5fcbd3eaa 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -54,7 +54,9 @@ h2_destroy()
switch_create()
{
- ip link add name br1 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
ip link set dev $swp2 master br1
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index a96cff8e7219..b0f5e55d2d0b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -9,6 +9,8 @@ NUM_NETIFS=4
source tc_common.sh
source lib.sh
+require_command ncat
+
tcflags="skip_hw"
h1_create()
@@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test()
ip_proto icmp \
action drop
- ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
local rpid=$!
- ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+ ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
wait -n $rpid
cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
check_err $? "server output check failed"
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 683711f41aa9..b1daad19b01e 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -52,8 +52,8 @@ match_dst_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -78,8 +78,8 @@ match_src_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh
new file mode 100755
index 000000000000..3ca20df952eb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_cfm_opcode match_cfm_level match_cfm_level_and_opcode"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+u8_to_hex()
+{
+ local u8=$1; shift
+
+ printf "%02x" $u8
+}
+
+generate_cfm_hdr()
+{
+ local mdl=$1; shift
+ local op=$1; shift
+ local flags=$1; shift
+ local tlv_offset=$1; shift
+
+ local cfm_hdr=$(:
+ )"$(u8_to_hex $((mdl << 5))):"$( : MD level and Version
+ )"$(u8_to_hex $op):"$( : OpCode
+ )"$(u8_to_hex $flags):"$( : Flags
+ )"$(u8_to_hex $tlv_offset)"$( : TLV offset
+ )
+
+ echo $cfm_hdr
+}
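+
+# Illustration only (not used by the tests below): the header layout
+# follows IEEE 802.1ag CFM, with the MD level in the three high bits of
+# the first octet and the version in the low five bits. For instance,
+# generate_cfm_hdr 7 47 0 32 yields "e0:2f:00:20", i.e. level 7
+# (7 << 5 == 0xe0), opcode 0x2f, flags 0x00 and TLV offset 0x20.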
+
+match_cfm_opcode()
+{
+ local ethtype="89 02"; readonly ethtype
+ RET=0
+
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+ flower cfm op 47 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+ flower cfm op 43 action drop
+
+ pkt="$ethtype $(generate_cfm_hdr 7 47 0 32)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 6 5 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_err $? "Matched on the wrong opcode"
+
+ pkt="$ethtype $(generate_cfm_hdr 0 43 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Matched on the wrong opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct opcode"
+
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+
+ log_test "CFM opcode match test"
+}
+
+match_cfm_level()
+{
+ local ethtype="89 02"; readonly ethtype
+ RET=0
+
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+ flower cfm mdl 5 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+ flower cfm mdl 3 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 103 \
+ flower cfm mdl 0 action drop
+
+ pkt="$ethtype $(generate_cfm_hdr 5 42 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 6 1 0 70)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 0 1 0 70)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct level"
+
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_err $? "Matched on the wrong level"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct level"
+
+ pkt="$ethtype $(generate_cfm_hdr 3 0 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Matched on the wrong level"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct level"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Matched on the wrong level"
+
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 103 flower
+
+ log_test "CFM level match test"
+}
+
+match_cfm_level_and_opcode()
+{
+ local ethtype="89 02"; readonly ethtype
+ RET=0
+
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+ flower cfm mdl 5 op 41 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+ flower cfm mdl 7 op 42 action drop
+
+ pkt="$ethtype $(generate_cfm_hdr 5 41 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 7 3 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 3 42 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct level and opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_err $? "Matched on the wrong level and opcode"
+
+ pkt="$ethtype $(generate_cfm_hdr 7 42 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Matched on the wrong level and opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct level and opcode"
+
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+
+ log_test "CFM opcode and level match test"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
new file mode 100755
index 000000000000..20a7cb7222b8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -0,0 +1,353 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | |
+# +----|------------------+ +------------------|---+
+# | |
+# +----|-------------------------------------------------------------------|---+
+# | SW | | |
+# | +-|-------------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_l2_miss_unicast
+ test_l2_miss_multicast
+ test_l2_miss_ll_multicast
+ test_l2_miss_broadcast
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+test_l2_miss_unicast()
+{
+ local dmac=00:01:02:03:04:05
+ local dip=192.0.2.2
+ local sip=192.0.2.1
+
+ RET=0
+
+ # Unknown unicast.
+ tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \
+ flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+ # Known unicast.
+ tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \
+ flower indev $swp1 l2_miss 0 dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+
+ # Before adding FDB entry.
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unknown unicast filter was not hit before adding FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 0
+ check_err $? "Known unicast filter was hit before adding FDB entry"
+
+ # Adding FDB entry.
+ bridge fdb replace $dmac dev $swp2 master static
+
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unknown unicast filter was hit after adding FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Known unicast filter was not hit after adding FDB entry"
+
+ # Deleting FDB entry.
+ bridge fdb del $dmac dev $swp2 master static
+
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Unknown unicast filter was not hit after deleting FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Known unicast filter was hit after deleting FDB entry"
+
+ tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower
+
+ log_test "L2 miss - Unicast"
+}
+
+test_l2_miss_multicast_common()
+{
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+
+ RET=0
+
+ # Unregistered multicast.
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower indev $swp1 l2_miss 1 src_ip $sip dst_ip $dip \
+ action pass
+ # Registered multicast.
+ tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \
+ flower indev $swp1 l2_miss 0 src_ip $sip dst_ip $dip \
+ action pass
+
+ # Before adding MDB entry.
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 0
+ check_err $? "Registered multicast filter was hit before adding MDB entry"
+
+ # Adding MDB entry.
+ bridge mdb replace dev br1 port $swp2 grp $dip permanent
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unregistered multicast filter was hit after adding MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Registered multicast filter was not hit after adding MDB entry"
+
+ # Deleting MDB entry.
+ bridge mdb del dev br1 port $swp2 grp $dip
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Registered multicast filter was hit after deleting MDB entry"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+ log_test "L2 miss - Multicast ($name)"
+}
+
+test_l2_miss_multicast_ipv4()
+{
+ local proto="ipv4"
+ local sip=192.0.2.1
+ local dip=239.1.1.1
+ local dmac=01:00:5e:01:01:01
+ local mode="-4"
+ local name="IPv4"
+
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
+}
+
+test_l2_miss_multicast_ipv6()
+{
+ local proto="ipv6"
+ local sip=2001:db8:1::1
+ local dip=ff0e::1
+ local dmac=33:33:00:00:00:01
+ local mode="-6"
+ local name="IPv6"
+
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
+}
+
+test_l2_miss_multicast()
+{
+ # Configure $swp2 as a multicast router port so that it will forward
+ # both registered and unregistered multicast traffic.
+ bridge link set dev $swp2 mcast_router 2
+
+ # Forwarding according to MDB entries only takes place when the bridge
+ # detects that there is a valid querier in the network. Set the bridge
+ # as the querier and assign it a valid IPv6 link-local address to be
+ # used as the source address for MLD queries.
+ ip link set dev br1 type bridge mcast_querier 1
+ ip -6 address add fe80::1/64 nodad dev br1
+ # Wait for the default Query Response Interval (10 seconds) so the
+ # bridge can determine that there are no other queriers in the network.
+ sleep 10
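+ # (If needed, the querier state can be inspected with
+ # "ip -d link show dev br1", which reports the bridge's
+ # mcast_querier option.)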
+
+ test_l2_miss_multicast_ipv4
+ test_l2_miss_multicast_ipv6
+
+ ip -6 address del fe80::1/64 dev br1
+ ip link set dev br1 type bridge mcast_querier 0
+ bridge link set dev $swp2 mcast_router 1
+}
+
+test_l2_miss_ll_multicast_common()
+{
+ local proto=$1; shift
+ local dmac=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Filter was not hit"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+ log_test "L2 miss - Link-local multicast ($name)"
+}
+
+test_l2_miss_ll_multicast_ipv4()
+{
+ local proto=ipv4
+ local dmac=01:00:5e:00:00:01
+ local sip=192.0.2.1
+ local dip=224.0.0.1
+ local mode="-4"
+ local name="IPv4"
+
+ test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast_ipv6()
+{
+ local proto=ipv6
+ local dmac=33:33:00:00:00:01
+ local sip=2001:db8:1::1
+ local dip=ff02::1
+ local mode="-6"
+ local name="IPv6"
+
+ test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast()
+{
+ test_l2_miss_ll_multicast_ipv4
+ test_l2_miss_ll_multicast_ipv6
+}
+
+test_l2_miss_broadcast()
+{
+ local dmac=ff:ff:ff:ff:ff:ff
+ local smac=00:01:02:03:04:05
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol all handle 101 pref 1 \
+ flower l2_miss 1 dst_mac $dmac src_mac $smac \
+ action pass
+ tc filter add dev $swp2 egress protocol all handle 102 pref 1 \
+ flower l2_miss 0 dst_mac $dmac src_mac $smac \
+ action pass
+
+ $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_err $? "L2 miss filter was hit when should not"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "L2 no miss filter was not hit when should"
+
+ tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower
+
+ log_test "L2 miss - Broadcast"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5ac184d51809..5a5dd9034819 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -104,11 +104,14 @@ tunnel_key_nofrag_test()
local i
tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
- flower ip_flags nofrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofrag action drop
tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
- flower ip_flags firstfrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags firstfrag action drop
tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
- flower ip_flags nofirstfrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofirstfrag action drop
# test 'nofrag' set
tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 6032f9b23c4c..e317c2e44dae 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -6,6 +6,7 @@ CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
+CONFIG_SYN_COOKIES=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_NETLINK=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 773dd770a567..bbae40882bfa 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -718,6 +718,7 @@ table inet mangle {
EOF
if [ $? -ne 0 ]; then
echo "SKIP: $msg, could not load nft ruleset"
+ mptcp_lib_fail_if_expected_feature "nft rules"
return
fi
@@ -733,6 +734,7 @@ EOF
if [ $? -ne 0 ]; then
ip netns exec "$listener_ns" nft flush ruleset
echo "SKIP: $msg, ip $r6flag rule failed"
+ mptcp_lib_fail_if_expected_feature "ip rule"
return
fi
@@ -741,6 +743,7 @@ EOF
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
echo "SKIP: $msg, ip route add local $local_addr failed"
+ mptcp_lib_fail_if_expected_feature "ip route"
return
fi
@@ -809,7 +812,7 @@ run_tests_disconnect()
cat $cin $cin $cin > "$cin".disconnect
- # force do_transfer to cope with the multiple tranmissions
+ # force do_transfer to cope with the multiple transmissions
sin="$cin.disconnect"
cin="$cin.disconnect"
cin_disconnect="$old_cin"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 0ae8cafde439..d01b73a8ed0f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -39,6 +39,7 @@ evts_ns1=""
evts_ns2=""
evts_ns1_pid=0
evts_ns2_pid=0
+stats_dumped=0
declare -A all_tests
declare -a only_tests_ids
@@ -49,6 +50,10 @@ TEST_NAME=""
nr_blank=40
export FAILING_LINKS=""
+export test_linkfail=0
+export addr_nr_ns1=0
+export addr_nr_ns2=0
+export sflags=""
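+# These are now set per-test as environment prefixes on the run_tests
+# invocation instead of positional arguments, e.g.:
+# addr_nr_ns2=-1 run_tests $ns1 $ns2 10.0.1.1 slow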
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -92,6 +97,7 @@ init_partial()
fi
done
+ stats_dumped=0
check_invert=0
validate_checksum=$checksum
FAILING_LINKS=""
@@ -156,9 +162,7 @@ check_tools()
elif ! iptables -V &> /dev/null; then
echo "SKIP: Could not run all tests without iptables tool"
exit $ksft_skip
- fi
-
- if ! ip6tables -V &> /dev/null; then
+ elif ! ip6tables -V &> /dev/null; then
echo "SKIP: Could not run all tests without ip6tables tool"
exit $ksft_skip
fi
@@ -434,6 +438,9 @@ fail_test()
{
ret=1
failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+
+ [ "${stats_dumped}" = 0 ] && dump_stats
+ stats_dumped=1
}
get_failed_tests_ids()
@@ -584,6 +591,26 @@ wait_rm_addr()
done
}
+rm_sf_count()
+{
+ get_counter "${1}" "MPTcpExtRmSubflow"
+}
+
+# $1: ns, $2: old rm_sf counter in $ns
+wait_rm_sf()
+{
+ local ns="${1}"
+ local old_cnt="${2}"
+ local cnt
+
+ local i
+ for i in $(seq 10); do
+ cnt=$(rm_sf_count ${ns})
+ [ "$cnt" = "${old_cnt}" ] || break
+ sleep 0.1
+ done
+}
+
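+# A typical caller snapshots the counter before triggering the removal,
+# for example (illustrative sketch only):
+# cnt=$(rm_sf_count ${ns2})
+# ... trigger the subflow removal ...
+# wait_rm_sf ${ns2} ${cnt}
+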
wait_mpj()
{
local ns="${1}"
@@ -678,6 +705,7 @@ pm_nl_del_endpoint()
local addr=$3
if [ $ip_mptcp -eq 1 ]; then
+ [ $id -ne 0 ] && addr=''
ip -n $ns mptcp endpoint delete id $id $addr
else
ip netns exec $ns ./pm_nl_ctl del $id $addr
@@ -768,10 +796,11 @@ pm_nl_check_endpoint()
fi
if [ $ip_mptcp -eq 1 ]; then
+ # get line and trim trailing whitespace
line=$(ip -n $ns mptcp endpoint show $id)
+ line="${line% }"
# the dump order is: address id flags port dev
- expected_line="$addr"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
+ [ -n "$addr" ] && expected_line="$addr"
expected_line="$expected_line $id"
[ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
[ -n "$dev" ] && expected_line="$expected_line $dev"
@@ -793,135 +822,11 @@ pm_nl_check_endpoint()
fi
}
-do_transfer()
+pm_nl_set_endpoint()
{
local listener_ns="$1"
local connector_ns="$2"
- local cl_proto="$3"
- local srv_proto="$4"
- local connect_addr="$5"
- local test_link_fail="$6"
- local addr_nr_ns1="$7"
- local addr_nr_ns2="$8"
- local speed="$9"
- local sflags="${10}"
-
- local port=$((10000 + TEST_COUNT - 1))
- local cappid
- local userspace_pm=0
-
- :> "$cout"
- :> "$sout"
- :> "$capout"
-
- if [ $capture -eq 1 ]; then
- local capuser
- if [ -z $SUDO_USER ] ; then
- capuser=""
- else
- capuser="-Z $SUDO_USER"
- fi
-
- capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
-
- echo "Capturing traffic for test $TEST_COUNT into $capfile"
- ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
- cappid=$!
-
- sleep 1
- fi
-
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
-
- local extra_args
- if [ $speed = "fast" ]; then
- extra_args="-j"
- elif [ $speed = "slow" ]; then
- extra_args="-r 50"
- elif [[ $speed = "speed_"* ]]; then
- extra_args="-r ${speed:6}"
- fi
-
- if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then
- userspace_pm=1
- addr_nr_ns1=${addr_nr_ns1:10}
- fi
-
- local flags="subflow"
- local extra_cl_args=""
- local extra_srv_args=""
- local trunc_size=""
- if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
- if [ ${test_link_fail} -le 1 ]; then
- echo "fastclose tests need test_link_fail argument"
- fail_test
- return 1
- fi
-
- # disconnect
- trunc_size=${test_link_fail}
- local side=${addr_nr_ns2:10}
-
- if [ ${side} = "client" ]; then
- extra_cl_args="-f ${test_link_fail}"
- extra_srv_args="-f -1"
- elif [ ${side} = "server" ]; then
- extra_srv_args="-f ${test_link_fail}"
- extra_cl_args="-f -1"
- else
- echo "wrong/unknown fastclose spec ${side}"
- fail_test
- return 1
- fi
- addr_nr_ns2=0
- elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then
- userspace_pm=1
- addr_nr_ns2=${addr_nr_ns2:10}
- elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
- flags="${flags},fullmesh"
- addr_nr_ns2=${addr_nr_ns2:9}
- fi
-
- extra_srv_args="$extra_args $extra_srv_args"
- if [ "$test_link_fail" -gt 1 ];then
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sinfail" > "$sout" &
- else
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sin" > "$sout" &
- fi
- local spid=$!
-
- wait_local_port_listen "${listener_ns}" "${port}"
-
- extra_cl_args="$extra_args $extra_cl_args"
- if [ "$test_link_fail" -eq 0 ];then
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr < "$cin" > "$cout" &
- elif [ "$test_link_fail" -eq 1 ] || [ "$test_link_fail" -eq 2 ];then
- ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
- tee "$cinsent" | \
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr > "$cout" &
- else
- tee "$cinsent" < "$cinfail" | \
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr > "$cout" &
- fi
- local cpid=$!
+ local connect_addr="$3"
# let the mptcp subflow be established in background before
# doing endpoint manipulation
@@ -933,7 +838,6 @@ do_transfer()
local counter=2
local add_nr_ns1=${addr_nr_ns1}
local id=10
- local tk
while [ $add_nr_ns1 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -941,24 +845,7 @@ do_transfer()
else
addr="10.0.$counter.1"
fi
- if [ $userspace_pm -eq 0 ]; then
- pm_nl_add_endpoint $ns1 $addr flags signal
- else
- tk=$(grep "type:1," "$evts_ns1" |
- sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id
- sleep 1
- sp=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- da=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
- dp=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id
- ip netns exec ${listener_ns} ./pm_nl_ctl dsf lip "::ffff:$addr" \
- lport $sp rip $da rport $dp token $tk
- fi
-
+ pm_nl_add_endpoint $ns1 $addr flags signal
counter=$((counter + 1))
add_nr_ns1=$((add_nr_ns1 - 1))
id=$((id + 1))
@@ -1003,7 +890,6 @@ do_transfer()
local add_nr_ns2=${addr_nr_ns2}
local counter=3
local id=20
- local tk da dp sp
while [ $add_nr_ns2 -gt 0 ]; do
local addr
if is_v6 "${connect_addr}"; then
@@ -1011,21 +897,7 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- if [ $userspace_pm -eq 0 ]; then
- pm_nl_add_endpoint $ns2 $addr flags $flags
- else
- tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
- dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \
- rip $da rport $dp token $tk
- sleep 1
- sp=$(grep "type:10" "$evts_ns2" |
- sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec ${connector_ns} ./pm_nl_ctl rem token $tk id $id
- ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \
- rip $da rport $dp token $tk
- fi
+ pm_nl_add_endpoint $ns2 $addr flags $flags
counter=$((counter + 1))
add_nr_ns2=$((add_nr_ns2 - 1))
id=$((id + 1))
@@ -1094,6 +966,126 @@ do_transfer()
done
done
fi
+}
+
+do_transfer()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local cl_proto="$3"
+ local srv_proto="$4"
+ local connect_addr="$5"
+ local speed="$6"
+
+ local port=$((10000 + TEST_COUNT - 1))
+ local cappid
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ if [ $capture -eq 1 ]; then
+ local capuser
+ if [ -z "${SUDO_USER}" ]; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
+
+ echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ cappid=$!
+
+ sleep 1
+ fi
+
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+
+ local extra_args
+ if [ $speed = "fast" ]; then
+ extra_args="-j"
+ elif [ $speed = "slow" ]; then
+ extra_args="-r 50"
+ elif [[ $speed = "speed_"* ]]; then
+ extra_args="-r ${speed:6}"
+ fi
+
+ local flags="subflow"
+ local extra_cl_args=""
+ local extra_srv_args=""
+ local trunc_size=""
+ if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
+ if [ ${test_linkfail} -le 1 ]; then
+ echo "fastclose tests need test_linkfail argument"
+ fail_test
+ return 1
+ fi
+
+ # disconnect
+ trunc_size=${test_linkfail}
+ local side=${addr_nr_ns2:10}
+
+ if [ ${side} = "client" ]; then
+ extra_cl_args="-f ${test_linkfail}"
+ extra_srv_args="-f -1"
+ elif [ ${side} = "server" ]; then
+ extra_srv_args="-f ${test_linkfail}"
+ extra_cl_args="-f -1"
+ else
+ echo "wrong/unknown fastclose spec ${side}"
+ fail_test
+ return 1
+ fi
+ addr_nr_ns2=0
+ elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${addr_nr_ns2:9}
+ fi
+
+ extra_srv_args="$extra_args $extra_srv_args"
+ if [ "$test_linkfail" -gt 1 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_srv_args "::" < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_srv_args "::" < "$sin" > "$sout" &
+ fi
+ local spid=$!
+
+ wait_local_port_listen "${listener_ns}" "${port}"
+
+ extra_cl_args="$extra_args $extra_cl_args"
+ if [ "$test_linkfail" -eq 0 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr < "$cin" > "$cout" &
+ elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
+ tee "$cinsent" | \
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
+ else
+ tee "$cinsent" < "$cinfail" | \
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
+ fi
+ local cpid=$!
+
+ pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
wait $cpid
local retc=$?
@@ -1124,13 +1116,13 @@ do_transfer()
return 1
fi
- if [ "$test_link_fail" -gt 1 ];then
+ if [ "$test_linkfail" -gt 1 ];then
check_transfer $sinfail $cout "file received by client" $trunc_size
else
check_transfer $sin $cout "file received by client" $trunc_size
fi
retc=$?
- if [ "$test_link_fail" -eq 0 ];then
+ if [ "$test_linkfail" -eq 0 ];then
check_transfer $cin $sout "file received by server" $trunc_size
else
check_transfer $cinsent $sout "file received by server" $trunc_size
@@ -1163,11 +1155,7 @@ run_tests()
local listener_ns="$1"
local connector_ns="$2"
local connect_addr="$3"
- local test_linkfail="${4:-0}"
- local addr_nr_ns1="${5:-0}"
- local addr_nr_ns2="${6:-0}"
- local speed="${7:-fast}"
- local sflags="${8:-""}"
+ local speed="${4:-fast}"
local size
@@ -1211,8 +1199,7 @@ run_tests()
make_file "$sinfail" "server" $size
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
- ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${speed}
}
dump_stats()
@@ -1228,7 +1215,6 @@ chk_csum_nr()
local csum_ns1=${1:-0}
local csum_ns2=${2:-0}
local count
- local dump_stats
local extra_msg=""
local allow_multi_errors_ns1=0
local allow_multi_errors_ns2=0
@@ -1253,7 +1239,6 @@ chk_csum_nr()
{ [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
echo "[fail] got $count data checksum error[s] expected $csum_ns1"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1268,11 +1253,9 @@ chk_csum_nr()
{ [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
echo "[fail] got $count data checksum error[s] expected $csum_ns2"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && dump_stats
echo "$extra_msg"
}
@@ -1283,7 +1266,6 @@ chk_fail_nr()
local fail_rx=$2
local ns_invert=${3:-""}
local count
- local dump_stats
local ns_tx=$ns1
local ns_rx=$ns2
local extra_msg=""
@@ -1316,7 +1298,6 @@ chk_fail_nr()
{ [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1332,13 +1313,10 @@ chk_fail_nr()
{ [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && dump_stats
-
echo "$extra_msg"
}
@@ -1348,7 +1326,6 @@ chk_fclose_nr()
local fclose_rx=$2
local ns_invert=$3
local count
- local dump_stats
local ns_tx=$ns2
local ns_rx=$ns1
local extra_msg=" "
@@ -1367,7 +1344,6 @@ chk_fclose_nr()
extra_msg="$extra_msg,tx=$count"
echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1380,13 +1356,10 @@ chk_fclose_nr()
extra_msg="$extra_msg,rx=$count"
echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && dump_stats
-
echo "$extra_msg"
}
@@ -1396,7 +1369,6 @@ chk_rst_nr()
local rst_rx=$2
local ns_invert=${3:-""}
local count
- local dump_stats
local ns_tx=$ns1
local ns_rx=$ns2
local extra_msg=""
@@ -1414,7 +1386,6 @@ chk_rst_nr()
elif [ $count -lt $rst_tx ]; then
echo "[fail] got $count MP_RST[s] TX expected $rst_tx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1426,13 +1397,10 @@ chk_rst_nr()
elif [ "$count" -lt "$rst_rx" ]; then
echo "[fail] got $count MP_RST[s] RX expected $rst_rx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && dump_stats
-
echo "$extra_msg"
}
@@ -1441,7 +1409,6 @@ chk_infi_nr()
local infi_tx=$1
local infi_rx=$2
local count
- local dump_stats
printf "%-${nr_blank}s %s" " " "itx"
count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx")
@@ -1450,7 +1417,6 @@ chk_infi_nr()
elif [ "$count" != "$infi_tx" ]; then
echo "[fail] got $count infinite map[s] TX expected $infi_tx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1462,12 +1428,9 @@ chk_infi_nr()
elif [ "$count" != "$infi_rx" ]; then
echo "[fail] got $count infinite map[s] RX expected $infi_rx"
fail_test
- dump_stats=1
else
echo "[ ok ]"
fi
-
- [ "${dump_stats}" = 1 ] && dump_stats
}
chk_join_nr()
@@ -1482,7 +1445,6 @@ chk_join_nr()
local infi_nr=${8:-0}
local corrupted_pkts=${9:-0}
local count
- local dump_stats
local with_cookie
local title="${TEST_NAME}"
@@ -1497,7 +1459,6 @@ chk_join_nr()
elif [ "$count" != "$syn_nr" ]; then
echo "[fail] got $count JOIN[s] syn expected $syn_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1516,7 +1477,6 @@ chk_join_nr()
else
echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
fail_test
- dump_stats=1
fi
else
echo -n "[ ok ]"
@@ -1529,11 +1489,9 @@ chk_join_nr()
elif [ "$count" != "$ack_nr" ]; then
echo "[fail] got $count JOIN[s] ack expected $ack_nr"
fail_test
- dump_stats=1
else
echo "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && dump_stats
if [ $validate_checksum -eq 1 ]; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@@ -1593,7 +1551,6 @@ chk_add_nr()
local mis_syn_nr=${7:-0}
local mis_ack_nr=${8:-0}
local count
- local dump_stats
local timeout
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
@@ -1607,7 +1564,6 @@ chk_add_nr()
elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1619,7 +1575,6 @@ chk_add_nr()
elif [ "$count" != "$echo_nr" ]; then
echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1632,7 +1587,6 @@ chk_add_nr()
elif [ "$count" != "$port_nr" ]; then
echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr"
fail_test
- dump_stats=1
else
echo "[ ok ]"
fi
@@ -1645,7 +1599,6 @@ chk_add_nr()
echo "[fail] got $count JOIN[s] syn with a different \
port-number expected $syn_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1658,7 +1611,6 @@ chk_add_nr()
echo "[fail] got $count JOIN[s] synack with a different \
port-number expected $syn_ack_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1671,7 +1623,6 @@ chk_add_nr()
echo "[fail] got $count JOIN[s] ack with a different \
port-number expected $ack_nr"
fail_test
- dump_stats=1
else
echo "[ ok ]"
fi
@@ -1684,7 +1635,6 @@ chk_add_nr()
echo "[fail] got $count JOIN[s] syn with a mismatched \
port-number expected $mis_syn_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1697,15 +1647,46 @@ chk_add_nr()
echo "[fail] got $count JOIN[s] ack with a mismatched \
port-number expected $mis_ack_nr"
fail_test
- dump_stats=1
else
echo "[ ok ]"
fi
else
echo ""
fi
+}
- [ "${dump_stats}" = 1 ] && dump_stats
+chk_add_tx_nr()
+{
+ local add_tx_nr=$1
+ local echo_tx_nr=$2
+ local timeout
+ local count
+
+ timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+
+ printf "%-${nr_blank}s %s" " " "add TX"
+ count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ # if the test configured a short timeout, tolerate more ADD_ADDR
+ # options than expected, due to retransmissions
+ elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+ echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr"
+ fail_test
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - echo TX "
+ count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$echo_tx_nr" ]; then
+ echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
+ fail_test
+ else
+ echo "[ ok ]"
+ fi
}
chk_rm_nr()
@@ -1715,7 +1696,6 @@ chk_rm_nr()
local invert
local simult
local count
- local dump_stats
local addr_ns=$ns1
local subflow_ns=$ns2
local extra_msg=""
@@ -1743,7 +1723,6 @@ chk_rm_nr()
elif [ "$count" != "$rm_addr_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1767,27 +1746,38 @@ chk_rm_nr()
else
echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
fail_test
- dump_stats=1
fi
elif [ "$count" != "$rm_subflow_nr" ]; then
echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
- [ "${dump_stats}" = 1 ] && dump_stats
-
echo "$extra_msg"
}
+chk_rm_tx_nr()
+{
+ local rm_addr_tx_nr=$1
+ local count
+
+ printf "%-${nr_blank}s %s" " " "rm TX "
+ count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$rm_addr_tx_nr" ]; then
+ echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr"
+ fail_test
+ else
+ echo "[ ok ]"
+ fi
+}
+
chk_prio_nr()
{
local mp_prio_nr_tx=$1
local mp_prio_nr_rx=$2
local count
- local dump_stats
printf "%-${nr_blank}s %s" " " "ptx"
count=$(get_counter ${ns1} "MPTcpExtMPPrioTx")
@@ -1796,7 +1786,6 @@ chk_prio_nr()
elif [ "$count" != "$mp_prio_nr_tx" ]; then
echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
fail_test
- dump_stats=1
else
echo -n "[ ok ]"
fi
@@ -1808,12 +1797,9 @@ chk_prio_nr()
elif [ "$count" != "$mp_prio_nr_rx" ]; then
echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
fail_test
- dump_stats=1
else
echo "[ ok ]"
fi
-
- [ "${dump_stats}" = 1 ] && dump_stats
}
chk_subflow_nr()
@@ -1845,37 +1831,31 @@ chk_subflow_nr()
ss -N $ns1 -tOni
ss -N $ns1 -tOni | grep token
ip -n $ns1 mptcp endpoint
- dump_stats
fi
}
chk_mptcp_info()
{
- local nr_info=$1
- local info
+ local info1=$1
+ local exp1=$2
+ local info2=$3
+ local exp2=$4
local cnt1
local cnt2
local dump_stats
- if [[ $nr_info = "subflows_"* ]]; then
- info="subflows"
- nr_info=${nr_info:9}
- else
- echo "[fail] unsupported argument: $nr_info"
- fail_test
- return 1
- fi
+ printf "%-${nr_blank}s %-30s" " " "mptcp_info $info1:$info2=$exp1:$exp2"
- printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info"
-
- cnt1=$(ss -N $ns1 -inmHM | grep "$info:" |
- sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q')
+ cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" |
+ sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q')
+ cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" |
+ sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q')
+ # 'ss' only displays active connections and counters that are not 0.
[ -z "$cnt1" ] && cnt1=0
- cnt2=$(ss -N $ns2 -inmHM | grep "$info:" |
- sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q')
[ -z "$cnt2" ] && cnt2=0
- if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then
- echo "[fail] got $cnt1:$cnt2 $info expected $nr_info"
+
+ if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then
+ echo "[fail] got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2"
fail_test
dump_stats=1
else
@@ -1885,7 +1865,6 @@ chk_mptcp_info()
if [ "$dump_stats" = 1 ]; then
ss -N $ns1 -inmHM
ss -N $ns2 -inmHM
- dump_stats
fi
}
@@ -2009,7 +1988,7 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
fi
@@ -2020,7 +1999,7 @@ subflows_error_tests()
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
fi
@@ -2031,7 +2010,7 @@ subflows_error_tests()
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
fi
@@ -2043,7 +2022,7 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+ run_tests $ns1 $ns2 10.0.1.1 slow &
# mpj subflow will be in TW after the reset
wait_attempt_fail $ns2
@@ -2063,6 +2042,7 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
+ chk_add_tx_nr 1 1
chk_add_nr 1 1
fi
@@ -2141,7 +2121,7 @@ signal_address_tests()
# the peer could possibly miss some addr notification, allow retransmission
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
# It is not directly linked to the commit introducing this
# symbol but for the parent one which is linked anyway.
@@ -2173,7 +2153,8 @@ link_failure_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 1
+ test_linkfail=1 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_stale_nr $ns2 1 5 1
@@ -2188,7 +2169,8 @@ link_failure_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 2
+ test_linkfail=2 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_stale_nr $ns2 1 -1 1
@@ -2201,9 +2183,9 @@ link_failure_tests()
pm_nl_set_limits $ns1 0 2
pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
pm_nl_set_limits $ns2 1 2
- FAILING_LINKS="1"
pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
- run_tests $ns1 $ns2 10.0.1.1 1
+ FAILING_LINKS="1" test_linkfail=1 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_link_usage $ns2 ns2eth3 $cinsent 0
@@ -2217,8 +2199,8 @@ link_failure_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
- FAILING_LINKS="1 2"
- run_tests $ns1 $ns2 10.0.1.1 1
+ FAILING_LINKS="1 2" test_linkfail=1 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_stale_nr $ns2 2 4 2
@@ -2233,8 +2215,8 @@ link_failure_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
- FAILING_LINKS="1 2"
- run_tests $ns1 $ns2 10.0.1.1 2
+ FAILING_LINKS="1 2" test_linkfail=2 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_stale_nr $ns2 1 -1 2
@@ -2249,8 +2231,9 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
+ chk_add_tx_nr 4 4
chk_add_nr 4 0
fi
@@ -2259,7 +2242,7 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 1 1 1
chk_add_nr 4 0
fi
@@ -2270,7 +2253,7 @@ add_addr_timeout_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_set_limits $ns2 2 2
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
+ run_tests $ns1 $ns2 10.0.1.1 speed_10
chk_join_nr 2 2 2
chk_add_nr 8 0
fi
@@ -2281,7 +2264,7 @@ add_addr_timeout_tests()
pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_set_limits $ns2 2 2
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
+ run_tests $ns1 $ns2 10.0.1.1 speed_10
chk_join_nr 1 1 1
chk_add_nr 8 0
fi
@@ -2294,8 +2277,10 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
+ addr_nr_ns2=-1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
+ chk_rm_tx_nr 1
chk_rm_nr 1 1
fi
@@ -2305,7 +2290,8 @@ remove_tests()
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow
+ addr_nr_ns2=-2 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_rm_nr 2 2
fi
@@ -2315,7 +2301,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
+ addr_nr_ns1=-1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
@@ -2327,7 +2314,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
@@ -2340,7 +2328,8 @@ remove_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10
+ addr_nr_ns1=-1 addr_nr_ns2=-2 \
+ run_tests $ns1 $ns2 10.0.1.1 speed_10
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
@@ -2353,7 +2342,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10
+ addr_nr_ns1=-3 \
+ run_tests $ns1 $ns2 10.0.1.1 speed_10
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert
@@ -2366,7 +2356,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10
+ addr_nr_ns1=-3 \
+ run_tests $ns1 $ns2 10.0.1.1 speed_10
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
@@ -2379,7 +2370,8 @@ remove_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 1 3 invert simult
@@ -2392,10 +2384,12 @@ remove_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 3 3 3
if mptcp_lib_kversion_ge 5.18; then
+ chk_rm_tx_nr 0
chk_rm_nr 0 3 simult
else
chk_rm_nr 3 3
@@ -2409,7 +2403,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert simult
@@ -2422,7 +2417,8 @@ remove_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
+ addr_nr_ns1=-8 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
@@ -2433,7 +2429,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
+ addr_nr_ns2=-9 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_rm_nr 1 1
fi
@@ -2443,7 +2440,8 @@ remove_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
+ addr_nr_ns1=-9 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
@@ -2456,7 +2454,8 @@ add_tests()
if reset "add single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
+ addr_nr_ns2=1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
fi
@@ -2464,7 +2463,8 @@ add_tests()
if reset "add signal address"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+ addr_nr_ns1=1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
fi
@@ -2473,7 +2473,8 @@ add_tests()
if reset "add multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
+ addr_nr_ns2=2 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
fi
@@ -2481,7 +2482,8 @@ add_tests()
if reset "add multiple subflows IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow
+ addr_nr_ns2=2 \
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 2 2 2
fi
@@ -2489,7 +2491,8 @@ add_tests()
if reset "add multiple addresses IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
- run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow
+ addr_nr_ns1=2 \
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 2 2 2
chk_add_nr 2 2
fi
@@ -2502,14 +2505,14 @@ ipv6_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
- run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 1 1 1
fi
# add_address, unused IPv6
if reset "unused signal address IPv6"; then
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 0 0 0
chk_add_nr 1 1
fi
@@ -2519,7 +2522,7 @@ ipv6_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
fi
@@ -2529,7 +2532,8 @@ ipv6_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
+ addr_nr_ns1=-1 \
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
@@ -2541,7 +2545,8 @@ ipv6_tests()
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
- run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 \
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
@@ -2642,7 +2647,7 @@ mixed_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
fi
@@ -2652,7 +2657,7 @@ mixed_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
- run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
+ run_tests $ns1 $ns2 dead:beef:2::1 slow
chk_join_nr 1 1 1
fi
@@ -2663,7 +2668,7 @@ mixed_tests()
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
- run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
+ run_tests $ns1 $ns2 dead:beef:2::1 slow
chk_join_nr 1 1 1
fi
@@ -2675,7 +2680,8 @@ mixed_tests()
pm_nl_set_limits $ns2 2 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
- run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow
+ addr_nr_ns2=fullmesh_1 \
+ run_tests $ns1 $ns2 dead:beef:1::1 slow
chk_join_nr 4 4 4
fi
}
@@ -2688,7 +2694,8 @@ backup_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup
+ sflags=nobackup \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_prio_nr 0 1
fi
@@ -2699,7 +2706,8 @@ backup_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ sflags=backup \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 1
@@ -2711,7 +2719,8 @@ backup_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ sflags=backup \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_prio_nr 1 1
@@ -2720,7 +2729,7 @@ backup_tests()
if reset "mpc backup" &&
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
chk_prio_nr 0 1
fi
@@ -2729,7 +2738,7 @@ backup_tests()
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
chk_prio_nr 1 1
fi
@@ -2737,7 +2746,8 @@ backup_tests()
if reset "mpc switch to backup" &&
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ sflags=backup \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
chk_prio_nr 0 1
fi
@@ -2746,7 +2756,8 @@ backup_tests()
continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ sflags=backup \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
chk_prio_nr 1 1
fi
@@ -2835,7 +2846,8 @@ add_addr_ports_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
+ addr_nr_ns1=-1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 1
chk_add_nr 1 1 1
chk_rm_nr 1 1 invert
@@ -2851,7 +2863,8 @@ add_addr_ports_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_add_nr 1 1 1
chk_rm_nr 1 1
@@ -2864,7 +2877,8 @@ add_addr_ports_tests()
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
+ addr_nr_ns1=-8 addr_nr_ns2=-2 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 1 3 invert simult
@@ -3066,7 +3080,8 @@ fullmesh_tests()
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
- run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+ addr_nr_ns1=1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 4 4 4
chk_add_nr 1 1
fi
@@ -3078,7 +3093,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+ addr_nr_ns2=fullmesh_1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 3 3 3
chk_add_nr 1 1
fi
@@ -3090,7 +3106,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 2 5
pm_nl_set_limits $ns2 1 5
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ addr_nr_ns2=fullmesh_2 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 5 5 5
chk_add_nr 1 1
fi
@@ -3103,7 +3120,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 2 4
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ addr_nr_ns2=fullmesh_2 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 4 4 4
chk_add_nr 1 1
fi
@@ -3114,7 +3132,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
pm_nl_set_limits $ns2 4 4
- run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
+ addr_nr_ns2=1 sflags=fullmesh \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_rm_nr 0 1
fi
@@ -3125,7 +3144,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
pm_nl_set_limits $ns2 4 4
- run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
+ addr_nr_ns2=fullmesh_1 sflags=nofullmesh \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_rm_nr 0 1
fi
@@ -3136,7 +3156,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
pm_nl_set_limits $ns2 4 4
- run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
+ addr_nr_ns2=1 sflags=backup,fullmesh \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_prio_nr 0 1
chk_rm_nr 0 1
@@ -3148,7 +3169,8 @@ fullmesh_tests()
pm_nl_set_limits $ns1 4 4
pm_nl_set_limits $ns2 4 4
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
+ sflags=nobackup,nofullmesh \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 2 2 2
chk_prio_nr 0 1
chk_rm_nr 0 1
@@ -3158,14 +3180,16 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
- run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client
+ test_linkfail=1024 addr_nr_ns2=fastclose_client \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_fclose_nr 1 1
chk_rst_nr 1 1 invert
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
- run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server
+ test_linkfail=1024 addr_nr_ns2=fastclose_server \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
@@ -3183,7 +3207,8 @@ fail_tests()
{
# single subflow
if reset_with_fail "Infinite map" 1; then
- run_tests $ns1 $ns2 10.0.1.1 128
+ test_linkfail=128 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
chk_fail_nr 1 -1 invert
fi
@@ -3194,11 +3219,77 @@ fail_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 1024
+ test_linkfail=1024 \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
fi
}
+userspace_pm_add_addr()
+{
+ local addr=$1
+ local id=$2
+ local tk
+
+ tk=$(grep "type:1," "$evts_ns1" |
+ sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
+ ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id
+ sleep 1
+}
+
+userspace_pm_rm_sf_addr_ns1()
+{
+ local addr=$1
+ local id=$2
+ local tk sp da dp
+
+ tk=$(grep "type:1," "$evts_ns1" |
+ sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
+ sp=$(grep "type:10" "$evts_ns1" |
+ sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ da=$(grep "type:10" "$evts_ns1" |
+ sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+ dp=$(grep "type:10" "$evts_ns1" |
+ sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q')
+ ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id
+ ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \
+ lport $sp rip $da rport $dp token $tk
+ wait_rm_addr $ns1 1
+ wait_rm_sf $ns1 1
+}
+
+userspace_pm_add_sf()
+{
+ local addr=$1
+ local id=$2
+ local tk da dp
+
+ tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
+ da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
+ dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
+ ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \
+ rip $da rport $dp token $tk
+ sleep 1
+}
+
+userspace_pm_rm_sf_addr_ns2()
+{
+ local addr=$1
+ local id=$2
+ local tk da dp sp
+
+ tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
+ da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
+ dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
+ sp=$(grep "type:10" "$evts_ns2" |
+ sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id
+ ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \
+ rip $da rport $dp token $tk
+ wait_rm_addr $ns2 1
+ wait_rm_sf $ns2 1
+}
+
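For reference, a minimal sketch of what the sed extraction in the helpers above operates on. The event dumps collected in $evts_ns1/$evts_ns2 hold one comma-separated record per path-manager event; the field values below are invented:

    # sample MPTCP_EVENT_ESTABLISHED record (type:1); values are made up
    echo 'type:1,token:3007017668,family:2,saddr4:10.0.1.1,sport:10000' |
        sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q'
    # prints: 3007017668

The trailing 'q' quits after the first line, so only the first matching event is considered even when the dump already contains several.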
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3254,7 +3345,8 @@ userspace_tests()
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ sflags=backup \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 1 1 0
chk_prio_nr 0 0
fi
@@ -3267,7 +3359,8 @@ userspace_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
+ addr_nr_ns2=-1 \
+ run_tests $ns1 $ns2 10.0.1.1 slow
chk_join_nr 0 0 0
chk_rm_nr 0 0
fi
@@ -3277,11 +3370,19 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow
+ run_tests $ns1 $ns2 10.0.1.1 speed_10 &
+ local tests_pid=$!
+ wait_mpj $ns1
+ userspace_pm_add_addr 10.0.2.1 10
chk_join_nr 1 1 1
chk_add_nr 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+ userspace_pm_rm_sf_addr_ns1 10.0.2.1 10
chk_rm_nr 1 1 invert
+ chk_mptcp_info subflows 0 subflows 0
kill_events_pids
+ wait $tests_pid
fi
# userspace pm create destroy subflow
@@ -3289,10 +3390,17 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow
+ run_tests $ns1 $ns2 10.0.1.1 speed_10 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ userspace_pm_add_sf 10.0.3.2 20
chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ userspace_pm_rm_sf_addr_ns2 10.0.3.2 20
chk_rm_nr 1 1
+ chk_mptcp_info subflows 0 subflows 0
kill_events_pids
+ wait $tests_pid
fi
}
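The two userspace PM tests above switch from a one-shot run to driving a live connection: the transfer keeps running in the background while pm_nl_ctl commands are issued against it. A sketch of that pattern, using the script's own run_tests and wait_mpj helpers:

    run_tests "$ns1" "$ns2" 10.0.1.1 speed_10 &
    tests_pid=$!
    wait_mpj "$ns2"            # wait for the first MP_JOIN to show up
    # ... issue pm_nl_ctl commands, check counters ...
    wait "$tests_pid"          # reap the background transfer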
@@ -3305,11 +3413,13 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null &
+ run_tests $ns1 $ns2 10.0.1.1 slow 2>/dev/null &
wait_mpj $ns1
pm_nl_check_endpoint 1 "creation" \
$ns2 10.0.2.2 id 1 flags implicit
+ chk_mptcp_info subflows 1 subflows 1
+ chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 33
pm_nl_check_endpoint 0 "ID change is prevented" \
@@ -3326,21 +3436,22 @@ endpoint_tests()
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null &
+ test_linkfail=4 \
+ run_tests $ns1 $ns2 10.0.1.1 speed_20 2>/dev/null &
wait_mpj $ns2
chk_subflow_nr needtitle "before delete" 2
- chk_mptcp_info subflows_1
+ chk_mptcp_info subflows 1 subflows 1
pm_nl_del_endpoint $ns2 2 10.0.2.2
sleep 0.5
chk_subflow_nr "" "after delete" 1
- chk_mptcp_info subflows_0
+ chk_mptcp_info subflows 0 subflows 0
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
wait_mpj $ns2
chk_subflow_nr "" "after re-add" 2
- chk_mptcp_info subflows_1
+ chk_mptcp_info subflows 1 subflows 1
kill_tests_wait
fi
}
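The conversion running through mptcp_join.sh above replaces run_tests' optional positional arguments with per-invocation variables (addr_nr_ns1, addr_nr_ns2, sflags, test_linkfail) prefixed to the call. A minimal sketch of the receiving side, assuming defaults are applied inside the function (the real run_tests does much more):

    run_tests_sketch()
    {
        local ns1="$1" ns2="$2" connect_addr="$3" speed="${4:-fast}"

        # fall back to defaults when the caller did not prefix a value
        : "${addr_nr_ns1:=0}" "${addr_nr_ns2:=0}" "${sflags:=}" "${test_linkfail:=0}"

        echo "addr=$connect_addr speed=$speed addr_nr_ns1=$addr_nr_ns1" \
             "addr_nr_ns2=$addr_nr_ns2 sflags=$sflags linkfail=$test_linkfail"
    }

    # matches the converted call sites above:
    addr_nr_ns1=-8 run_tests_sketch "$ns1" "$ns2" 10.0.1.1 slow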
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index b35148edbf02..926b0be87c99 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -51,6 +51,11 @@ struct mptcp_info {
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
__u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
};
struct mptcp_subflow_data {
@@ -81,10 +86,41 @@ struct mptcp_subflow_addrs {
#define MPTCP_SUBFLOW_ADDRS 3
#endif
+#ifndef MPTCP_FULL_INFO
+struct mptcp_subflow_info {
+ __u32 id;
+ struct mptcp_subflow_addrs addrs;
+};
+
+struct mptcp_full_info {
+ __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_tcpinfo_user;
+ __u32 size_sfinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_sfinfo_user;
+ __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
+ __u32 size_arrays_user; /* max subflows that userspace is interested in;
+ * the buffers at subflow_info/tcp_info
+ * are respectively at least:
+ * size_arrays * size_sfinfo_user
+ * size_arrays * size_tcpinfo_user
+ * bytes wide
+ */
+ __aligned_u64 subflow_info;
+ __aligned_u64 tcp_info;
+ struct mptcp_info mptcp_info;
+};
+
+#define MPTCP_FULL_INFO 4
+#endif
+
struct so_state {
struct mptcp_info mi;
+ struct mptcp_info last_sample;
+ struct tcp_info tcp_info;
+ struct mptcp_subflow_addrs addrs;
uint64_t mptcpi_rcv_delta;
uint64_t tcpi_rcv_delta;
+ bool pkt_stats_avail;
};
#ifndef MIN
@@ -322,8 +358,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
if (ret < 0)
die_perror("getsockopt MPTCP_INFO");
- assert(olen == sizeof(i));
+ s->pkt_stats_avail = olen >= sizeof(i);
+ s->last_sample = i;
if (s->mi.mptcpi_write_seq == 0)
s->mi = i;
@@ -362,6 +399,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t
olen -= sizeof(struct mptcp_subflow_data);
assert(olen == ti.d.size_user);
+ s->tcp_info = ti.ti[0];
+
if (ti.ti[0].tcpi_bytes_sent == w &&
ti.ti[0].tcpi_bytes_received == r)
goto done;
@@ -383,7 +422,7 @@ done:
do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
}
-static void do_getsockopt_subflow_addrs(int fd)
+static void do_getsockopt_subflow_addrs(struct so_state *s, int fd)
{
struct sockaddr_storage remote, local;
socklen_t olen, rlen, llen;
@@ -431,6 +470,7 @@ static void do_getsockopt_subflow_addrs(int fd)
assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
+ s->addrs = addrs.addr[0];
memset(&addrs, 0, sizeof(addrs));
@@ -451,13 +491,70 @@ static void do_getsockopt_subflow_addrs(int fd)
do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
}
+static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd)
+{
+ size_t data_size = sizeof(struct mptcp_full_info);
+ struct mptcp_subflow_info sfinfo[2];
+ struct tcp_info tcp_info[2];
+ struct mptcp_full_info mfi;
+ socklen_t olen;
+ int ret;
+
+ memset(&mfi, 0, data_size);
+ memset(tcp_info, 0, sizeof(tcp_info));
+ memset(sfinfo, 0, sizeof(sfinfo));
+
+ mfi.size_tcpinfo_user = sizeof(struct tcp_info);
+ mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info);
+ mfi.size_arrays_user = 2;
+ mfi.subflow_info = (unsigned long)&sfinfo[0];
+ mfi.tcp_info = (unsigned long)&tcp_info[0];
+ olen = data_size;
+
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ perror("MPTCP_FULL_INFO test skipped");
+ return;
+ }
+ xerror("getsockopt MPTCP_FULL_INFO");
+ }
+
+ assert(olen <= data_size);
+ assert(mfi.size_tcpinfo_kernel > 0);
+ assert(mfi.size_tcpinfo_user ==
+ MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info)));
+ assert(mfi.size_sfinfo_kernel > 0);
+ assert(mfi.size_sfinfo_user ==
+ MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info)));
+ assert(mfi.num_subflows == 1);
+
+	/* Tolerate future extensions to the mptcp_info struct and running a
+	 * newer test on top of an older kernel.
+	 * In any case, any kernel supporting MPTCP_FULL_INFO must at least
+	 * include the following fields in mptcp_info.
+	 */
+ assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info));
+ assert(mfi.mptcp_info.mptcpi_subflows == 0);
+ assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent);
+ assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received);
+
+ assert(sfinfo[0].id == 1);
+ assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent);
+ assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received);
+ assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs)));
+}
+
static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
{
do_getsockopt_mptcp_info(s, fd, w);
do_getsockopt_tcp_info(s, fd, r, w);
- do_getsockopt_subflow_addrs(fd);
+ do_getsockopt_subflow_addrs(s, fd);
+
+ if (r)
+ do_getsockopt_mptcp_full_info(s, fd);
}
static void connect_one_server(int fd, int pipefd)
@@ -562,6 +659,23 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+
+	/* be nice when running on top of an older kernel */
+ if (s.pkt_stats_avail) {
+ if (s.last_sample.mptcpi_bytes_sent != ret2)
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_sent, ret2,
+ s.last_sample.mptcpi_bytes_sent - ret2);
+ if (s.last_sample.mptcpi_bytes_received != ret)
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_received, ret,
+ s.last_sample.mptcpi_bytes_received - ret);
+ if (s.last_sample.mptcpi_bytes_acked != ret)
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
+				s.last_sample.mptcpi_bytes_acked, ret,
+				s.last_sample.mptcpi_bytes_acked - ret);
+ }
+
close(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index f295a371ff14..dc8d473fc82c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -12,6 +12,8 @@ ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
+iptables="iptables"
+ip6tables="ip6tables"
sec=$(date +%s)
rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
@@ -25,7 +27,7 @@ add_mark_rules()
local m=$2
local t
- for t in iptables ip6tables; do
+ for t in ${iptables} ${ip6tables}; do
# just to debug: check we have multiple subflow connection requests
ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT
@@ -95,14 +97,14 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
-iptables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
+# Use the legacy version if available to support old kernel versions
+if iptables-legacy -V &> /dev/null; then
+ iptables="iptables-legacy"
+ ip6tables="ip6tables-legacy"
+elif ! iptables -V &> /dev/null; then
echo "SKIP: Could not run all tests without iptables tool"
exit $ksft_skip
-fi
-
-ip6tables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
+elif ! ip6tables -V &> /dev/null; then
echo "SKIP: Could not run all tests without ip6tables tool"
exit $ksft_skip
fi
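The fallback above prefers iptables-legacy where it exists, so the mark rules keep working on old kernels without nft support. If more tools ever need the same treatment, the detection could be factored into a helper; pick_tool below is hypothetical, not part of the patch:

    pick_tool()
    {
        local tool="$1"

        if "${tool}-legacy" -V &> /dev/null; then
            echo "${tool}-legacy"
        elif "$tool" -V &> /dev/null; then
            echo "$tool"
        else
            return 1
        fi
    }

    # iptables=$(pick_tool iptables) || exit $ksft_skip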
@@ -112,10 +114,10 @@ check_mark()
local ns=$1
local af=$2
- local tables=iptables
+ local tables=${iptables}
if [ $af -eq 6 ];then
- tables=ip6tables
+ tables=${ip6tables}
fi
local counters values
@@ -126,6 +128,7 @@ check_mark()
for v in $values; do
if [ $v -ne 0 ]; then
echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
+ ret=1
return 1
fi
done
@@ -225,11 +228,11 @@ do_transfer()
fi
if [ $local_addr = "::" ];then
- check_mark $listener_ns 6
- check_mark $connector_ns 6
+ check_mark $listener_ns 6 || retc=1
+ check_mark $connector_ns 6 || retc=1
else
- check_mark $listener_ns 4
- check_mark $connector_ns 4
+ check_mark $listener_ns 4 || retc=1
+ check_mark $connector_ns 4 || retc=1
fi
check_transfer $cin $sout "file received by server"
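The `|| retc=1` additions make a failed mark check sticky instead of letting the next call overwrite it. The accumulation pattern in isolation (a sketch, not script code):

    retc=0
    check_one() { [ "$1" -eq 0 ]; }   # stand-in for check_mark

    check_one 0 || retc=1   # passes, retc stays 0
    check_one 1 || retc=1   # fails, retc becomes 1
    check_one 0 || retc=1   # retc remains 1
    echo "retc=$retc"       # prints: retc=1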
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index abddf4c63e79..1887bd61bd9a 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -425,7 +425,7 @@ int dsf(int fd, int pm_family, int argc, char *argv[])
}
/* token */
- token = atoi(params[4]);
+ token = strtoul(params[4], NULL, 10);
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ATTR_TOKEN;
rta->rta_len = RTA_LENGTH(4);
@@ -551,7 +551,7 @@ int csf(int fd, int pm_family, int argc, char *argv[])
}
/* token */
- token = atoi(params[4]);
+ token = strtoul(params[4], NULL, 10);
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ATTR_TOKEN;
rta->rta_len = RTA_LENGTH(4);
@@ -598,7 +598,7 @@ int remove_addr(int fd, int pm_family, int argc, char *argv[])
if (++arg >= argc)
error(1, 0, " missing token value");
- token = atoi(argv[arg]);
+ token = strtoul(argv[arg], NULL, 10);
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ATTR_TOKEN;
rta->rta_len = RTA_LENGTH(4);
@@ -710,7 +710,7 @@ int announce_addr(int fd, int pm_family, int argc, char *argv[])
if (++arg >= argc)
error(1, 0, " missing token value");
- token = atoi(argv[arg]);
+ token = strtoul(argv[arg], NULL, 10);
} else
error(1, 0, "unknown keyword %s", argv[arg]);
}
@@ -1347,7 +1347,7 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
error(1, 0, " missing token value");
/* token */
- token = atoi(argv[arg]);
+ token = strtoul(argv[arg], NULL, 10);
} else if (!strcmp(argv[arg], "flags")) {
char *tok, *str;
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 98d9e4d2d3fc..b180133a30af 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -423,6 +423,7 @@ test_remove()
stdbuf -o0 -e0 printf "[OK]\n"
else
stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
fi
# RM_ADDR using an invalid addr id should result in no action
@@ -437,6 +438,7 @@ test_remove()
stdbuf -o0 -e0 printf "[OK]\n"
else
stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
fi
# RM_ADDR from the client to server machine
@@ -848,7 +850,7 @@ test_prio()
local count
# Send MP_PRIO signal from client to server machine
- ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port"
+ ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port"
sleep 0.5
# Check TX
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index ee9a72982705..39a0e01f8554 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -76,7 +76,9 @@ struct sock_args {
has_grp:1,
has_expected_laddr:1,
has_expected_raddr:1,
- bind_test_only:1;
+ bind_test_only:1,
+ client_dontroute:1,
+ server_dontroute:1;
unsigned short port;
@@ -611,6 +613,18 @@ static int set_dsfield(int sd, int version, int dsfield)
return 0;
}
+static int set_dontroute(int sd)
+{
+ unsigned int one = 1;
+
+ if (setsockopt(sd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) {
+ log_err_errno("setsockopt(SO_DONTROUTE)");
+ return -1;
+ }
+
+ return 0;
+}
+
static int str_to_uint(const char *str, int min, int max, unsigned int *value)
{
int number;
@@ -1351,6 +1365,14 @@ static int msock_init(struct sock_args *args, int server)
if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
goto out_err;
+ if (server) {
+ if (args->server_dontroute && set_dontroute(sd) != 0)
+ goto out_err;
+ } else {
+ if (args->client_dontroute && set_dontroute(sd) != 0)
+ goto out_err;
+ }
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto out_err;
else if (args->use_setsockopt &&
@@ -1482,6 +1504,9 @@ static int lsock_init(struct sock_args *args)
if (set_dsfield(sd, args->version, args->dsfield) != 0)
goto err;
+ if (args->server_dontroute && set_dontroute(sd) != 0)
+ goto err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto err;
else if (args->use_setsockopt &&
@@ -1698,6 +1723,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
if (set_dsfield(sd, args->version, args->dsfield) != 0)
goto err;
+ if (args->client_dontroute && set_dontroute(sd) != 0)
+ goto err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto err;
else if (args->use_setsockopt &&
@@ -1905,10 +1933,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
#define OPT_FORCE_BIND_KEY_IFINDEX 1001
#define OPT_NO_BIND_KEY_IFINDEX 1002
+#define OPT_CLIENT_DONTROUTE 1003
+#define OPT_SERVER_DONTROUTE 1004
static struct option long_opts[] = {
{"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX},
{"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX},
+ {"client-dontroute", 0, 0, OPT_CLIENT_DONTROUTE},
+ {"server-dontroute", 0, 0, OPT_SERVER_DONTROUTE},
{0, 0, 0, 0}
};
@@ -1954,6 +1986,12 @@ static void print_usage(char *prog)
" --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n"
" --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n"
" (default: only if -I is passed)\n"
+ " --client-dontroute: don't use gateways for client socket: send\n"
+ " packets only if destination is on link (see\n"
+ " SO_DONTROUTE in socket(7))\n"
+ " --server-dontroute: don't use gateways for server socket: send\n"
+ " packets only if destination is on link (see\n"
+ " SO_DONTROUTE in socket(7))\n"
"\n"
" -g grp multicast group (e.g., 239.1.1.1)\n"
" -i interactive mode (default is echo and terminate)\n"
@@ -2076,6 +2114,12 @@ int main(int argc, char *argv[])
case OPT_NO_BIND_KEY_IFINDEX:
args.bind_key_ifindex = -1;
break;
+ case OPT_CLIENT_DONTROUTE:
+ args.client_dontroute = 1;
+ break;
+ case OPT_SERVER_DONTROUTE:
+ args.server_dontroute = 1;
+ break;
case 'X':
args.client_pw = optarg;
break;
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index dfe3d287f01d..f838dd370f6a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -361,6 +361,7 @@ err_buf=
tcpdump_pids=
nettest_pids=
socat_pids=
+tmpoutfile=
err() {
err_buf="${err_buf}${1}
@@ -951,6 +952,7 @@ cleanup() {
ip link del veth_A-R1 2>/dev/null
ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
+ rm -f "$tmpoutfile"
}
mtu() {
@@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+ tmpoutfile=$(mktemp)
+
+ # Flush Exceptions, retry with TCP
+ run_cmd ${ns_a} ip route flush cached ${dst}
+ run_cmd ${ns_b} ip route flush cached ${dst}
+ run_cmd ${ns_c} ip route flush cached ${dst}
+
+ for target in "${ns_a}" "${ns_c}" ; do
+ if [ ${family} -eq 4 ]; then
+ TCPDST=TCP:${dst}:50000
+ else
+ TCPDST="TCP:[${dst}]:50000"
+ fi
+ ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+
+ sleep 1
+
+ dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+ size=$(du -sb $tmpoutfile)
+ size=${size%%/tmp/*}
+
+	[ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1
+ done
+
+ rm -f "$tmpoutfile"
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on locally bridged ${type} interface"
}
test_pmtu_ipv4_br_vxlan4_exception() {
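The TCP retry added above flushes the cached route exceptions and then forces PMTU discovery with a bulk TCP transfer. Stripped of the namespaces and the loop, the socat pair looks roughly like this (address, port and file name are examples):

    socat -T 3 -u TCP-LISTEN:50000 STDOUT > /tmp/out &
    sleep 1
    dd if=/dev/zero bs=1M count=1 status=none |
        socat -T 3 -u STDIN TCP:192.0.2.1:50000,connect-timeout=3
    [ "$(du -sb /tmp/out | cut -f1)" -eq 1048576 ] &&
        echo "1 MiB received, route exceptions can now be checked"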
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 383ac6fc037d..ba286d680fd9 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -860,6 +860,7 @@ EOF
fi
# clean up any leftovers
+ echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index 0e04f9fef986..a14818164102 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -159,7 +159,7 @@ void create_clients(struct __test_metadata *_metadata,
/* Make sure SYN will be processed on the i-th CPU
* and finally distributed to the i-th listener.
*/
- sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+ ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
ASSERT_EQ(ret, 0);
for (j = 0; j < CLIENT_PER_SERVER; j++) {
diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
new file mode 100755
index 000000000000..f75212bf142c
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
@@ -0,0 +1,240 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the [no]localbypass VXLAN device option. The test
+# configures two VXLAN devices in the same network namespace and a tc filter on
+# the loopback device that drops encapsulated packets. The test sends packets
+# from the first VXLAN device and verifies that by default these packets are
+# received by the second VXLAN device. The test then enables the nolocalbypass
+# option and verifies that packets are no longer received by the second VXLAN
+# device.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTS="
+ nolocalbypass
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ ip netns add ns1
+
+ ip -n ns1 link set dev lo up
+ ip -n ns1 address add 192.0.2.1/32 dev lo
+ ip -n ns1 address add 198.51.100.1/32 dev lo
+
+ ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \
+ dstport 4789 nolearning
+ ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790
+}
+
+cleanup()
+{
+ ip netns del ns1 &> /dev/null
+}
+
+################################################################################
+# Tests
+
+nolocalbypass()
+{
+ local smac=00:01:02:03:04:05
+ local dmac=00:0a:0b:0c:0d:0e
+
+ run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790"
+
+ run_cmd "tc -n ns1 qdisc add dev vx1 clsact"
+ run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n ns1 qdisc add dev lo clsact"
+ run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop"
+
+ run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+ log_test $? 0 "localbypass enabled"
+
+ run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+ tc_check_packets "ns1" "dev vx1 ingress" 101 1
+ log_test $? 0 "Packet received by local VXLAN device - localbypass"
+
+ run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass"
+
+ run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'"
+ log_test $? 0 "localbypass disabled"
+
+ run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+ tc_check_packets "ns1" "dev vx1 ingress" 101 1
+ log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass"
+
+ run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass"
+
+ run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+ log_test $? 0 "localbypass enabled"
+
+ run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+ tc_check_packets "ns1" "dev vx1 ingress" 101 2
+ log_test $? 0 "Packet received by local VXLAN device - localbypass"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG ;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "localbypass"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 ip too old, missing VXLAN nolocalbypass support"
+ exit $ksft_skip
+fi
+
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
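At the heart of the new selftest is the option round-trip below: toggle [no]localbypass on an existing VXLAN device and confirm the state via ip -d -j plus jq. A standalone sketch, assuming a device named vx0:

    ip link set dev vx0 type vxlan nolocalbypass
    ip -d -j link show dev vx0 |
        jq -e '.[]["linkinfo"]["info_data"]["localbypass"] == false'
    ip link set dev vx0 type vxlan localbypass
    ip -d -j link show dev vx0 |
        jq -e '.[]["linkinfo"]["info_data"]["localbypass"] == true'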
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index ff36844d14b4..a3c57004344c 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -15,6 +15,7 @@
#include <linux/tcp.h>
#include <linux/socket.h>
+#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
@@ -1646,6 +1647,136 @@ TEST_F(tls_err, timeo)
}
}
+TEST_F(tls_err, poll_partial_rec)
+{
+ struct pollfd pfd = { };
+ ssize_t rec_len;
+ char rec[256];
+ char buf[128];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 1), 0);
+
+ memrnd(buf, sizeof(buf));
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+ rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+ EXPECT_GT(rec_len, sizeof(buf));
+
+ /* Write 100B, not the full record ... */
+ EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+ /* ... no full record should mean no POLLIN */
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 1), 0);
+ /* Now write the rest, and it should all pop out of the other end. */
+ EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 1), 1);
+ EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+ EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+}
+
+TEST_F(tls_err, epoll_partial_rec)
+{
+ struct epoll_event ev, events[10];
+ ssize_t rec_len;
+ char rec[256];
+ char buf[128];
+ int epollfd;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ epollfd = epoll_create1(0);
+ ASSERT_GE(epollfd, 0);
+
+ memset(&ev, 0, sizeof(ev));
+ ev.events = EPOLLIN;
+ ev.data.fd = self->cfd2;
+ ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0);
+
+ EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+
+ memrnd(buf, sizeof(buf));
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+ rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+ EXPECT_GT(rec_len, sizeof(buf));
+
+ /* Write 100B, not the full record ... */
+ EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+ /* ... no full record should mean no POLLIN */
+ EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+ /* Now write the rest, and it should all pop out of the other end. */
+ EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+ EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1);
+ EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+ EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+
+ close(epollfd);
+}
+
+TEST_F(tls_err, poll_partial_rec_async)
+{
+ struct pollfd pfd = { };
+ ssize_t rec_len;
+ char rec[256];
+ char buf[128];
+ char token;
+ int p[2];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ ASSERT_GE(pipe(p), 0);
+
+ memrnd(buf, sizeof(buf));
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+ rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+ EXPECT_GT(rec_len, sizeof(buf));
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int status, pid2;
+
+ close(p[1]);
+ usleep(1000); /* Give child a head start */
+
+ EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+
+ EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */
+
+ EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0),
+ rec_len - 100);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ close(p[0]);
+
+ /* Child should sleep in poll(), never get a wake */
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5), 0);
+
+ EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
+
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5), 1);
+
+ exit(!_metadata->passed);
+ }
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore
index 4696df589d68..52f613cdad54 100644
--- a/tools/testing/selftests/nolibc/.gitignore
+++ b/tools/testing/selftests/nolibc/.gitignore
@@ -1,4 +1,5 @@
/initramfs/
+/libc-test
/nolibc-test
/run.out
/sysroot/
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index bbce57420465..1b7b3c82f8ad 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -64,7 +64,7 @@ QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS = $(QEMU_ARGS_$(ARCH))
+QEMU_ARGS = $(QEMU_ARGS_$(ARCH)) $(QEMU_ARGS_EXTRA)
# OUTPUT is only set when run from the main makefile, otherwise
# it defaults to this nolibc directory.
@@ -76,16 +76,12 @@ else
Q=@
endif
-CFLAGS_STACKPROTECTOR = -DNOLIBC_STACKPROTECTOR \
- $(call cc-option,-mstack-protector-guard=global) \
- $(call cc-option,-fstack-protector-all)
-CFLAGS_STKP_i386 = $(CFLAGS_STACKPROTECTOR)
-CFLAGS_STKP_x86_64 = $(CFLAGS_STACKPROTECTOR)
-CFLAGS_STKP_x86 = $(CFLAGS_STACKPROTECTOR)
CFLAGS_s390 = -m64
-CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables \
+CFLAGS_mips = -EL
+CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
+CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
$(call cc-option,-fno-stack-protector) \
- $(CFLAGS_STKP_$(ARCH)) $(CFLAGS_$(ARCH))
+ $(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
LDFLAGS := -s
help:
@@ -94,6 +90,7 @@ help:
@echo " help this help"
@echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
@echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+ @echo " libc-test build an executable using the compiler's default libc instead"
@echo " run-user runs the executable under QEMU (uses \$$ARCH, \$$TEST)"
@echo " initramfs prepare the initramfs with nolibc-test"
@echo " defconfig create a fresh new default config (uses \$$ARCH)"
@@ -128,10 +125,16 @@ nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
-nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc
+libc-test: nolibc-test.c
+ $(QUIET_CC)$(CC) -o $@ $<
+
# qemu user-land test
run-user: nolibc-test
$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :
- $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed."
+ $(Q)awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
+ END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
+ if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }' \
+ $(CURDIR)/run.out
initramfs: nolibc-test
$(QUIET_MKDIR)mkdir -p initramfs
@@ -147,18 +150,26 @@ kernel: initramfs
# run the tests after building the kernel
run: kernel
$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
- $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed."
+ $(Q)awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
+ END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
+ if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }' \
+ $(CURDIR)/run.out
# re-run the tests from an existing kernel
rerun:
$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
- $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed."
+ $(Q)awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
+ END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
+ if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }' \
+ $(CURDIR)/run.out
clean:
$(call QUIET_CLEAN, sysroot)
$(Q)rm -rf sysroot
$(call QUIET_CLEAN, nolibc-test)
$(Q)rm -f nolibc-test
+ $(call QUIET_CLEAN, libc-test)
+ $(Q)rm -f libc-test
$(call QUIET_CLEAN, initramfs)
$(Q)rm -rf initramfs
$(call QUIET_CLEAN, run.out)
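The awk one-liner that replaces the old grep -c summary tallies lines ending in [OK], [FAIL] or [SKIPPED]. It can be tried against a fabricated run.out:

    printf '1 a [OK]\n2 b [FAIL]\n3 c [SKIPPED]\n' > run.out
    awk '/\[OK\][\r]*$/{p++} /\[FAIL\][\r]*$/{f++} /\[SKIPPED\][\r]*$/{s++}
         END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f);
              if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print }' run.out
    # prints: 1 test(s) passed, 1 skipped, 1 failed. See all results in run.out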
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 21bacc928bf7..8e7750e2eb97 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1,10 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE
-/* platform-specific include files coming from the compiler */
-#include <limits.h>
-
/* libc-specific include files
* The program may be built in 3 ways:
* $(CC) -nostdlib -include /path/to/nolibc.h => NOLIBC already defined
@@ -20,7 +17,9 @@
#include <linux/reboot.h>
#include <sys/io.h>
#include <sys/ioctl.h>
+#include <sys/mman.h>
#include <sys/mount.h>
+#include <sys/prctl.h>
#include <sys/reboot.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -34,7 +33,10 @@
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
#include <unistd.h>
+#include <limits.h>
#endif
#endif
@@ -43,8 +45,8 @@ char **environ;
/* definition of a series of tests */
struct test {
- const char *name; // test name
- int (*func)(int min, int max); // handler
+ const char *name; /* test name */
+ int (*func)(int min, int max); /* handler */
};
#ifndef _NOLIBC_STDLIB_H
@@ -103,24 +105,32 @@ const char *errorname(int err)
CASE_ERR(EDOM);
CASE_ERR(ERANGE);
CASE_ERR(ENOSYS);
+ CASE_ERR(EOVERFLOW);
default:
return itoa(err);
}
}
+static void putcharn(char c, size_t n)
+{
+ char buf[64];
+
+ memset(buf, c, n);
+ buf[n] = '\0';
+ fputs(buf, stdout);
+}
+
static int pad_spc(int llen, int cnt, const char *fmt, ...)
{
va_list args;
- int len;
int ret;
- for (len = 0; len < cnt - llen; len++)
- putchar(' ');
+ putcharn(' ', cnt - llen);
va_start(args, fmt);
ret = vfprintf(stdout, fmt, args);
va_end(args);
- return ret < 0 ? ret : ret + len;
+ return ret < 0 ? ret : ret + cnt - llen;
}
/* The tests below are intended to be used by the macros, which evaluate
@@ -162,7 +172,7 @@ static int expect_eq(uint64_t expr, int llen, uint64_t val)
{
int ret = !(expr == val);
- llen += printf(" = %lld ", expr);
+ llen += printf(" = %lld ", (long long)expr);
pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
@@ -290,18 +300,24 @@ static int expect_sysne(int expr, int llen, int val)
}
+#define EXPECT_SYSER2(cond, expr, expret, experr1, experr2) \
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
+
#define EXPECT_SYSER(cond, expr, expret, experr) \
- do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr(expr, expret, experr, llen); } while (0)
+ EXPECT_SYSER2(cond, expr, expret, experr, 0)
-static int expect_syserr(int expr, int expret, int experr, int llen)
+static int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen)
{
int ret = 0;
int _errno = errno;
llen += printf(" = %d %s ", expr, errorname(_errno));
- if (expr != expret || _errno != experr) {
+ if (expr != expret || (_errno != experr1 && _errno != experr2)) {
ret = 1;
- llen += printf(" != (%d %s) ", expret, errorname(experr));
+ if (experr2 == 0)
+ llen += printf(" != (%d %s) ", expret, errorname(experr1));
+ else
+ llen += printf(" != (%d %s %s) ", expret, errorname(experr1), errorname(experr2));
llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
llen += pad_spc(llen, 64, " [OK]\n");
@@ -471,11 +487,60 @@ static int test_getpagesize(void)
return !c;
}
+static int test_fork(void)
+{
+ int status;
+ pid_t pid;
+
+	/* flush the printf buffers to avoid the child flushing them */
+ fflush(stdout);
+ fflush(stderr);
+
+ pid = fork();
+
+ switch (pid) {
+ case -1:
+ return 1;
+
+ case 0:
+ exit(123);
+
+ default:
+ pid = waitpid(pid, &status, 0);
+
+ return pid == -1 || !WIFEXITED(status) || WEXITSTATUS(status) != 123;
+ }
+}
+
+static int test_stat_timestamps(void)
+{
+ struct stat st;
+
+ if (sizeof(st.st_atim.tv_sec) != sizeof(st.st_atime))
+ return 1;
+
+ if (stat("/proc/self/", &st))
+ return 1;
+
+ if (st.st_atim.tv_sec != st.st_atime || st.st_atim.tv_nsec > 1000000000)
+ return 1;
+
+ if (st.st_mtim.tv_sec != st.st_mtime || st.st_mtim.tv_nsec > 1000000000)
+ return 1;
+
+ if (st.st_ctim.tv_sec != st.st_ctime || st.st_ctim.tv_nsec > 1000000000)
+ return 1;
+
+ return 0;
+}
+
/* Run syscall tests between IDs <min> and <max>.
* Return 0 on success, non-zero on failure.
*/
int run_syscall(int min, int max)
{
+ struct timeval tv;
+ struct timezone tz;
struct stat stat_buf;
int euid0;
int proc;
@@ -491,7 +556,7 @@ int run_syscall(int min, int max)
euid0 = geteuid() == 0;
for (test = min; test >= 0 && test <= max; test++) {
- int llen = 0; // line length
+ int llen = 0; /* line length */
/* avoid leaving empty lines below; this would insert holes into
* test numbers.
@@ -527,14 +592,11 @@ int run_syscall(int min, int max)
CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
+ CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
- CASE_TEST(gettimeofday_null); EXPECT_SYSZR(1, gettimeofday(NULL, NULL)); break;
-#ifdef NOLIBC
- CASE_TEST(gettimeofday_bad1); EXPECT_SYSER(1, gettimeofday((void *)1, NULL), -1, EFAULT); break;
- CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break;
- CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break;
-#endif
+ CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
+ CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break;
CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break;
CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break;
@@ -550,6 +612,7 @@ int run_syscall(int min, int max)
CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
CASE_TEST(poll_fault); EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break;
+ CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break;
CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break;
CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break;
CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break;
@@ -557,6 +620,7 @@ int run_syscall(int min, int max)
CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break;
CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break;
CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
+ CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break;
@@ -565,6 +629,8 @@ int run_syscall(int min, int max)
CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break;
CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break;
CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
+ CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
+ CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -581,7 +647,7 @@ int run_stdlib(int min, int max)
void *p1, *p2;
for (test = min; test >= 0 && test <= max; test++) {
- int llen = 0; // line length
+ int llen = 0; /* line length */
/* avoid leaving empty lines below; this would insert holes into
* test numbers.
@@ -639,9 +705,9 @@ int run_stdlib(int min, int max)
CASE_TEST(limit_int_fast32_min); EXPECT_EQ(1, INT_FAST32_MIN, (int_fast32_t) INTPTR_MIN); break;
CASE_TEST(limit_int_fast32_max); EXPECT_EQ(1, INT_FAST32_MAX, (int_fast32_t) INTPTR_MAX); break;
CASE_TEST(limit_uint_fast32_max); EXPECT_EQ(1, UINT_FAST32_MAX, (uint_fast32_t) UINTPTR_MAX); break;
- CASE_TEST(limit_int_fast64_min); EXPECT_EQ(1, INT_FAST64_MIN, (int_fast64_t) INTPTR_MIN); break;
- CASE_TEST(limit_int_fast64_max); EXPECT_EQ(1, INT_FAST64_MAX, (int_fast64_t) INTPTR_MAX); break;
- CASE_TEST(limit_uint_fast64_max); EXPECT_EQ(1, UINT_FAST64_MAX, (uint_fast64_t) UINTPTR_MAX); break;
+ CASE_TEST(limit_int_fast64_min); EXPECT_EQ(1, INT_FAST64_MIN, (int_fast64_t) INT64_MIN); break;
+ CASE_TEST(limit_int_fast64_max); EXPECT_EQ(1, INT_FAST64_MAX, (int_fast64_t) INT64_MAX); break;
+ CASE_TEST(limit_uint_fast64_max); EXPECT_EQ(1, UINT_FAST64_MAX, (uint_fast64_t) UINT64_MAX); break;
#if __SIZEOF_LONG__ == 8
CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, (intptr_t) 0x8000000000000000LL); break;
CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, (intptr_t) 0x7fffffffffffffffLL); break;
@@ -667,17 +733,98 @@ int run_stdlib(int min, int max)
return ret;
}
-#if defined(__clang__)
-__attribute__((optnone))
-#elif defined(__GNUC__)
-__attribute__((optimize("O0")))
-#endif
+#define EXPECT_VFPRINTF(c, expected, fmt, ...) \
+ ret += expect_vfprintf(llen, c, expected, fmt, ##__VA_ARGS__)
+
+static int expect_vfprintf(int llen, size_t c, const char *expected, const char *fmt, ...)
+{
+ int ret, fd, w, r;
+ char buf[100];
+ FILE *memfile;
+ va_list args;
+
+ fd = memfd_create("vfprintf", 0);
+ if (fd == -1) {
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ memfile = fdopen(fd, "w+");
+ if (!memfile) {
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ va_start(args, fmt);
+ w = vfprintf(memfile, fmt, args);
+ va_end(args);
+
+ if (w != c) {
+ llen += printf(" written(%d) != %d", w, (int) c);
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ fflush(memfile);
+ lseek(fd, 0, SEEK_SET);
+
+ r = read(fd, buf, sizeof(buf) - 1);
+
+ fclose(memfile);
+
+ if (r != w) {
+ llen += printf(" written(%d) != read(%d)", w, r);
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ buf[r] = '\0';
+ llen += printf(" \"%s\" = \"%s\"", expected, buf);
+ ret = strncmp(expected, buf, c);
+
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+ return ret;
+}
+
+static int run_vfprintf(int min, int max)
+{
+ int test;
+ int tmp;
+ int ret = 0;
+ void *p1, *p2;
+
+ for (test = min; test >= 0 && test <= max; test++) {
+ int llen = 0; /* line length */
+
+	/* avoid leaving empty lines below; this would insert holes into
+	 * test numbers.
+ */
+ switch (test + __LINE__ + 1) {
+ CASE_TEST(empty); EXPECT_VFPRINTF(0, "", ""); break;
+ CASE_TEST(simple); EXPECT_VFPRINTF(3, "foo", "foo"); break;
+ CASE_TEST(string); EXPECT_VFPRINTF(3, "foo", "%s", "foo"); break;
+ CASE_TEST(number); EXPECT_VFPRINTF(4, "1234", "%d", 1234); break;
+ CASE_TEST(negnumber); EXPECT_VFPRINTF(5, "-1234", "%d", -1234); break;
+ CASE_TEST(unsigned); EXPECT_VFPRINTF(5, "12345", "%u", 12345); break;
+ CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
+ CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
+ CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ case __LINE__:
+ return ret; /* must be last */
+ /* note: do not set any defaults so as to permit holes above */
+ }
+ }
+ return ret;
+}
+
static int smash_stack(void)
{
char buf[100];
+ volatile char *ptr = buf;
+ size_t i;
- for (size_t i = 0; i < 200; i++)
- buf[i] = 'P';
+ for (i = 0; i < 200; i++)
+ ptr[i] = 'P';
return 1;
}
@@ -689,12 +836,20 @@ static int run_protection(int min, int max)
llen += printf("0 -fstackprotector ");
-#if !defined(NOLIBC_STACKPROTECTOR)
+#if !defined(_NOLIBC_STACKPROTECTOR)
llen += printf("not supported");
pad_spc(llen, 64, "[SKIPPED]\n");
return 0;
#endif
+#if defined(_NOLIBC_STACKPROTECTOR)
+ if (!__stack_chk_guard) {
+ llen += printf("__stack_chk_guard not initialized");
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+#endif
+
pid = -1;
pid = fork();
@@ -708,6 +863,7 @@ static int run_protection(int min, int max)
close(STDOUT_FILENO);
close(STDERR_FILENO);
+ prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
smash_stack();
return 1;
@@ -778,14 +934,44 @@ static const struct test test_names[] = {
/* add new tests here */
{ .name = "syscall", .func = run_syscall },
{ .name = "stdlib", .func = run_stdlib },
+ { .name = "vfprintf", .func = run_vfprintf },
{ .name = "protection", .func = run_protection },
{ 0 }
};
+int is_setting_valid(char *test)
+{
+ int idx, len, test_len, valid = 0;
+ char delimiter;
+
+ if (!test)
+ return valid;
+
+ test_len = strlen(test);
+
+ for (idx = 0; test_names[idx].name; idx++) {
+ len = strlen(test_names[idx].name);
+ if (test_len < len)
+ continue;
+
+ if (strncmp(test, test_names[idx].name, len) != 0)
+ continue;
+
+ delimiter = test[len];
+ if (delimiter != ':' && delimiter != ',' && delimiter != '\0')
+ continue;
+
+ valid = 1;
+ break;
+ }
+
+ return valid;
+}
+
int main(int argc, char **argv, char **envp)
{
int min = 0;
- int max = __INT_MAX__;
+ int max = INT_MAX;
int ret = 0;
int err;
int idx;
@@ -807,10 +993,10 @@ int main(int argc, char **argv, char **envp)
* syscall:5-15[:.*],stdlib:8-10
*/
test = argv[1];
- if (!test)
+ if (!is_setting_valid(test))
test = getenv("NOLIBC_TEST");
- if (test) {
+ if (is_setting_valid(test)) {
char *comma, *colon, *dash, *value;
do {
@@ -833,7 +1019,7 @@ int main(int argc, char **argv, char **envp)
* here, which defaults to the full range.
*/
do {
- min = 0; max = __INT_MAX__;
+ min = 0; max = INT_MAX;
value = colon;
if (value && *value) {
colon = strchr(value, ':');
@@ -899,7 +1085,7 @@ int main(int argc, char **argv, char **envp)
#else
else if (ioperm(0x501, 1, 1) == 0)
#endif
- asm volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0));
+ __asm__ volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0));
/* if it does nothing, fall back to the regular panic */
#endif
}
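The outb to port 0x501 above is the QEMU isa-debug-exit escape hatch: on a VM started with that device, a single port write terminates the emulator immediately, which is faster than falling through to a panic. A hedged sketch of the mechanism for glibc on x86 (the device and port are a QEMU configuration assumption, e.g. "-device isa-debug-exit,iobase=0x501,iosize=1"; QEMU exits with status (value << 1) | 1, so value 0 yields exit code 1):

	#include <sys/io.h>	/* ioperm(), x86 only */

	static void qemu_exit(unsigned char value)
	{
		/* request access to port 0x501, then trigger isa-debug-exit */
		if (ioperm(0x501, 1, 1) == 0)
			__asm__ volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(value));
	}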
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 6922d6417e1c..88d6830ee004 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -90,7 +90,6 @@ again:
}
ret = WEXITSTATUS(status);
- ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret);
return ret;
}
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 3fd8e903118f..4e86f927880c 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -143,6 +143,7 @@ static inline int child_join(struct child *child, struct error *err)
r = -1;
}
+ ksft_print_msg("waitpid WEXITSTATUS=%d\n", r);
return r;
}
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index e2dd4ed84984..00a07e7c571c 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -115,7 +115,8 @@ static int test_pidfd_send_signal_exited_fail(void)
pidfd = open(buf, O_DIRECTORY | O_CLOEXEC);
- (void)wait_for_pid(pid);
+ ret = wait_for_pid(pid);
+ ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret);
if (pidfd < 0)
ksft_exit_fail_msg(
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index ae2bfc0d822f..49f2ad1793fd 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,6 +17,7 @@ SUB_DIRS = alignment \
benchmarks \
cache_shape \
copyloops \
+ dexcr \
dscr \
mm \
nx-gzip \
diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore
new file mode 100644
index 000000000000..b82f45dd46b9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/.gitignore
@@ -0,0 +1,2 @@
+hashchk_test
+lsdexcr
diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile
new file mode 100644
index 000000000000..76210f2bcec3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/Makefile
@@ -0,0 +1,9 @@
+TEST_GEN_PROGS := hashchk_test
+TEST_GEN_FILES := lsdexcr
+
+include ../../lib.mk
+
+$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect)
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ./dexcr.c
+$(TEST_GEN_FILES): ../utils.c ./dexcr.c
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c
new file mode 100644
index 000000000000..65ec5347de98
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "dexcr.h"
+#include "reg.h"
+#include "utils.h"
+
+static jmp_buf generic_signal_jump_buf;
+
+static void generic_signal_handler(int signum, siginfo_t *info, void *context)
+{
+ longjmp(generic_signal_jump_buf, 0);
+}
+
+bool dexcr_exists(void)
+{
+ struct sigaction old;
+ volatile bool exists;
+
+ old = push_signal_handler(SIGILL, generic_signal_handler);
+ if (setjmp(generic_signal_jump_buf))
+ goto out;
+
+ /*
+ * If the SPR is not recognised by the hardware it triggers
+ * a hypervisor emulation interrupt. If the kernel does not
+ * recognise/try to emulate it, we receive a SIGILL signal.
+ *
+ * If we do not receive a signal, assume we have the SPR or the
+ * kernel is trying to emulate it correctly.
+ */
+ exists = false;
+ mfspr(SPRN_DEXCR_RO);
+ exists = true;
+
+out:
+ pop_signal_handler(SIGILL, old);
+ return exists;
+}
+
+/*
+ * Just test if a bad hashchk triggers a signal, without checking
+ * for support or if the NPHIE aspect is enabled.
+ */
+bool hashchk_triggers(void)
+{
+ struct sigaction old;
+ volatile bool triggers;
+
+ old = push_signal_handler(SIGILL, generic_signal_handler);
+ if (setjmp(generic_signal_jump_buf))
+ goto out;
+
+ triggers = true;
+ do_bad_hashchk();
+ triggers = false;
+
+out:
+ pop_signal_handler(SIGILL, old);
+ return triggers;
+}
+
+unsigned int get_dexcr(enum dexcr_source source)
+{
+ switch (source) {
+ case DEXCR:
+ return mfspr(SPRN_DEXCR_RO);
+ case HDEXCR:
+ return mfspr(SPRN_HDEXCR_RO);
+ case EFFECTIVE:
+ return mfspr(SPRN_DEXCR_RO) | mfspr(SPRN_HDEXCR_RO);
+ default:
+ FAIL_IF_EXIT_MSG(true, "bad enum dexcr_source");
+ }
+}
+
+void await_child_success(pid_t pid)
+{
+ int wstatus;
+
+ FAIL_IF_EXIT_MSG(pid == -1, "fork failed");
+ FAIL_IF_EXIT_MSG(waitpid(pid, &wstatus, 0) == -1, "wait failed");
+ FAIL_IF_EXIT_MSG(!WIFEXITED(wstatus), "child did not exit cleanly");
+ FAIL_IF_EXIT_MSG(WEXITSTATUS(wstatus) != 0, "child exit error");
+}
+
+/*
+ * Perform a hashst instruction. The following components determine the result
+ *
+ * 1. The LR value (any register technically)
+ * 2. The SP value (also any register, but it must be a valid address)
+ * 3. A secret key managed by the kernel
+ *
+ * The result is stored to the address held in SP.
+ */
+void hashst(unsigned long lr, void *sp)
+{
+ asm volatile ("addi 31, %0, 0;" /* set r31 (pretend LR) to lr */
+ "addi 30, %1, 8;" /* set r30 (pretend SP) to sp + 8 */
+ PPC_RAW_HASHST(31, -8, 30) /* compute hash into stack location */
+ : : "r" (lr), "r" (sp) : "r31", "r30", "memory");
+}
+
+/*
+ * Perform a hashchk instruction. A hash is computed as per hashst(),
+ * however the result is not stored to memory. Instead the existing
+ * value is read and compared against the computed hash.
+ *
+ * If they match, execution continues.
+ * If they differ, an interrupt triggers.
+ */
+void hashchk(unsigned long lr, void *sp)
+{
+ asm volatile ("addi 31, %0, 0;" /* set r31 (pretend LR) to lr */
+ "addi 30, %1, 8;" /* set r30 (pretend SP) to sp + 8 */
+ PPC_RAW_HASHCHK(31, -8, 30) /* check hash at stack location */
+ : : "r" (lr), "r" (sp) : "r31", "r30", "memory");
+}
+
+void do_bad_hashchk(void)
+{
+ unsigned long hash = 0;
+
+ hashst(0, &hash);
+ hash += 1;
+ hashchk(0, &hash);
+}
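do_bad_hashchk() exercises the failing side of the pair. For contrast, a hedged sketch of the passing side: with the same (lr, sp) inputs and an unmodified hash, the check recomputes an identical value and execution simply continues, even with DEXCR[NPHIE] enabled.

	static void do_good_hashchk(void)
	{
		unsigned long hash = 0;

		hashst(0, &hash);	/* store the hash for (lr=0, sp=&hash) */
		hashchk(0, &hash);	/* recompute and compare: must match */
	}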
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h
new file mode 100644
index 000000000000..f55cbbc8643b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * POWER Dynamic Execution Control Facility (DEXCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DEXCR related test cases.
+ */
+#ifndef _SELFTESTS_POWERPC_DEXCR_DEXCR_H
+#define _SELFTESTS_POWERPC_DEXCR_DEXCR_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "reg.h"
+
+#define DEXCR_PR_BIT(aspect) __MASK(63 - (32 + (aspect)))
+#define DEXCR_PR_SBHE DEXCR_PR_BIT(0)
+#define DEXCR_PR_IBRTPD DEXCR_PR_BIT(3)
+#define DEXCR_PR_SRAPD DEXCR_PR_BIT(4)
+#define DEXCR_PR_NPHIE DEXCR_PR_BIT(5)
+
+#define PPC_RAW_HASH_ARGS(b, i, a) \
+ ((((i) >> 3) & 0x1F) << 21 | (a) << 16 | (b) << 11 | (((i) >> 8) & 0x1))
+#define PPC_RAW_HASHST(b, i, a) \
+ str(.long (0x7C0005A4 | PPC_RAW_HASH_ARGS(b, i, a));)
+#define PPC_RAW_HASHCHK(b, i, a) \
+ str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));)
+
+bool dexcr_exists(void);
+
+bool hashchk_triggers(void);
+
+enum dexcr_source {
+ DEXCR, /* Userspace DEXCR value */
+ HDEXCR, /* Hypervisor enforced DEXCR value */
+	EFFECTIVE, /* Bitwise OR of the DEXCR and HDEXCR bits */
+};
+
+unsigned int get_dexcr(enum dexcr_source source);
+
+void await_child_success(pid_t pid);
+
+void hashst(unsigned long lr, void *sp);
+
+void hashchk(unsigned long lr, void *sp);
+
+void do_bad_hashchk(void);
+
+#endif /* _SELFTESTS_POWERPC_DEXCR_DEXCR_H */
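A hedged worked example of the DEXCR_PR_BIT() arithmetic above, assuming __MASK(x) expands to (1UL << (x)) as in the selftest reg.h: aspects are numbered MSB0 within the upper word of the 64-bit register, so aspect n lands on conventional (LSB0) bit 63 - (32 + n).

	_Static_assert(DEXCR_PR_BIT(0) == 1UL << 31, "SBHE:  aspect 0 -> bit 31");
	_Static_assert(DEXCR_PR_BIT(5) == 1UL << 26, "NPHIE: aspect 5 -> bit 26");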
diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
new file mode 100644
index 000000000000..7d5658c9ebe4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+static int require_nphie(void)
+{
+ SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported");
+ SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE),
+ "DEXCR[NPHIE] not enabled");
+
+ return 0;
+}
+
+static jmp_buf hashchk_detected_buf;
+static const char *hashchk_failure_msg;
+
+static void hashchk_handler(int signum, siginfo_t *info, void *context)
+{
+ if (signum != SIGILL)
+ hashchk_failure_msg = "wrong signal received";
+ else if (info->si_code != ILL_ILLOPN)
+ hashchk_failure_msg = "wrong signal code received";
+
+ longjmp(hashchk_detected_buf, 0);
+}
+
+/*
+ * Check that hashchk triggers when DEXCR[NPHIE] is enabled
+ * and is detected as such by the kernel exception handler
+ */
+static int hashchk_detected_test(void)
+{
+ struct sigaction old;
+ int err;
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ old = push_signal_handler(SIGILL, hashchk_handler);
+ if (setjmp(hashchk_detected_buf))
+ goto out;
+
+ hashchk_failure_msg = NULL;
+ do_bad_hashchk();
+ hashchk_failure_msg = "hashchk failed to trigger";
+
+out:
+ pop_signal_handler(SIGILL, old);
+ FAIL_IF_MSG(hashchk_failure_msg, hashchk_failure_msg);
+ return 0;
+}
+
+#define HASH_COUNT 8
+
+static unsigned long hash_values[HASH_COUNT + 1];
+
+static void fill_hash_values(void)
+{
+ for (unsigned long i = 0; i < HASH_COUNT; i++)
+ hashst(i, &hash_values[i]);
+
+	/* Used to ensure the checks use the same addresses as the hashes */
+ hash_values[HASH_COUNT] = (unsigned long)&hash_values;
+}
+
+static unsigned int count_hash_values_matches(void)
+{
+	unsigned int matches = 0;
+
+ for (unsigned long i = 0; i < HASH_COUNT; i++) {
+ unsigned long orig_hash = hash_values[i];
+ hash_values[i] = 0;
+
+ hashst(i, &hash_values[i]);
+
+ if (hash_values[i] == orig_hash)
+ matches++;
+ }
+
+ return matches;
+}
+
+static int hashchk_exec_child(void)
+{
+ ssize_t count;
+
+ fill_hash_values();
+
+ count = write(STDOUT_FILENO, hash_values, sizeof(hash_values));
+ return count == sizeof(hash_values) ? 0 : EOVERFLOW;
+}
+
+static char *hashchk_exec_child_args[] = { "hashchk_exec_child", NULL };
+
+/*
+ * Check that new programs get different keys so a malicious process
+ * can't recreate a victim's hash values.
+ */
+static int hashchk_exec_random_key_test(void)
+{
+ pid_t pid;
+ int err;
+ int pipefd[2];
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ FAIL_IF_MSG(pipe(pipefd), "failed to create pipe");
+
+ pid = fork();
+ if (pid == 0) {
+ if (dup2(pipefd[1], STDOUT_FILENO) == -1)
+ _exit(errno);
+
+ execve("/proc/self/exe", hashchk_exec_child_args, NULL);
+ _exit(errno);
+ }
+
+ await_child_success(pid);
+ FAIL_IF_MSG(read(pipefd[0], hash_values, sizeof(hash_values)) != sizeof(hash_values),
+ "missing expected child output");
+
+ /* Verify the child used the same hash_values address */
+ FAIL_IF_EXIT_MSG(hash_values[HASH_COUNT] != (unsigned long)&hash_values,
+ "bad address check");
+
+ /* If all hashes are the same it means (most likely) same key */
+ FAIL_IF_MSG(count_hash_values_matches() == HASH_COUNT, "shared key detected");
+
+ return 0;
+}
+
+/*
+ * Check that forks share the same key so that existing hash values
+ * remain valid.
+ */
+static int hashchk_fork_share_key_test(void)
+{
+ pid_t pid;
+ int err;
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ fill_hash_values();
+
+ pid = fork();
+ if (pid == 0) {
+ if (count_hash_values_matches() != HASH_COUNT)
+ _exit(1);
+ _exit(0);
+ }
+
+ await_child_success(pid);
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+static int hashchk_clone_child_fn(void *args)
+{
+ fill_hash_values();
+ return 0;
+}
+
+/*
+ * Check that threads share the same key so that existing hash values
+ * remain valid.
+ */
+static int hashchk_clone_share_key_test(void)
+{
+ void *child_stack;
+ pid_t pid;
+ int err;
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+ FAIL_IF_MSG(child_stack == MAP_FAILED, "failed to map child stack");
+
+ pid = clone(hashchk_clone_child_fn, child_stack + STACK_SIZE,
+ CLONE_VM | SIGCHLD, NULL);
+
+ await_child_success(pid);
+ FAIL_IF_MSG(count_hash_values_matches() != HASH_COUNT,
+ "different key detected");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+
+ if (argc >= 1 && !strcmp(argv[0], hashchk_exec_child_args[0]))
+ return hashchk_exec_child();
+
+ err |= test_harness(hashchk_detected_test, "hashchk_detected");
+ err |= test_harness(hashchk_exec_random_key_test, "hashchk_exec_random_key");
+ err |= test_harness(hashchk_fork_share_key_test, "hashchk_fork_share_key");
+ err |= test_harness(hashchk_clone_share_key_test, "hashchk_clone_share_key");
+
+ return err;
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
new file mode 100644
index 000000000000..94abbfcc389e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+static unsigned int dexcr;
+static unsigned int hdexcr;
+static unsigned int effective;
+
+struct dexcr_aspect {
+ const char *name;
+ const char *desc;
+ unsigned int index;
+};
+
+static const struct dexcr_aspect aspects[] = {
+ {
+ .name = "SBHE",
+ .desc = "Speculative branch hint enable",
+ .index = 0,
+ },
+ {
+ .name = "IBRTPD",
+ .desc = "Indirect branch recurrent target prediction disable",
+ .index = 3,
+ },
+ {
+ .name = "SRAPD",
+ .desc = "Subroutine return address prediction disable",
+ .index = 4,
+ },
+ {
+ .name = "NPHIE",
+ .desc = "Non-privileged hash instruction enable",
+ .index = 5,
+ },
+ {
+ .name = "PHIE",
+ .desc = "Privileged hash instruction enable",
+ .index = 6,
+ },
+};
+
+static void print_list(const char *list[], size_t len)
+{
+ for (size_t i = 0; i < len; i++) {
+ printf("%s", list[i]);
+ if (i + 1 < len)
+ printf(", ");
+ }
+}
+
+static void print_dexcr(char *name, unsigned int bits)
+{
+ const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL};
+ size_t j = 0;
+
+ printf("%s: %08x", name, bits);
+
+ if (bits == 0) {
+ printf("\n");
+ return;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(aspects); i++) {
+ unsigned int mask = DEXCR_PR_BIT(aspects[i].index);
+
+ if (bits & mask) {
+ enabled_aspects[j++] = aspects[i].name;
+ bits &= ~mask;
+ }
+ }
+
+ if (bits)
+ enabled_aspects[j++] = "unknown";
+
+ printf(" (");
+ print_list(enabled_aspects, j);
+ printf(")\n");
+}
+
+static void print_aspect(const struct dexcr_aspect *aspect)
+{
+ const char *attributes[8] = {NULL};
+ size_t j = 0;
+ unsigned long mask;
+
+ mask = DEXCR_PR_BIT(aspect->index);
+ if (dexcr & mask)
+ attributes[j++] = "set";
+ if (hdexcr & mask)
+ attributes[j++] = "set (hypervisor)";
+ if (!(effective & mask))
+ attributes[j++] = "clear";
+
+ printf("%12s %c (%d): ", aspect->name, effective & mask ? '*' : ' ', aspect->index);
+ print_list(attributes, j);
+ printf(" \t(%s)\n", aspect->desc);
+}
+
+int main(int argc, char *argv[])
+{
+ if (!dexcr_exists()) {
+ printf("DEXCR not detected on this hardware\n");
+ return 1;
+ }
+
+ dexcr = get_dexcr(DEXCR);
+ hdexcr = get_dexcr(HDEXCR);
+ effective = dexcr | hdexcr;
+
+ print_dexcr(" DEXCR", dexcr);
+ print_dexcr(" HDEXCR", hdexcr);
+ print_dexcr("Effective", effective);
+ printf("\n");
+
+ for (size_t i = 0; i < ARRAY_SIZE(aspects); i++)
+ print_aspect(&aspects[i]);
+ printf("\n");
+
+ if (effective & DEXCR_PR_NPHIE) {
+ printf("DEXCR[NPHIE] enabled: hashst/hashchk ");
+ if (hashchk_triggers())
+ printf("working\n");
+ else
+ printf("failed to trigger\n");
+ } else {
+ printf("DEXCR[NPHIE] disabled: hashst/hashchk ");
+ if (hashchk_triggers())
+ printf("unexpectedly triggered\n");
+ else
+ printf("ignored\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index d5a547f72669..fad09c9d3387 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -19,6 +19,8 @@
#define mb() asm volatile("sync" : : : "memory");
#define barrier() asm volatile("" : : : "memory");
+#define SPRN_HDEXCR_RO 455 /* Userspace readonly view of SPRN_HDEXCR (471) */
+
#define SPRN_MMCR2 769
#define SPRN_MMCRA 770
#define SPRN_MMCR0 779
@@ -47,6 +49,8 @@
#define SPRN_SDAR 781
#define SPRN_SIER 768
+#define SPRN_DEXCR_RO 812 /* Userspace readonly view of SPRN_DEXCR (828) */
+
#define SPRN_TEXASR 0x82 /* Transaction Exception and Status Register */
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 44bfd48b93d6..36c30c611457 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -9,11 +9,18 @@
#define __cacheline_aligned __attribute__((aligned(128)))
#include <stdint.h>
+#include <stdio.h>
#include <stdbool.h>
+#include <sys/signal.h>
#include <linux/auxvec.h>
#include <linux/perf_event.h>
#include <asm/cputable.h>
#include "reg.h"
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
/* Avoid headaches with PRI?64 - just use %ll? always */
typedef unsigned long long u64;
@@ -67,7 +74,6 @@ struct perf_event_read {
};
#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
-#include <unistd.h>
#include <sys/syscall.h>
static inline pid_t gettid(void)
@@ -106,6 +112,9 @@ static inline char *auxv_platform(void)
bool is_ppc64le(void);
int using_hash_mmu(bool *using_hash);
+struct sigaction push_signal_handler(int sig, void (*fn)(int, siginfo_t *, void *));
+struct sigaction pop_signal_handler(int sig, struct sigaction old_handler);
+
/* Yes, this is evil */
#define FAIL_IF(x) \
do { \
@@ -116,6 +125,16 @@ do { \
} \
} while (0)
+#define FAIL_IF_MSG(x, msg) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d: %s\n", \
+ __LINE__, msg); \
+ return 1; \
+ } \
+} while (0)
+
#define FAIL_IF_EXIT(x) \
do { \
if ((x)) { \
@@ -125,6 +144,16 @@ do { \
} \
} while (0)
+#define FAIL_IF_EXIT_MSG(x, msg) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d: %s\n", \
+ __LINE__, msg); \
+ _exit(1); \
+ } \
+} while (0)
+
/* The test harness uses this, yes it's gross */
#define MAGIC_SKIP_RETURN_VALUE 99
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index 4181755cf5a0..64e25cce1435 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -18,8 +18,6 @@
#define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */
#define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
extern int ev_mask_marked, ev_shift_marked;
extern int ev_mask_comb, ev_shift_comb;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 252fb4a95e90..e5f2d8735c64 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -618,3 +618,27 @@ out:
fclose(f);
return rc;
}
+
+struct sigaction push_signal_handler(int sig, void (*fn)(int, siginfo_t *, void *))
+{
+ struct sigaction sa;
+ struct sigaction old_handler;
+
+ sa.sa_sigaction = fn;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ FAIL_IF_EXIT_MSG(sigaction(sig, &sa, &old_handler),
+ "failed to push signal handler");
+
+ return old_handler;
+}
+
+struct sigaction pop_signal_handler(int sig, struct sigaction old_handler)
+{
+ struct sigaction popped;
+
+ FAIL_IF_EXIT_MSG(sigaction(sig, &old_handler, &popped),
+ "failed to pop signal handler");
+
+ return popped;
+}
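A hedged usage sketch of the push/pop pair above: bracket a probe that may fault, then restore whatever handler the caller had installed (this mirrors dexcr_exists(); it assumes <setjmp.h>, <signal.h> and <stdbool.h> are available):

	static jmp_buf probe_buf;

	static void probe_handler(int sig, siginfo_t *info, void *ctx)
	{
		longjmp(probe_buf, 1);
	}

	static bool probe_faults(void (*probe)(void))
	{
		struct sigaction old = push_signal_handler(SIGILL, probe_handler);
		volatile bool faulted = true;

		if (!setjmp(probe_buf)) {
			probe();		/* may raise SIGILL */
			faulted = false;	/* reached only if it did not */
		}

		pop_signal_handler(SIGILL, old);
		return faulted;
	}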
diff --git a/tools/testing/selftests/prctl/set-anon-vma-name-test.c b/tools/testing/selftests/prctl/set-anon-vma-name-test.c
index 26d853c5a0c1..4275cb256dce 100644
--- a/tools/testing/selftests/prctl/set-anon-vma-name-test.c
+++ b/tools/testing/selftests/prctl/set-anon-vma-name-test.c
@@ -97,7 +97,7 @@ TEST_F(vma, renaming) {
TH_LOG("Try to pass invalid name (with non-printable character \\1) to rename the VMA");
EXPECT_EQ(rename_vma((unsigned long)self->ptr_anon, AREA_SIZE, BAD_NAME), -EINVAL);
- TH_LOG("Try to rename non-anonynous VMA");
+ TH_LOG("Try to rename non-anonymous VMA");
EXPECT_EQ(rename_vma((unsigned long) self->ptr_not_anon, AREA_SIZE, GOOD_NAME), -EINVAL);
}
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index cfa9562f3cd8..e9438a1862ad 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -110,7 +110,7 @@ static long ppb_to_scaled_ppm(int ppb)
static int64_t pctns(struct ptp_clock_time *t)
{
- return t->sec * 1000000000LL + t->nsec;
+ return t->sec * NSEC_PER_SEC + t->nsec;
}
static void usage(char *progname)
@@ -134,6 +134,7 @@ static void usage(char *progname)
" 1 - external time stamp\n"
" 2 - periodic output\n"
" -n val shift the ptp clock time by 'val' nanoseconds\n"
+ " -o val phase offset (in nanoseconds) to be provided to the PHC servo\n"
" -p val enable output with a period of 'val' nanoseconds\n"
" -H val set output phase to 'val' nanoseconds (requires -p)\n"
" -w val set output pulse width to 'val' nanoseconds (requires -p)\n"
@@ -167,6 +168,7 @@ int main(int argc, char *argv[])
int adjfreq = 0x7fffffff;
int adjtime = 0;
int adjns = 0;
+ int adjphase = 0;
int capabilities = 0;
int extts = 0;
int flagtest = 0;
@@ -188,7 +190,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:p:P:sSt:T:w:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:o:p:P:sSt:T:w:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -228,6 +230,9 @@ int main(int argc, char *argv[])
case 'n':
adjns = atoi(optarg);
break;
+ case 'o':
+ adjphase = atoi(optarg);
+ break;
case 'p':
perout = atoll(optarg);
break;
@@ -287,7 +292,8 @@ int main(int argc, char *argv[])
" %d pulse per second\n"
" %d programmable pins\n"
" %d cross timestamping\n"
- " %d adjust_phase\n",
+ " %d adjust_phase\n"
+ " %d maximum phase adjustment (ns)\n",
caps.max_adj,
caps.n_alarm,
caps.n_ext_ts,
@@ -295,7 +301,8 @@ int main(int argc, char *argv[])
caps.pps,
caps.n_pins,
caps.cross_timestamping,
- caps.adjust_phase);
+ caps.adjust_phase,
+ caps.max_phase_adj);
}
}
@@ -317,7 +324,7 @@ int main(int argc, char *argv[])
tx.time.tv_usec = adjns;
while (tx.time.tv_usec < 0) {
tx.time.tv_sec -= 1;
- tx.time.tv_usec += 1000000000;
+ tx.time.tv_usec += NSEC_PER_SEC;
}
if (clock_adjtime(clkid, &tx) < 0) {
@@ -327,6 +334,18 @@ int main(int argc, char *argv[])
}
}
+ if (adjphase) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_OFFSET | ADJ_NANO;
+ tx.offset = adjphase;
+
+ if (clock_adjtime(clkid, &tx) < 0) {
+ perror("clock_adjtime");
+ } else {
+ puts("phase adjustment okay");
+ }
+ }
+
if (gettime) {
if (clock_gettime(clkid, &ts)) {
perror("clock_gettime");
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index b52d5069563c..48b9147e8c91 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -250,7 +250,7 @@ identify_qemu_args () {
echo -machine virt,gic-version=host -cpu host
;;
qemu-system-ppc64)
- echo -enable-kvm -M pseries -nodefaults
+ echo -M pseries -nodefaults
echo -device spapr-vscsi
if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 62f3b0f56e4d..d3cdc2d33d4b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -655,4 +655,4 @@ fi
# Control buffer size: --bootargs trace_buf_size=3k
# Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops
# Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu
-# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn
+# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
index f57720c52c0f..84f6bb98ce99 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
@@ -5,4 +5,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 64f864f1f361..8e50bfd4b710 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -4,4 +4,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 73d53257df42..5073dbc96125 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -7,4 +7,4 @@ TEST_GEN_PROGS := resctrl_tests
include ../lib.mk
-$(OUTPUT)/resctrl_tests: $(wildcard *.c)
+$(OUTPUT)/resctrl_tests: $(wildcard *.[ch])
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 8a4fe8693be6..289b619116fe 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -87,21 +87,19 @@ static int reset_enable_llc_perf(pid_t pid, int cpu_no)
static int get_llc_perf(unsigned long *llc_perf_miss)
{
__u64 total_misses;
+ int ret;
/* Stop counters after one span to get miss rate */
ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
- if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) {
+ ret = read(fd_lm, &rf_cqm, sizeof(struct read_format));
+ if (ret == -1) {
perror("Could not get llc misses through perf");
-
return -1;
}
total_misses = rf_cqm.values[0].value;
-
- close(fd_lm);
-
*llc_perf_miss = total_misses;
return 0;
@@ -253,19 +251,25 @@ int cat_val(struct resctrl_val_param *param)
memflush, operation, resctrl_val)) {
fprintf(stderr, "Error-running fill buffer\n");
ret = -1;
- break;
+ goto pe_close;
}
sleep(1);
ret = measure_cache_vals(param, bm_pid);
if (ret)
- break;
+ goto pe_close;
+
+ close(fd_lm);
} else {
break;
}
}
return ret;
+
+pe_close:
+ close(fd_lm);
+ return ret;
}
/*
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 341cc93ca84c..3b328c844896 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -177,12 +177,13 @@ fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
else
ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
+ free(startptr);
+
if (ret) {
printf("\n Error in fill cache read/write...\n");
return -1;
}
- free(startptr);
return 0;
}
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 87e39456dee0..f455f0b7e314 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -43,6 +43,7 @@
do { \
perror(err_msg); \
kill(ppid, SIGKILL); \
+ umount_resctrlfs(); \
exit(EXIT_FAILURE); \
} while (0)
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index 32a72902d045..f4b3d5c9af5b 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -5,7 +5,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv))
-RISCV_SUBTARGETS ?= hwprobe
+RISCV_SUBTARGETS ?= hwprobe vector
else
RISCV_SUBTARGETS :=
endif
@@ -43,7 +43,7 @@ run_tests: all
done
# Avoid any output on non riscv on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(RISCV_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
diff --git a/tools/testing/selftests/riscv/hwprobe/.gitignore b/tools/testing/selftests/riscv/hwprobe/.gitignore
new file mode 100644
index 000000000000..8113dc3bdd03
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/.gitignore
@@ -0,0 +1 @@
+hwprobe
diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore
new file mode 100644
index 000000000000..9ae7964491d5
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/.gitignore
@@ -0,0 +1,3 @@
+vstate_exec_nolibc
+vstate_prctl
+v_initval_nolibc
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
new file mode 100644
index 000000000000..bfff0ff4f3be
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+TEST_GEN_PROGS := vstate_prctl v_initval_nolibc
+TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc
+
+include ../../lib.mk
+
+$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c
+ $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
+ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
+
+$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c
+ $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
+ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
new file mode 100644
index 000000000000..66764edb0d52
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest.h"
+#define MAX_VSIZE (8192 * 32)
+
+void dump(char *ptr, int size)
+{
+ int i = 0;
+
+ for (i = 0; i < size; i++) {
+ if (i != 0) {
+ if (i % 16 == 0)
+ printf("\n");
+ else if (i % 8 == 0)
+ printf(" ");
+ }
+ printf("%02x ", ptr[i]);
+ }
+ printf("\n");
+}
+
+int main(void)
+{
+ int i;
+ unsigned long vl;
+ char *datap, *tmp;
+
+ datap = malloc(MAX_VSIZE);
+ if (!datap) {
+ ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE);
+ exit(-1);
+ }
+
+ tmp = datap;
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli %0, x0, e8, m8, ta, ma\n\t"
+ "vse8.v v0, (%2)\n\t"
+ "add %1, %2, %0\n\t"
+ "vse8.v v8, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v16, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v24, (%1)\n\t"
+ ".option pop\n\t"
+ : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory");
+
+ ksft_print_msg("vl = %lu\n", vl);
+
+	if (datap[0] != 0x00 && (unsigned char)datap[0] != 0xff) {
+ ksft_test_result_fail("v-regesters are not properly initialized\n");
+ dump(datap, vl * 4);
+ exit(-1);
+ }
+
+ for (i = 1; i < vl * 4; i++) {
+ if (datap[i] != datap[0]) {
+ ksft_test_result_fail("detect stale values on v-regesters\n");
+ dump(datap, vl * 4);
+ exit(-2);
+ }
+ }
+
+ free(datap);
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
new file mode 100644
index 000000000000..2c0d2b1126c1
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define THIS_PROGRAM "./vstate_exec_nolibc"
+
+int main(int argc, char **argv)
+{
+ int rc, pid, status, test_inherit = 0;
+ long ctrl, ctrl_c;
+ char *exec_argv[2], *exec_envp[2];
+
+ if (argc > 1)
+ test_inherit = 1;
+
+ ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
+ if (ctrl < 0) {
+ puts("PR_RISCV_V_GET_CONTROL is not supported\n");
+ return ctrl;
+ }
+
+ if (test_inherit) {
+ pid = fork();
+ if (pid == -1) {
+ puts("fork failed\n");
+ exit(-1);
+ }
+
+ /* child */
+ if (!pid) {
+ exec_argv[0] = THIS_PROGRAM;
+ exec_argv[1] = NULL;
+ exec_envp[0] = NULL;
+ exec_envp[1] = NULL;
+ /* launch the program again to check inherit */
+ rc = execve(THIS_PROGRAM, exec_argv, exec_envp);
+ if (rc) {
+ puts("child execve failed\n");
+ exit(-1);
+ }
+ }
+
+ } else {
+ pid = fork();
+ if (pid == -1) {
+ puts("fork failed\n");
+ exit(-1);
+ }
+
+ if (!pid) {
+ rc = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
+ if (rc != ctrl) {
+ puts("child's vstate_ctrl not equal to parent's\n");
+ exit(-1);
+ }
+ asm volatile (".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli x0, x0, e32, m8, ta, ma\n\t"
+ ".option pop\n\t"
+ );
+ exit(ctrl);
+ }
+ }
+
+ rc = waitpid(-1, &status, 0);
+
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0xff) {
+ puts("child exited abnormally\n");
+ exit(-1);
+ }
+
+ if (WIFSIGNALED(status)) {
+ if (WTERMSIG(status) != SIGILL) {
+ puts("child was terminated by unexpected signal\n");
+ exit(-1);
+ }
+
+ if ((ctrl & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) != PR_RISCV_V_VSTATE_CTRL_OFF) {
+ puts("child signaled by illegal V access but vstate_ctrl is not off\n");
+ exit(-1);
+ }
+
+ /* child terminated, and its vstate_ctrl is off */
+ exit(ctrl);
+ }
+
+ ctrl_c = WEXITSTATUS(status);
+ if (test_inherit) {
+ if (ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT) {
+ if (!(ctrl_c & PR_RISCV_V_VSTATE_CTRL_INHERIT)) {
+ puts("parent has inherit bit, but child has not\n");
+ exit(-1);
+ }
+ }
+ rc = (ctrl & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2;
+ if (rc != PR_RISCV_V_VSTATE_CTRL_DEFAULT) {
+ if (rc != (ctrl_c & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)) {
+ puts("parent's next setting does not equal to child's\n");
+ exit(-1);
+ }
+
+ if (!(ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT)) {
+ if ((ctrl_c & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) !=
+ PR_RISCV_V_VSTATE_CTRL_DEFAULT) {
+ puts("must clear child's next vstate_ctrl if !inherit\n");
+ exit(-1);
+ }
+ }
+ }
+ }
+ return ctrl;
+}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
new file mode 100644
index 000000000000..b348b475be57
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/hwprobe.h>
+#include <errno.h>
+#include <sys/wait.h>
+
+#include "../../kselftest.h"
+
+/*
+ * Rather than relying on having a new enough libc to define this, just
+ * declare it ourselves. That way the test does not depend on the libc
+ * being new enough to contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+#define NEXT_PROGRAM "./vstate_exec_nolibc"
+static int launch_test(int test_inherit)
+{
+ char *exec_argv[3], *exec_envp[1];
+ int rc, pid, status;
+
+ pid = fork();
+ if (pid < 0) {
+ ksft_test_result_fail("fork failed %d", pid);
+ return -1;
+ }
+
+ if (!pid) {
+ exec_argv[0] = NEXT_PROGRAM;
+ exec_argv[1] = test_inherit != 0 ? "x" : NULL;
+ exec_argv[2] = NULL;
+ exec_envp[0] = NULL;
+ /* launch the program again to check inherit */
+ rc = execve(NEXT_PROGRAM, exec_argv, exec_envp);
+ if (rc) {
+ perror("execve");
+ ksft_test_result_fail("child execve failed %d\n", rc);
+ exit(-1);
+ }
+ }
+
+ rc = waitpid(-1, &status, 0);
+ if (rc < 0) {
+ ksft_test_result_fail("waitpid failed\n");
+ return -3;
+ }
+
+	if ((WIFEXITED(status) && WEXITSTATUS(status) == 0xff) ||
+ WIFSIGNALED(status)) {
+ ksft_test_result_fail("child exited abnormally\n");
+ return -4;
+ }
+
+ return WEXITSTATUS(status);
+}
+
+int test_and_compare_child(long provided, long expected, int inherit)
+{
+ int rc;
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, provided);
+ if (rc != 0) {
+ ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n",
+ provided, rc);
+ return -1;
+ }
+ rc = launch_test(inherit);
+ if (rc != expected) {
+ ksft_test_result_fail("Test failed, check %d != %d\n", rc,
+ expected);
+ return -2;
+ }
+ return 0;
+}
+
+#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0
+#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2
+
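A hedged worked example of the control-word layout these shifts describe (values quoted from the uapi prctl.h as an assumption: OFF = 1, ON = 2, INHERIT = 0x10; bits 1:0 hold the current state, bits 3:2 the state applied after execve):

	/* "next on, inherited" composes to (2 << 2) | 0x10 == 0x18 */
	long flag = (PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT)
		    | PR_RISCV_V_VSTATE_CTRL_INHERIT;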
+int main(void)
+{
+ struct riscv_hwprobe pair;
+ long flag, expected;
+ long rc;
+
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
+ if (rc < 0) {
+ ksft_test_result_fail("hwprobe() failed with %d\n", rc);
+ return -1;
+ }
+
+ if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) {
+ ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n");
+ return -2;
+ }
+
+ if (!(pair.value & RISCV_HWPROBE_IMA_V)) {
+ rc = prctl(PR_RISCV_V_GET_CONTROL);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n");
+ return -3;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n");
+ return -4;
+ }
+
+ ksft_test_result_skip("Vector not supported\n");
+ return 0;
+ }
+
+ flag = PR_RISCV_V_VSTATE_CTRL_ON;
+ rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
+ if (rc != 0) {
+ ksft_test_result_fail("Enabling V for current should always success\n");
+ return -5;
+ }
+
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF;
+ rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
+ if (rc != -1 || errno != EPERM) {
+ ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n",
+ errno);
+ return -5;
+ }
+
+ /* Turn on next's vector explicitly and test */
+ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0))
+ return -6;
+
+ /* Turn off next's vector explicitly and test */
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0))
+ return -7;
+
+ /* Turn on next's vector explicitly and test inherit */
+ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
+ if (test_and_compare_child(flag, expected, 0))
+ return -8;
+
+ if (test_and_compare_child(flag, expected, 1))
+ return -9;
+
+ /* Turn off next's vector explicitly and test inherit */
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
+ if (test_and_compare_child(flag, expected, 0))
+ return -10;
+
+ if (test_and_compare_child(flag, expected, 1))
+ return -11;
+
+	/* Invalid control arguments must fail with EINVAL */
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n");
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index b357ba24af06..7a957c7d459a 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
CLANG_FLAGS += -no-integrated-as
endif
+top_srcdir = ../../../..
+
CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
- $(CLANG_FLAGS)
+ $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
LDLIBS += -lpthread -ldl
# Own dependencies because we only want to build against 1st prerequisite, but
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 4e4aa006004c..96e812bdf8a4 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -31,12 +31,22 @@
#include <sys/auxv.h>
#include <linux/auxvec.h>
+#include <linux/compiler.h>
+
#include "../kselftest.h"
#include "rseq.h"
-static const ptrdiff_t *libc_rseq_offset_p;
-static const unsigned int *libc_rseq_size_p;
-static const unsigned int *libc_rseq_flags_p;
+/*
+ * Define weak versions to play nice with binaries that are statically linked
+ * against a libc that doesn't support registering its own rseq.
+ */
+__weak ptrdiff_t __rseq_offset;
+__weak unsigned int __rseq_size;
+__weak unsigned int __rseq_flags;
+
+static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
+static const unsigned int *libc_rseq_size_p = &__rseq_size;
+static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;
@@ -155,9 +165,17 @@ unsigned int get_rseq_feature_size(void)
static __attribute__((constructor))
void rseq_init(void)
{
- libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
- libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
- libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ /*
+ * If the libc's registered rseq size isn't already valid, it may be
+ * because the binary is dynamically linked and not necessarily due to
+ * libc not having registered a restartable sequence. Try to find the
+ * symbols if that's the case.
+ */
+ if (!*libc_rseq_size_p) {
+ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ }
if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
*libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
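The weak definitions above give the link a zero-initialized fallback whenever no strong __rseq_* symbols exist. A minimal hedged sketch of the pattern in isolation (the symbol name is illustrative):

	/* Used only when no strong definition exists anywhere in the link. */
	__attribute__((weak)) unsigned int maybe_provided;

	static int feature_available(void)
	{
		/* reads 0 when only the weak stub was linked in */
		return maybe_provided != 0;
	}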
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index 97165a83df63..92743980e553 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -26,6 +26,7 @@ Usage: $0 [OPTIONS]
-l | --list List the available collection:test entries
-d | --dry-run Don't actually run any tests
-h | --help Show this usage info
+  -o | --override-timeout  Number of seconds after which tests time out
EOF
exit $1
}
@@ -33,6 +34,7 @@ EOF
COLLECTIONS=""
TESTS=""
dryrun=""
+kselftest_override_timeout=""
while true; do
case "$1" in
-s | --summary)
@@ -51,6 +53,9 @@ while true; do
-d | --dry-run)
dryrun="echo"
shift ;;
+ -o | --override-timeout)
+ kselftest_override_timeout="$2"
+ shift 2 ;;
-h | --help)
usage 0 ;;
"")
@@ -85,7 +90,7 @@ if [ -n "$TESTS" ]; then
available="$(echo "$valid" | sed -e 's/ /\n/g')"
fi
-collections=$(echo "$available" | cut -d: -f1 | uniq)
+collections=$(echo "$available" | cut -d: -f1 | sort | uniq)
for collection in $collections ; do
[ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg
tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2)
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index bfc54b422f25..444b2befda82 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -14,23 +14,27 @@ TEST_FILE=$(mktemp)
# This represents
#
-# TEST_ID:TEST_COUNT:ENABLED:TARGET
+# TEST_ID:TEST_COUNT:ENABLED:TARGET:SKIP_NO_TARGET
#
# TEST_ID: is the test id number
# TEST_COUNT: number of times we should run the test
# ENABLED: 1 if enabled, 0 otherwise
# TARGET: test target file required on the test_sysctl module
+#  SKIP_NO_TARGET: 1 skip if TARGET is not there
+#                  0 run even though TARGET is not there
#
# Once these are enabled please leave them as-is. Write your own test,
# we have tons of space.
-ALL_TESTS="0001:1:1:int_0001"
-ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001"
-ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
-ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
-ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
-ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
-ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
-ALL_TESTS="$ALL_TESTS 0008:1:1:match_int"
+ALL_TESTS="0001:1:1:int_0001:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003:1"
+ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001:1"
+ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int:1"
+ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1"
+ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0"
+ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0"
function allow_user_defaults()
{
@@ -613,7 +617,6 @@ target_exists()
TEST_ID="$2"
if [ ! -f ${TARGET} ] ; then
- echo "Target for test $TEST_ID: $TARGET not exist, skipping test ..."
return 0
fi
return 1
@@ -730,7 +733,7 @@ sysctl_test_0005()
sysctl_test_0006()
{
- TARGET="${SYSCTL}/bitmap_0001"
+ TARGET="${SYSCTL}/$(get_test_target 0006)"
reset_vals
ORIG=""
run_bitmaptest
@@ -738,7 +741,7 @@ sysctl_test_0006()
sysctl_test_0007()
{
- TARGET="${SYSCTL}/boot_int"
+ TARGET="${SYSCTL}/$(get_test_target 0007)"
if [ ! -f $TARGET ]; then
echo "Skipping test for $TARGET as it is not present ..."
return $ksft_skip
@@ -778,7 +781,7 @@ sysctl_test_0007()
sysctl_test_0008()
{
- TARGET="${SYSCTL}/match_int"
+ TARGET="${SYSCTL}/$(get_test_target 0008)"
if [ ! -f $TARGET ]; then
echo "Skipping test for $TARGET as it is not present ..."
return $ksft_skip
@@ -797,6 +800,34 @@ sysctl_test_0008()
return 0
}
+sysctl_test_0009()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0009)"
+ echo -n "Testing if $TARGET unregistered correctly ..."
+ if [ -d $TARGET ]; then
+ echo "TEST FAILED"
+ rc=1
+ test_rc
+ fi
+
+ echo "ok"
+ return 0
+}
+
+sysctl_test_0010()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0010)"
+ echo -n "Testing that $TARGET was not created ..."
+ if [ -d $TARGET ]; then
+ echo "TEST FAILED"
+ rc=1
+ test_rc
+ fi
+
+ echo "ok"
+ return 0
+}
+
list_tests()
{
echo "Test ID list:"
@@ -813,6 +844,8 @@ list_tests()
echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
echo "0008 x $(get_test_count 0008) - tests sysctl macro values match"
+ echo "0009 x $(get_test_count 0009) - tests sysct unregister"
+ echo "0010 x $(get_test_count 0010) - tests sysct mount point"
}
usage()
@@ -857,38 +890,65 @@ function test_num()
usage
fi
}
+function remove_leading_zeros()
+{
+ echo $1 | sed 's/^0*//'
+}
function get_test_count()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
echo ${TEST_DATA} | awk -F":" '{print $2}'
}
function get_test_enabled()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
echo ${TEST_DATA} | awk -F":" '{print $3}'
}
function get_test_target()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
echo ${TEST_DATA} | awk -F":" '{print $4}'
}
+function get_test_skip_no_target()
+{
+ test_num $1
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
+ echo ${TEST_DATA} | awk -F":" '{print $5}'
+}
+
+function skip_test()
+{
+ TEST_ID=$1
+ TEST_TARGET=$2
+ if target_exists $TEST_TARGET $TEST_ID; then
+ TEST_SKIP=$(get_test_skip_no_target $TEST_ID)
+ if [[ $TEST_SKIP -eq "1" ]]; then
+ echo "Target for test $TEST_ID: $TEST_TARGET not exist, skipping test ..."
+ return 0
+ fi
+ fi
+ return 1
+}
+
function run_all_tests()
{
for i in $ALL_TESTS ; do
- TEST_ID=${i%:*:*:*}
+ TEST_ID=${i%:*:*:*:*}
ENABLED=$(get_test_enabled $TEST_ID)
TEST_COUNT=$(get_test_count $TEST_ID)
TEST_TARGET=$(get_test_target $TEST_ID)
- if target_exists $TEST_TARGET $TEST_ID; then
- continue
- fi
+
if [[ $ENABLED -eq "1" ]]; then
test_case $TEST_ID $TEST_COUNT $TEST_TARGET
fi
@@ -923,18 +983,19 @@ function watch_case()
function test_case()
{
+ TEST_ID=$1
NUM_TESTS=$2
+ TARGET=$3
- i=0
-
- if target_exists $3 $1; then
- continue
+ if skip_test $TEST_ID $TARGET; then
+ return
fi
+ i=0
while [ $i -lt $NUM_TESTS ]; do
- test_num $1
- watch_log $i ${TEST_NAME}_test_$1 noclear
- RUN_TEST=${TEST_NAME}_test_$1
+ test_num $TEST_ID
+ watch_log $i ${TEST_NAME}_test_${TEST_ID} noclear
+ RUN_TEST=${TEST_NAME}_test_${TEST_ID}
$RUN_TEST
let i=$i+1
done
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 6e73b09c20c8..71706197ba0f 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -5,6 +5,8 @@ CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_FLOW_TABLE=m
CONFIG_NF_NAT=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings
new file mode 100644
index 000000000000..e2206265f67c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/settings
@@ -0,0 +1 @@
+timeout=900
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
new file mode 100644
index 000000000000..c4c778e83da2
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
@@ -0,0 +1,25 @@
+[
+ {
+ "id": "c2b4",
+ "name": "soft lockup alarm will be not generated after delete the prio 0 filter of the chain",
+ "category": [
+ "filter",
+ "chain"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY root handle 1: htb default 1",
+ "$TC chain add dev $DUMMY",
+ "$TC filter del dev $DUMMY chain 0 parent 1: prio 0"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY chain 0 parent 1:",
+ "expExitCode": "2",
+ "verifyCmd": "$TC chain ls dev $DUMMY",
+ "matchPattern": "chain parent 1: chain 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: htb default 1",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
index 44fbfc6caec7..e3d2de5c184f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -155,5 +155,28 @@
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
]
- }
+ },
+ {
+ "id": "0531",
+ "name": "Replace mq with invalid parent ID",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 16\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle ffff: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH parent ffff:fff1 handle ffff: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent ffff",
+ "matchCount": "16",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
index 147899a868d3..976dffda4654 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
@@ -213,5 +213,91 @@
"$TC qdisc del dev $DUMMY handle 1: root",
"$IP link del dev $DUMMY type dummy"
]
+ },
+ {
+ "id": "85ee",
+ "name": "QFQ with big MTU",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$IP link set dev $DUMMY mtu 2147483647 || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "ddfa",
+ "name": "QFQ with small MTU",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$IP link set dev $DUMMY mtu 256 || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "5993",
+ "name": "QFQ with stab overhead greater than max packet len",
+ "category": [
+ "qdisc",
+ "qfq",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$IP link set dev $DUMMY up || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: stab mtu 2048 tsize 512 mpu 0 overhead 999999999 linklayer ethernet root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 ingress protocol ip flower dst_ip 1.3.3.7/32 action mirred egress mirror dev $DUMMY"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: matchall classid 1:1",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 22,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='1.3.3.7')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc ls dev $DUMMY",
+ "matchPattern": "dropped 22",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root qfq"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index a44455372646..08d4861c2e78 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -131,5 +131,30 @@
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
]
+ },
+ {
+ "id": "3e1e",
+ "name": "Add taprio Qdisc with an invalid cycle-time",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true",
+ "$IP link set dev $ETH up",
+ "$IP addr add 10.10.10.10/24 dev $ETH"
+ ],
+ "cmdUnderTest": "/bin/true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
}
]
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 5beceeed0d11..6eba203f9da7 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -129,8 +129,7 @@ int main(int argc, char **argv)
printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
/* Avg the two actual freq samples adjtimex gave us */
- ppm = (tx1.freq + tx2.freq) * 1000 / 2;
- ppm = (long long)tx1.freq * 1000;
+ ppm = (long long)(tx1.freq + tx2.freq) * 1000 / 2;
ppm = shift_right(ppm, 16);
printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
diff --git a/tools/testing/selftests/tty/.gitignore b/tools/testing/selftests/tty/.gitignore
new file mode 100644
index 000000000000..fe70462a4aad
--- /dev/null
+++ b/tools/testing/selftests/tty/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+tty_tstamp_update
diff --git a/tools/testing/selftests/tty/Makefile b/tools/testing/selftests/tty/Makefile
new file mode 100644
index 000000000000..50d7027b2ae3
--- /dev/null
+++ b/tools/testing/selftests/tty/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS = -O2 -Wall
+TEST_GEN_PROGS := tty_tstamp_update
+
+include ../lib.mk
diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c
new file mode 100644
index 000000000000..0ee97943dccc
--- /dev/null
+++ b/tools/testing/selftests/tty/tty_tstamp_update.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/limits.h>
+
+#include "../kselftest.h"
+
+#define MIN_TTY_PATH_LEN 8
+
+static bool tty_valid(char *tty)
+{
+ if (strlen(tty) < MIN_TTY_PATH_LEN)
+ return false;
+
+ if (strncmp(tty, "/dev/tty", MIN_TTY_PATH_LEN) == 0 ||
+ strncmp(tty, "/dev/pts", MIN_TTY_PATH_LEN) == 0)
+ return true;
+
+ return false;
+}
+
+static int write_dev_tty(void)
+{
+ FILE *f;
+ int r = 0;
+
+ f = fopen("/dev/tty", "r+");
+ if (!f)
+ return -errno;
+
+ r = fprintf(f, "hello, world!\n");
+ if (r != strlen("hello, world!\n"))
+ r = -EIO;
+
+ fclose(f);
+ return r;
+}
+
+int main(int argc, char **argv)
+{
+ int r;
+ char tty[PATH_MAX] = {};
+ struct stat st1, st2;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ r = readlink("/proc/self/fd/0", tty, PATH_MAX);
+ if (r < 0)
+ ksft_exit_fail_msg("readlink on /proc/self/fd/0 failed: %m\n");
+
+ if (!tty_valid(tty))
+ ksft_exit_skip("invalid tty path '%s'\n", tty);
+
+ r = stat(tty, &st1);
+ if (r < 0)
+ ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+
+ /* We must wait at least 8 seconds to observe a timestamp change */
+ /* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fbf47635315ab308c9b58a1ea0906e711a9228de */
+ sleep(10);
+
+ r = write_dev_tty();
+ if (r < 0)
+ ksft_exit_fail_msg("failed to write to /dev/tty: %s\n",
+ strerror(-r));
+
+ r = stat(tty, &st2);
+ if (r < 0)
+ ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+
+ /* We wrote to the terminal so timestamps should have been updated */
+ if (st1.st_atim.tv_sec == st2.st_atim.tv_sec &&
+ st1.st_mtim.tv_sec == st2.st_mtim.tv_sec) {
+ ksft_test_result_fail("tty timestamps not updated\n");
+ ksft_exit_fail();
+ }
+
+ ksft_test_result_pass(
+ "timestamps of terminal '%s' updated after write to /dev/tty\n", tty);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index d6979a48478f..91a4444ad42b 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -217,6 +217,18 @@ TEST_F(user, matching) {
/* Types don't match */
TEST_NMATCH("__test_event u64 a; u64 b",
"__test_event u32 a; u32 b");
+
+ /* Struct name and size match */
+ TEST_MATCH("__test_event struct my_struct a 20",
+ "__test_event struct my_struct a 20");
+
+ /* Struct names don't match */
+ TEST_NMATCH("__test_event struct my_struct a 20",
+ "__test_event struct my_struct b 20");
+
+ /* Struct sizes don't match */
+ TEST_NMATCH("__test_event struct my_struct a 20",
+ "__test_event struct my_struct a 21");
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index eb6904d89f14..5beb0aef1d81 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -324,6 +324,10 @@ TEST_F(user, write_events) {
io[0].iov_base = &reg.write_index;
io[0].iov_len = sizeof(reg.write_index);
+ /* Write should return -EBADF when event is not enabled */
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EBADF, errno);
+
/* Enable event */
self->enable_fd = open(enable_file, O_RDWR);
ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
@@ -400,6 +404,10 @@ TEST_F(user, write_fault) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
ASSERT_EQ(0, reg.write_index);
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
/* Write should work normally */
ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
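
Both hunks exercise the user_events write ABI: iovec entry 0 must carry the write_index returned by the DIAG_IOCSREG ioctl, the remaining entries carry the payload, and writes fail with EBADF until a tracer enables the event. A minimal sketch of that call pattern (the helper name and payload layout are hypothetical):

#include <errno.h>
#include <stdio.h>
#include <sys/uio.h>

/* hypothetical helper: emit one u32 payload for a registered user_event */
static int user_event_write(int data_fd, int write_index, unsigned int value)
{
	struct iovec io[2];

	/* entry 0 must be the write_index returned by DIAG_IOCSREG */
	io[0].iov_base = &write_index;
	io[0].iov_len = sizeof(write_index);
	/* the remaining entries are the event payload */
	io[1].iov_base = &value;
	io[1].iov_len = sizeof(value);

	if (writev(data_fd, io, 2) == -1) {
		/* EBADF means no tracer has enabled the event yet */
		if (errno == EBADF)
			fprintf(stderr, "event not enabled\n");
		return -errno;
	}
	return 0;
}
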
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
index 15dcee16ff72..38d46a8bf7cb 100644
--- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
@@ -84,12 +84,12 @@ static inline int vdso_test_clock(unsigned int clock_id)
int main(int argc, char **argv)
{
- int ret;
+ int ret = 0;
#if _POSIX_TIMERS > 0
#ifdef CLOCK_REALTIME
- ret = vdso_test_clock(CLOCK_REALTIME);
+ ret += vdso_test_clock(CLOCK_REALTIME);
#endif
#ifdef CLOCK_BOOTTIME
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 69c7796c7ca9..405ff262ca93 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -514,10 +514,32 @@ n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
n1 ping -W 1 -c 1 192.168.241.2
[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
-ip1 link del veth1
-ip1 link del veth3
-ip1 link del wg0
-ip2 link del wg0
+ip1 link del dev veth3
+ip1 link del dev wg0
+ip2 link del dev wg0
+
+# Make sure persistent keepalives are sent when an adapter comes up
+ip1 link add dev wg0 type wireguard
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -eq 0 ]]
+ip1 link set dev wg0 up
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -gt 0 ]]
+ip1 link del dev wg0
+# This should also happen even if the private key is set later
+ip1 link add dev wg0 type wireguard
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -eq 0 ]]
+ip1 link set dev wg0 up
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -eq 0 ]]
+n1 wg set wg0 private-key <(echo "$key1")
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -gt 0 ]]
+ip1 link del dev veth1
+ip1 link del dev wg0
# We test that Netlink/IPC is working properly by doing things that usually cause split responses
ip0 link add dev wg0 type wireguard
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 43a254f0e14d..21a98ba565ab 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -8,5 +8,5 @@ vsock_perf: vsock_perf.o
CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
.PHONY: all test clean
clean:
- ${RM} *.o *.d vsock_test vsock_diag_test
+ ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf
-include *.d
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
index 3ca7a3853943..245e9344932b 100644
--- a/tools/tracing/rtla/src/osnoise.c
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -841,6 +841,67 @@ static void osnoise_put_irq_disable(struct osnoise_context *context)
context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
}
+static int osnoise_get_workload(struct osnoise_context *context)
+{
+ if (context->opt_workload != OSNOISE_OPTION_INIT_VAL)
+ return context->opt_workload;
+
+ if (context->orig_opt_workload != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_opt_workload;
+
+ context->orig_opt_workload = osnoise_options_get_option("OSNOISE_WORKLOAD");
+
+ return context->orig_opt_workload;
+}
+
+int osnoise_set_workload(struct osnoise_context *context, bool onoff)
+{
+ int opt_workload = osnoise_get_workload(context);
+ int retval;
+
+ if (opt_workload == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ if (opt_workload == onoff)
+ return 0;
+
+ retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff);
+ if (retval < 0)
+ return -1;
+
+ context->opt_workload = onoff;
+
+ return 0;
+}
+
+static void osnoise_restore_workload(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_opt_workload == context->opt_workload)
+ goto out_done;
+
+ retval = osnoise_options_set_option("OSNOISE_WORKLOAD", context->orig_opt_workload);
+ if (retval < 0)
+ err_msg("Could not restore original OSNOISE_WORKLOAD option\n");
+
+out_done:
+ context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL;
+}
+
+static void osnoise_put_workload(struct osnoise_context *context)
+{
+ osnoise_restore_workload(context);
+
+ if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL;
+}
+
/*
* enable_osnoise - enable osnoise tracer in the trace_instance
*/
@@ -908,6 +969,9 @@ struct osnoise_context *osnoise_context_alloc(void)
context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
context->opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
+ context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL;
+ context->opt_workload = OSNOISE_OPTION_INIT_VAL;
+
osnoise_get_context(context);
return context;
@@ -935,6 +999,7 @@ void osnoise_put_context(struct osnoise_context *context)
osnoise_put_print_stack(context);
osnoise_put_tracing_thresh(context);
osnoise_put_irq_disable(context);
+ osnoise_put_workload(context);
free(context);
}
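
The new workload helpers repeat the save/set/restore pattern used by the other osnoise options: the current value is read lazily into orig_opt_workload, user changes go through opt_workload, and OSNOISE_OPTION_INIT_VAL (-1, since 0 means off) marks a value as untouched. A condensed sketch of the pattern with hypothetical names; get_option()/set_option() stand in for the osnoise_options_*() tracefs accessors:

#define OPTION_INIT_VAL (-1)	/* -1 because 0 is a valid (off) value */

struct option_state {
	int orig;	/* value found in tracefs, restored on exit */
	int cur;	/* value requested by the user */
};

/* hypothetical tracefs accessors standing in for osnoise_options_*() */
int get_option(const char *name);
int set_option(const char *name, int value);

static int option_get(struct option_state *st, const char *name)
{
	if (st->cur != OPTION_INIT_VAL)
		return st->cur;			/* user already set it */
	if (st->orig == OPTION_INIT_VAL)
		st->orig = get_option(name);	/* lazily save the original */
	return st->orig;
}

static int option_set(struct option_state *st, const char *name, int onoff)
{
	int cur = option_get(st, name);

	if (cur == OPTION_INIT_VAL)
		return -1;			/* option could not be read */
	if (cur == onoff)
		return 0;			/* already at the requested value */
	if (set_option(name, onoff) < 0)
		return -1;
	st->cur = onoff;
	return 0;
}

static void option_restore(struct option_state *st, const char *name)
{
	if (st->orig != OPTION_INIT_VAL && st->orig != st->cur)
		set_option(name, st->orig);
	st->orig = OPTION_INIT_VAL;		/* mark as restored */
}
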
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
index 4dcf22ccd704..555f4f4903cc 100644
--- a/tools/tracing/rtla/src/osnoise.h
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -42,6 +42,10 @@ struct osnoise_context {
/* -1 as init value because 0 is off */
int orig_opt_irq_disable;
int opt_irq_disable;
+
+ /* -1 as init value because 0 is off */
+ int orig_opt_workload;
+ int opt_workload;
};
/*
@@ -84,6 +88,7 @@ int osnoise_set_print_stack(struct osnoise_context *context,
long long print_stack);
int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff);
+int osnoise_set_workload(struct osnoise_context *context, bool onoff);
/*
* osnoise_tool - osnoise based tool definition.
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 13e1233690bb..8f81fa007364 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <getopt.h>
#include <stdlib.h>
#include <string.h>
@@ -11,14 +12,16 @@
#include <errno.h>
#include <stdio.h>
#include <time.h>
+#include <sched.h>
#include "utils.h"
#include "osnoise.h"
struct osnoise_hist_params {
char *cpus;
- char *monitored_cpus;
+ cpu_set_t monitored_cpus;
char *trace_output;
+ char *cgroup_name;
unsigned long long runtime;
unsigned long long period;
long long threshold;
@@ -28,6 +31,9 @@ struct osnoise_hist_params {
int duration;
int set_sched;
int output_divisor;
+ int cgroup;
+ int hk_cpus;
+ cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
@@ -268,7 +274,7 @@ static void osnoise_hist_header(struct osnoise_tool *tool)
trace_seq_printf(s, "Index");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -299,7 +305,7 @@ osnoise_print_summary(struct osnoise_hist_params *params,
trace_seq_printf(trace->seq, "count:");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -313,7 +319,7 @@ osnoise_print_summary(struct osnoise_hist_params *params,
trace_seq_printf(trace->seq, "min: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -328,7 +334,7 @@ osnoise_print_summary(struct osnoise_hist_params *params,
trace_seq_printf(trace->seq, "avg: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -346,7 +352,7 @@ osnoise_print_summary(struct osnoise_hist_params *params,
trace_seq_printf(trace->seq, "max: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -381,7 +387,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too
bucket * data->bucket_size);
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -405,7 +411,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too
trace_seq_printf(trace->seq, "over: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].count)
@@ -432,8 +438,8 @@ static void osnoise_hist_usage(char *usage)
"",
" usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
" [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
- " [-c cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] [--no-index] \\",
- " [--with-zeros]",
+ " [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros] [-C[=cgroup_name]]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit",
@@ -443,6 +449,8 @@ static void osnoise_hist_usage(char *usage)
" -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
" -T/--threshold us: the minimum delta to be considered a noise",
" -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -H/--house-keeping cpus: run rtla control threads only on the given cpus",
+ " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
" -d/--duration time[s|m|h|d]: duration of the session",
" -D/--debug: print debug info",
" -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
@@ -501,8 +509,10 @@ static struct osnoise_hist_params
{"bucket-size", required_argument, 0, 'b'},
{"entries", required_argument, 0, 'E'},
{"cpus", required_argument, 0, 'c'},
+ {"cgroup", optional_argument, 0, 'C'},
{"debug", no_argument, 0, 'D'},
{"duration", required_argument, 0, 'd'},
+ {"house-keeping", required_argument, 0, 'H'},
{"help", no_argument, 0, 'h'},
{"period", required_argument, 0, 'p'},
{"priority", required_argument, 0, 'P'},
@@ -524,7 +534,7 @@ static struct osnoise_hist_params
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhp:P:r:s:S:t::T:01234:5:",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:",
long_options, &option_index);
/* detect the end of the options. */
@@ -549,11 +559,21 @@ static struct osnoise_hist_params
osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
break;
case 'c':
- retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ retval = parse_cpu_set(optarg, &params->monitored_cpus);
if (retval)
osnoise_hist_usage("\nInvalid -c cpu list\n");
params->cpus = optarg;
break;
+ case 'C':
+ params->cgroup = 1;
+ if (!optarg) {
+ /* will inherit this cgroup */
+ params->cgroup_name = NULL;
+ } else if (*optarg == '=') {
+ /* skip the = */
+ params->cgroup_name = ++optarg;
+ }
+ break;
case 'D':
config_debug = 1;
break;
@@ -583,6 +603,14 @@ static struct osnoise_hist_params
case '?':
osnoise_hist_usage(NULL);
break;
+ case 'H':
+ params->hk_cpus = 1;
+ retval = parse_cpu_set(optarg, &params->hk_cpu_set);
+ if (retval) {
+ err_msg("Error parsing house keeping CPUs\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
case 'p':
params->period = get_llong_from_str(optarg);
if (params->period > 10000000)
@@ -718,6 +746,24 @@ osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params
}
}
+ if (params->hk_cpus) {
+ retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
+ &params->hk_cpu_set);
+ if (retval == -1) {
+ err_msg("Failed to set rtla to the house keeping CPUs\n");
+ goto out_err;
+ }
+ } else if (params->cpus) {
+ /*
+ * Even if the user does not set a house-keeping CPU, try to
+ * move rtla to a CPU set different from the one where the user
+ * set the workload to run.
+ *
+ * No need to check results as this is an automatic attempt.
+ */
+ auto_house_keeping(&params->monitored_cpus);
+ }
+
return 0;
out_err:
@@ -816,7 +862,13 @@ int osnoise_hist_main(int argc, char *argv[])
}
}
- trace_instance_start(trace);
+ if (params->cgroup) {
+ retval = set_comm_cgroup("timerlat/", params->cgroup_name);
+ if (!retval) {
+ err_msg("Failed to move threads to cgroup\n");
+ goto out_free;
+ }
+ }
if (params->trace_output) {
record = osnoise_init_trace_tool("osnoise");
@@ -831,9 +883,19 @@ int osnoise_hist_main(int argc, char *argv[])
goto out_hist;
}
- trace_instance_start(&record->trace);
}
+ /*
+ * Start the tracer here, after having set all instances.
+ *
+ * Let the trace instance start first in case a stop-tracing
+ * condition is hit while the other instances are being enabled.
+ * The trace instance is the one with the most valuable information.
+ */
+ if (params->trace_output)
+ trace_instance_start(&record->trace);
+ trace_instance_start(trace);
+
tool->start_time = time(NULL);
osnoise_hist_set_signals(params);
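
All four rtla tools parse -C/--cgroup the same way: the long option is declared optional_argument and the short option appears as "C::" in the optstring, so getopt_long() only attaches an argument to the short form when it is written as -C=name, and the leading '=' has to be skipped by hand. A standalone sketch of that convention (the option name is reused from the diff; everything else is hypothetical):

#include <getopt.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	static struct option long_options[] = {
		{"cgroup", optional_argument, 0, 'C'},
		{0, 0, 0, 0}
	};
	char *cgroup_name = NULL;
	int cgroup = 0, c;

	/* "C::" marks the argument optional: plain -C, or -C=name */
	while ((c = getopt_long(argc, argv, "C::", long_options, NULL)) != -1) {
		if (c != 'C')
			continue;
		cgroup = 1;
		if (!optarg)
			cgroup_name = NULL;		/* inherit the current cgroup */
		else if (*optarg == '=')
			cgroup_name = optarg + 1;	/* skip the '=' */
	}

	printf("cgroup=%d name=%s\n", cgroup,
	       cgroup_name ? cgroup_name : "(inherited)");
	return 0;
}
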
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 562f2e4b18c5..f7c959be8677 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <getopt.h>
#include <stdlib.h>
#include <string.h>
@@ -10,6 +11,7 @@
#include <unistd.h>
#include <stdio.h>
#include <time.h>
+#include <sched.h>
#include "osnoise.h"
#include "utils.h"
@@ -24,8 +26,9 @@ enum osnoise_mode {
*/
struct osnoise_top_params {
char *cpus;
- char *monitored_cpus;
+ cpu_set_t monitored_cpus;
char *trace_output;
+ char *cgroup_name;
unsigned long long runtime;
unsigned long long period;
long long threshold;
@@ -35,6 +38,9 @@ struct osnoise_top_params {
int duration;
int quiet;
int set_sched;
+ int cgroup;
+ int hk_cpus;
+ cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
enum osnoise_mode mode;
@@ -257,7 +263,7 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
osnoise_top_header(top);
for (i = 0; i < nr_cpus; i++) {
- if (params->cpus && !params->monitored_cpus[i])
+ if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus))
continue;
osnoise_top_print(top, i);
}
@@ -276,7 +282,7 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
static const char * const msg[] = {
" [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
" [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
- " [-c cpu-list] [-P priority]",
+ " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit",
@@ -286,6 +292,8 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
" -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
" -T/--threshold us: the minimum delta to be considered a noise",
" -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -H/--house-keeping cpus: run rtla control threads only on the given cpus",
+ " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
" -d/--duration time[s|m|h|d]: duration of the session",
" -D/--debug: print debug info",
" -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
@@ -340,16 +348,24 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
if (!params)
exit(1);
- if (strcmp(argv[0], "hwnoise") == 0)
+ if (strcmp(argv[0], "hwnoise") == 0) {
params->mode = MODE_HWNOISE;
+ /*
+ * Reduce CPU usage to 75% to avoid overwhelming the system.
+ */
+ params->runtime = 750000;
+ params->period = 1000000;
+ }
while (1) {
static struct option long_options[] = {
{"auto", required_argument, 0, 'a'},
{"cpus", required_argument, 0, 'c'},
+ {"cgroup", optional_argument, 0, 'C'},
{"debug", no_argument, 0, 'D'},
{"duration", required_argument, 0, 'd'},
{"event", required_argument, 0, 'e'},
+ {"house-keeping", required_argument, 0, 'H'},
{"help", no_argument, 0, 'h'},
{"period", required_argument, 0, 'p'},
{"priority", required_argument, 0, 'P'},
@@ -367,7 +383,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:d:De:hp:P:qr:s:S:t::T:0:1:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:",
long_options, &option_index);
/* Detect the end of the options. */
@@ -387,11 +403,21 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
break;
case 'c':
- retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ retval = parse_cpu_set(optarg, &params->monitored_cpus);
if (retval)
osnoise_top_usage(params, "\nInvalid -c cpu list\n");
params->cpus = optarg;
break;
+ case 'C':
+ params->cgroup = 1;
+ if (!optarg) {
+ /* will inherit this cgroup */
+ params->cgroup_name = NULL;
+ } else if (*optarg == '=') {
+ /* skip the = */
+ params->cgroup_name = ++optarg;
+ }
+ break;
case 'D':
config_debug = 1;
break;
@@ -416,6 +442,14 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
case '?':
osnoise_top_usage(params, NULL);
break;
+ case 'H':
+ params->hk_cpus = 1;
+ retval = parse_cpu_set(optarg, &params->hk_cpu_set);
+ if (retval) {
+ err_msg("Error parsing house keeping CPUs\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
case 'p':
params->period = get_llong_from_str(optarg);
if (params->period > 10000000)
@@ -547,6 +581,24 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p
}
}
+ if (params->hk_cpus) {
+ retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
+ &params->hk_cpu_set);
+ if (retval == -1) {
+ err_msg("Failed to set rtla to the house keeping CPUs\n");
+ goto out_err;
+ }
+ } else if (params->cpus) {
+ /*
+ * Even if the user does not set a house-keeping CPU, try to
+ * move rtla to a CPU set different from the one where the user
+ * set the workload to run.
+ *
+ * No need to check results as this is an automatic attempt.
+ */
+ auto_house_keeping(&params->monitored_cpus);
+ }
+
return 0;
out_err:
@@ -643,7 +695,13 @@ int osnoise_top_main(int argc, char **argv)
}
}
- trace_instance_start(trace);
+ if (params->cgroup) {
+ retval = set_comm_cgroup("osnoise/", params->cgroup_name);
+ if (!retval) {
+ err_msg("Failed to move threads to cgroup\n");
+ goto out_free;
+ }
+ }
if (params->trace_output) {
record = osnoise_init_trace_tool("osnoise");
@@ -657,9 +715,18 @@ int osnoise_top_main(int argc, char **argv)
if (retval)
goto out_top;
}
+ }
+ /*
+ * Start the tracer here, after having set all instances.
+ *
+ * Let the trace instance start first in case a stop-tracing
+ * condition is hit while the other instances are being enabled.
+ * The trace instance is the one with the most valuable information.
+ */
+ if (params->trace_output)
trace_instance_start(&record->trace);
- }
+ trace_instance_start(trace);
tool->start_time = time(NULL);
osnoise_top_set_signals(params);
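
A change repeated across these files replaces the old char-array CPU mask with glibc's cpu_set_t, so the same set that drives the per-CPU loops can be handed directly to sched_setaffinity(). A minimal sketch of the three macros involved; the CPU numbers are hypothetical:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);		/* start with an empty mask */
	CPU_SET(0, &set);	/* monitor CPU 0 */
	CPU_SET(2, &set);	/* monitor CPU 2 */

	/* membership test replaces the old monitored_cpus[cpu] lookup */
	for (int cpu = 0; cpu < 4; cpu++)
		printf("cpu %d: %s\n", cpu,
		       CPU_ISSET(cpu, &set) ? "monitored" : "skipped");

	/* the very same set can pin the current process */
	if (sched_setaffinity(getpid(), sizeof(set), &set) == -1)
		perror("sched_setaffinity");

	return 0;
}
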
diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c
index 1843fff66da5..e0ffe69c271c 100644
--- a/tools/tracing/rtla/src/timerlat_aa.c
+++ b/tools/tracing/rtla/src/timerlat_aa.c
@@ -8,6 +8,7 @@
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
+#include <unistd.h>
enum timelat_state {
TIMERLAT_INIT = 0,
@@ -233,7 +234,7 @@ static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
*
* Returns 0 on success, -1 otherwise.
*/
-int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
+static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event *event, void *context)
{
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
@@ -665,6 +666,25 @@ print_total:
ns_to_usf(total));
}
+static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
+{
+ struct trace_instance *trace = &taa_ctx->tool->trace;
+ int retval;
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ return 0;
+ }
+
+ return 1;
+}
+
/**
* timerlat_auto_analysis - Analyze the collected data
*/
@@ -677,6 +697,8 @@ void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
struct tep_handle *tep;
int cpu;
+ timerlat_auto_analysis_collect_trace(taa_ctx);
+
/* bring stop tracing to the ns scale */
irq_thresh = irq_thresh * 1000;
thread_thresh = thread_thresh * 1000;
@@ -838,6 +860,10 @@ out_err:
*/
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{
+
+ tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_aa_handler, tool);
+
tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
@@ -875,6 +901,10 @@ static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks
{
int retval;
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_aa_handler, tool);
+
+
/*
* register auto-analysis handlers.
*/
@@ -955,8 +985,9 @@ out_ctx:
*
* Returns 0 on success, -1 otherwise.
*/
-int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_tasks)
+int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
+ int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
struct timerlat_aa_context *taa_ctx;
int retval;
diff --git a/tools/tracing/rtla/src/timerlat_aa.h b/tools/tracing/rtla/src/timerlat_aa.h
index d4f6ca7e342a..cea4bb1531a8 100644
--- a/tools/tracing/rtla/src/timerlat_aa.h
+++ b/tools/tracing/rtla/src/timerlat_aa.h
@@ -3,10 +3,7 @@
* Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
-int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_task);
+int timerlat_aa_init(struct osnoise_tool *tool, int dump_task);
void timerlat_aa_destroy(void);
-int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
- struct tep_event *event, void *context);
-
void timerlat_auto_analysis(int irq_thresh, int thread_thresh);
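
timerlat_aa_handler() is now static: timerlat_aa registers it for the ftrace:timerlat event itself and unregisters it symmetrically on teardown. A sketch of that libtraceevent pairing with a hypothetical handler; the header path is an assumption and may differ per installation:

#include <event-parse.h>	/* assumed libtraceevent header path */

static int my_handler(struct trace_seq *s, struct tep_record *record,
		      struct tep_event *event, void *context)
{
	/* inspect the record here; returning 0 lets parsing continue */
	return 0;
}

static void attach(struct tep_handle *tep, void *tool)
{
	/* id -1: match the event by system/name instead of by id */
	tep_register_event_handler(tep, -1, "ftrace", "timerlat",
				   my_handler, tool);
}

static void detach(struct tep_handle *tep, void *tool)
{
	/* must pass the same function and context used when registering */
	tep_unregister_event_handler(tep, -1, "ftrace", "timerlat",
				     my_handler, tool);
}
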
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 4b48af8a8309..47d3d8b53cb2 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <getopt.h>
#include <stdlib.h>
#include <string.h>
@@ -10,15 +11,20 @@
#include <unistd.h>
#include <stdio.h>
#include <time.h>
+#include <sched.h>
+#include <pthread.h>
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
+#include "timerlat_aa.h"
+#include "timerlat_u.h"
struct timerlat_hist_params {
char *cpus;
- char *monitored_cpus;
+ cpu_set_t monitored_cpus;
char *trace_output;
+ char *cgroup_name;
unsigned long long runtime;
long long stop_us;
long long stop_total_us;
@@ -29,9 +35,14 @@ struct timerlat_hist_params {
int duration;
int set_sched;
int dma_latency;
+ int cgroup;
+ int hk_cpus;
+ int no_aa;
+ int dump_tasks;
+ int user_hist;
+ cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
-
char no_irq;
char no_thread;
char no_header;
@@ -45,9 +56,11 @@ struct timerlat_hist_params {
struct timerlat_hist_cpu {
int *irq;
int *thread;
+ int *user;
int irq_count;
int thread_count;
+ int user_count;
unsigned long long min_irq;
unsigned long long sum_irq;
@@ -56,6 +69,10 @@ struct timerlat_hist_cpu {
unsigned long long min_thread;
unsigned long long sum_thread;
unsigned long long max_thread;
+
+ unsigned long long min_user;
+ unsigned long long sum_user;
+ unsigned long long max_user;
};
struct timerlat_hist_data {
@@ -80,6 +97,10 @@ timerlat_free_histogram(struct timerlat_hist_data *data)
if (data->hist[cpu].thread)
free(data->hist[cpu].thread);
+
+ if (data->hist[cpu].user)
+ free(data->hist[cpu].user);
+
}
/* one set of histograms per CPU */
@@ -116,15 +137,21 @@ static struct timerlat_hist_data
data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1));
if (!data->hist[cpu].irq)
goto cleanup;
+
data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1));
if (!data->hist[cpu].thread)
goto cleanup;
+
+ data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1));
+ if (!data->hist[cpu].user)
+ goto cleanup;
}
/* set the min to max */
for (cpu = 0; cpu < nr_cpus; cpu++) {
data->hist[cpu].min_irq = ~0;
data->hist[cpu].min_thread = ~0;
+ data->hist[cpu].min_user = ~0;
}
return data;
@@ -139,7 +166,7 @@ cleanup:
*/
static void
timerlat_hist_update(struct osnoise_tool *tool, int cpu,
- unsigned long long thread,
+ unsigned long long context,
unsigned long long latency)
{
struct timerlat_hist_params *params = tool->params;
@@ -154,18 +181,24 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu,
if (data->bucket_size)
bucket = latency / data->bucket_size;
- if (!thread) {
+ if (!context) {
hist = data->hist[cpu].irq;
data->hist[cpu].irq_count++;
update_min(&data->hist[cpu].min_irq, &latency);
update_sum(&data->hist[cpu].sum_irq, &latency);
update_max(&data->hist[cpu].max_irq, &latency);
- } else {
+ } else if (context == 1) {
hist = data->hist[cpu].thread;
data->hist[cpu].thread_count++;
update_min(&data->hist[cpu].min_thread, &latency);
update_sum(&data->hist[cpu].sum_thread, &latency);
update_max(&data->hist[cpu].max_thread, &latency);
+ } else { /* user */
+ hist = data->hist[cpu].user;
+ data->hist[cpu].user_count++;
+ update_min(&data->hist[cpu].min_user, &latency);
+ update_sum(&data->hist[cpu].sum_user, &latency);
+ update_max(&data->hist[cpu].max_user, &latency);
}
if (bucket < entries)
@@ -182,16 +215,16 @@ timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event *event, void *data)
{
struct trace_instance *trace = data;
- unsigned long long thread, latency;
+ unsigned long long context, latency;
struct osnoise_tool *tool;
int cpu = record->cpu;
tool = container_of(trace, struct osnoise_tool, trace);
- tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "context", record, &context, 1);
tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
- timerlat_hist_update(tool, cpu, thread, latency);
+ timerlat_hist_update(tool, cpu, context, latency);
return 0;
}
@@ -222,7 +255,7 @@ static void timerlat_hist_header(struct osnoise_tool *tool)
trace_seq_printf(s, "Index");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -233,6 +266,9 @@ static void timerlat_hist_header(struct osnoise_tool *tool)
if (!params->no_thread)
trace_seq_printf(s, " Thr-%03d", cpu);
+
+ if (params->user_hist)
+ trace_seq_printf(s, " Usr-%03d", cpu);
}
trace_seq_printf(s, "\n");
@@ -258,7 +294,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
trace_seq_printf(trace->seq, "count:");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -271,6 +307,10 @@ timerlat_print_summary(struct timerlat_hist_params *params,
if (!params->no_thread)
trace_seq_printf(trace->seq, "%9d ",
data->hist[cpu].thread_count);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].user_count);
}
trace_seq_printf(trace->seq, "\n");
@@ -278,7 +318,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
trace_seq_printf(trace->seq, "min: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -291,6 +331,10 @@ timerlat_print_summary(struct timerlat_hist_params *params,
if (!params->no_thread)
trace_seq_printf(trace->seq, "%9llu ",
data->hist[cpu].min_thread);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_user);
}
trace_seq_printf(trace->seq, "\n");
@@ -298,7 +342,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
trace_seq_printf(trace->seq, "avg: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -315,7 +359,15 @@ timerlat_print_summary(struct timerlat_hist_params *params,
if (!params->no_thread) {
if (data->hist[cpu].thread_count)
trace_seq_printf(trace->seq, "%9llu ",
- data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
+ data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+
+ if (params->user_hist) {
+ if (data->hist[cpu].user_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_user / data->hist[cpu].user_count);
else
trace_seq_printf(trace->seq, " - ");
}
@@ -326,7 +378,7 @@ timerlat_print_summary(struct timerlat_hist_params *params,
trace_seq_printf(trace->seq, "max: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -339,6 +391,10 @@ timerlat_print_summary(struct timerlat_hist_params *params,
if (!params->no_thread)
trace_seq_printf(trace->seq, "%9llu ",
data->hist[cpu].max_thread);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_user);
}
trace_seq_printf(trace->seq, "\n");
trace_seq_do_printf(trace->seq);
@@ -366,7 +422,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
bucket * data->bucket_size);
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -384,6 +440,12 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
data->hist[cpu].thread[bucket]);
}
+ if (params->user_hist) {
+ total += data->hist[cpu].user[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].user[bucket]);
+ }
+
}
if (total == 0 && !params->with_zeros) {
@@ -400,7 +462,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
trace_seq_printf(trace->seq, "over: ");
for (cpu = 0; cpu < data->nr_cpus; cpu++) {
- if (params->cpus && !params->monitored_cpus[cpu])
+ if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
continue;
if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
@@ -413,6 +475,10 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
if (!params->no_thread)
trace_seq_printf(trace->seq, "%9d ",
data->hist[cpu].thread[data->entries]);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].user[data->entries]);
}
trace_seq_printf(trace->seq, "\n");
trace_seq_do_printf(trace->seq);
@@ -431,9 +497,9 @@ static void timerlat_hist_usage(char *usage)
char *msg[] = {
"",
" usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\",
- " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\",
+ " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
" [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
- " [--no-index] [--with-zeros] [--dma-latency us]",
+ " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
@@ -442,13 +508,17 @@ static void timerlat_hist_usage(char *usage)
" -T/--thread us: stop trace if the thread latency is higher than the argument in us",
" -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
" -c/--cpus cpus: run the tracer only on the given cpus",
+ " -H/--house-keeping cpus: run rtla control threads only on the given cpus",
+ " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
" -d/--duration time[m|h|d]: duration of the session in seconds",
+ " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
" -D/--debug: print debug info",
" -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
" -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
" --filter <filter>: enable a trace event filter to the previous -e event",
" --trigger <trigger>: enable a trace event trigger to the previous -e event",
" -n/--nano: display data in nanoseconds",
+ " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
" -b/--bucket-size N: set the histogram bucket size (default 1)",
" -E/--entries N: set the number of entries of the histogram (default 256)",
" --no-irq: ignore IRQ latencies",
@@ -464,6 +534,7 @@ static void timerlat_hist_usage(char *usage)
" f:prio - use SCHED_FIFO with prio",
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
+ " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
NULL,
};
@@ -506,10 +577,12 @@ static struct timerlat_hist_params
static struct option long_options[] = {
{"auto", required_argument, 0, 'a'},
{"cpus", required_argument, 0, 'c'},
+ {"cgroup", optional_argument, 0, 'C'},
{"bucket-size", required_argument, 0, 'b'},
{"debug", no_argument, 0, 'D'},
{"entries", required_argument, 0, 'E'},
{"duration", required_argument, 0, 'd'},
+ {"house-keeping", required_argument, 0, 'H'},
{"help", no_argument, 0, 'h'},
{"irq", required_argument, 0, 'i'},
{"nano", no_argument, 0, 'n'},
@@ -518,6 +591,7 @@ static struct timerlat_hist_params
{"stack", required_argument, 0, 's'},
{"thread", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
+ {"user-threads", no_argument, 0, 'u'},
{"event", required_argument, 0, 'e'},
{"no-irq", no_argument, 0, '0'},
{"no-thread", no_argument, 0, '1'},
@@ -528,13 +602,15 @@ static struct timerlat_hist_params
{"trigger", required_argument, 0, '6'},
{"filter", required_argument, 0, '7'},
{"dma-latency", required_argument, 0, '8'},
+ {"no-aa", no_argument, 0, '9'},
+ {"dump-task", no_argument, 0, '\1'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhi:np:P:s:t::T:0123456:7:8:",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:u0123456:7:8:9\1",
long_options, &option_index);
/* detect the end of the options. */
@@ -547,6 +623,7 @@ static struct timerlat_hist_params
/* set thread stop to auto_thresh */
params->stop_total_us = auto_thresh;
+ params->stop_us = auto_thresh;
/* get stack trace */
params->print_stack = auto_thresh;
@@ -556,11 +633,21 @@ static struct timerlat_hist_params
break;
case 'c':
- retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ retval = parse_cpu_set(optarg, &params->monitored_cpus);
if (retval)
timerlat_hist_usage("\nInvalid -c cpu list\n");
params->cpus = optarg;
break;
+ case 'C':
+ params->cgroup = 1;
+ if (!optarg) {
+ /* will inherit this cgroup */
+ params->cgroup_name = NULL;
+ } else if (*optarg == '=') {
+ /* skip the = */
+ params->cgroup_name = ++optarg;
+ }
+ break;
case 'b':
params->bucket_size = get_llong_from_str(optarg);
if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
@@ -595,6 +682,14 @@ static struct timerlat_hist_params
case '?':
timerlat_hist_usage(NULL);
break;
+ case 'H':
+ params->hk_cpus = 1;
+ retval = parse_cpu_set(optarg, &params->hk_cpu_set);
+ if (retval) {
+ err_msg("Error parsing house keeping CPUs\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
case 'i':
params->stop_us = get_llong_from_str(optarg);
break;
@@ -625,6 +720,9 @@ static struct timerlat_hist_params
else
params->trace_output = "timerlat_trace.txt";
break;
+ case 'u':
+ params->user_hist = 1;
+ break;
case '0': /* no irq */
params->no_irq = 1;
break;
@@ -672,6 +770,12 @@ static struct timerlat_hist_params
exit(EXIT_FAILURE);
}
break;
+ case '9':
+ params->no_aa = 1;
+ break;
+ case '\1':
+ params->dump_tasks = 1;
+ break;
default:
timerlat_hist_usage("Invalid option");
}
@@ -688,6 +792,12 @@ static struct timerlat_hist_params
if (params->no_index && !params->with_zeros)
timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense");
+ /*
+ * Auto-analysis only happens if a stop-tracing condition is set, thus:
+ */
+ if (!params->stop_us && !params->stop_total_us)
+ params->no_aa = 1;
+
return params;
}
@@ -697,7 +807,7 @@ static struct timerlat_hist_params
static int
timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params)
{
- int retval;
+ int retval, i;
if (!params->sleep_time)
params->sleep_time = 1;
@@ -708,6 +818,9 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param
err_msg("Failed to apply CPUs config\n");
goto out_err;
}
+ } else {
+ for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++)
+ CPU_SET(i, &params->monitored_cpus);
}
if (params->stop_us) {
@@ -742,6 +855,32 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param
}
}
+ if (params->hk_cpus) {
+ retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
+ &params->hk_cpu_set);
+ if (retval == -1) {
+ err_msg("Failed to set rtla to the house keeping CPUs\n");
+ goto out_err;
+ }
+ } else if (params->cpus) {
+ /*
+ * Even if the user does not set a house-keeping CPU, try to
+ * move rtla to a CPU set different from the one where the user
+ * set the workload to run.
+ *
+ * No need to check results as this is an automatic attempt.
+ */
+ auto_house_keeping(&params->monitored_cpus);
+ }
+
+ if (params->user_hist) {
+ retval = osnoise_set_workload(tool->context, 0);
+ if (retval) {
+ err_msg("Failed to set OSNOISE_WORKLOAD option\n");
+ goto out_err;
+ }
+ }
+
return 0;
out_err:
@@ -802,10 +941,13 @@ int timerlat_hist_main(int argc, char *argv[])
{
struct timerlat_hist_params *params;
struct osnoise_tool *record = NULL;
+ struct timerlat_u_params params_u;
struct osnoise_tool *tool = NULL;
+ struct osnoise_tool *aa = NULL;
struct trace_instance *trace;
int dma_latency_fd = -1;
int return_value = 1;
+ pthread_t timerlat_u;
int retval;
params = timerlat_hist_parse_args(argc, argv);
@@ -840,6 +982,14 @@ int timerlat_hist_main(int argc, char *argv[])
}
}
+ if (params->cgroup && !params->user_hist) {
+ retval = set_comm_cgroup("timerlat/", params->cgroup_name);
+ if (!retval) {
+ err_msg("Failed to move threads to cgroup\n");
+ goto out_free;
+ }
+ }
+
if (params->dma_latency >= 0) {
dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
if (dma_latency_fd < 0) {
@@ -848,8 +998,6 @@ int timerlat_hist_main(int argc, char *argv[])
}
}
- trace_instance_start(trace);
-
if (params->trace_output) {
record = osnoise_init_trace_tool("timerlat");
if (!record) {
@@ -862,13 +1010,61 @@ int timerlat_hist_main(int argc, char *argv[])
if (retval)
goto out_hist;
}
+ }
- trace_instance_start(&record->trace);
+ if (!params->no_aa) {
+ aa = osnoise_init_tool("timerlat_aa");
+ if (!aa)
+ goto out_hist;
+
+ retval = timerlat_aa_init(aa, params->dump_tasks);
+ if (retval) {
+ err_msg("Failed to enable the auto analysis instance\n");
+ goto out_hist;
+ }
+
+ retval = enable_timerlat(&aa->trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_hist;
+ }
}
+ /*
+ * Start the tracers here, after having set all instances.
+ *
+ * Let the trace instance start first in case a stop-tracing
+ * condition is hit while the other instances are being enabled.
+ * The trace instance is the one with the most valuable information.
+ */
+ if (params->trace_output)
+ trace_instance_start(&record->trace);
+ if (!params->no_aa)
+ trace_instance_start(&aa->trace);
+ trace_instance_start(trace);
+
tool->start_time = time(NULL);
timerlat_hist_set_signals(params);
+ if (params->user_hist) {
+ /* cleared when rtla asks the user-space threads to stop */
+ params_u.should_run = 1;
+ /* set by the dispatcher once all user-space threads have left */
+ params_u.stopped_running = 0;
+
+ params_u.set = &params->monitored_cpus;
+ if (params->set_sched)
+ params_u.sched_param = &params->sched_param;
+ else
+ params_u.sched_param = NULL;
+
+ params_u.cgroup_name = params->cgroup_name;
+
+ retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, &params_u);
+ if (retval)
+ err_msg("Error creating timerlat user-space threads\n");
+ }
+
while (!stop_tracing) {
sleep(params->sleep_time);
@@ -885,6 +1081,18 @@ int timerlat_hist_main(int argc, char *argv[])
if (trace_is_off(&tool->trace, &record->trace))
break;
+
+ /* are any user-space threads still running? */
+ if (params->user_hist) {
+ if (params_u.stopped_running) {
+ debug_msg("timerlat user-space threads stopped!\n");
+ break;
+ }
+ }
+ }
+ if (params->user_hist && !params_u.stopped_running) {
+ params_u.should_run = 0;
+ sleep(1);
}
timerlat_print_stats(params, tool);
@@ -893,6 +1101,10 @@ int timerlat_hist_main(int argc, char *argv[])
if (trace_is_off(&tool->trace, &record->trace)) {
printf("rtla timerlat hit stop tracing\n");
+
+ if (!params->no_aa)
+ timerlat_auto_analysis(params->stop_us, params->stop_total_us);
+
if (params->trace_output) {
printf(" Saving trace to %s\n", params->trace_output);
save_trace_to_file(record->trace.inst, params->trace_output);
@@ -900,12 +1112,14 @@ int timerlat_hist_main(int argc, char *argv[])
}
out_hist:
+ timerlat_aa_destroy();
if (dma_latency_fd >= 0)
close(dma_latency_fd);
trace_events_destroy(&record->trace, params->events);
params->events = NULL;
out_free:
timerlat_free_histogram(tool->data);
+ osnoise_destroy_tool(aa);
osnoise_destroy_tool(record);
osnoise_destroy_tool(tool);
free(params);
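
With -u, the timerlat 'context' trace field gains a third value, turning the old IRQ/thread boolean into a three-way dispatch: 0 selects the IRQ histogram, 1 the in-kernel thread histogram, and anything else the user-space return latency. A condensed sketch of that dispatch with hypothetical bucket arrays:

enum timerlat_context { CTX_IRQ = 0, CTX_THREAD = 1, CTX_USER = 2 };

struct cpu_hist {
	int *irq, *thread, *user;	/* hypothetical per-context buckets */
};

static int *pick_hist(struct cpu_hist *h, unsigned long long context)
{
	if (context == CTX_IRQ)
		return h->irq;		/* timer IRQ handler latency */
	if (context == CTX_THREAD)
		return h->thread;	/* in-kernel timerlat thread latency */
	return h->user;			/* user-space thread return latency */
}
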
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 92c658c64f28..1640f121baca 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <getopt.h>
#include <stdlib.h>
#include <string.h>
@@ -11,16 +12,20 @@
#include <stdio.h>
#include <time.h>
#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
#include "timerlat_aa.h"
+#include "timerlat_u.h"
struct timerlat_top_params {
char *cpus;
- char *monitored_cpus;
+ cpu_set_t monitored_cpus;
char *trace_output;
+ char *cgroup_name;
unsigned long long runtime;
long long stop_us;
long long stop_total_us;
@@ -35,6 +40,10 @@ struct timerlat_top_params {
int no_aa;
int aa_only;
int dump_tasks;
+ int cgroup;
+ int hk_cpus;
+ int user_top;
+ cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
};
@@ -42,6 +51,7 @@ struct timerlat_top_params {
struct timerlat_top_cpu {
int irq_count;
int thread_count;
+ int user_count;
unsigned long long cur_irq;
unsigned long long min_irq;
@@ -52,6 +62,11 @@ struct timerlat_top_cpu {
unsigned long long min_thread;
unsigned long long sum_thread;
unsigned long long max_thread;
+
+ unsigned long long cur_user;
+ unsigned long long min_user;
+ unsigned long long sum_user;
+ unsigned long long max_user;
};
struct timerlat_top_data {
@@ -92,6 +107,7 @@ static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus)
for (cpu = 0; cpu < nr_cpus; cpu++) {
data->cpu_data[cpu].min_irq = ~0;
data->cpu_data[cpu].min_thread = ~0;
+ data->cpu_data[cpu].min_user = ~0;
}
return data;
@@ -118,12 +134,18 @@ timerlat_top_update(struct osnoise_tool *tool, int cpu,
update_min(&cpu_data->min_irq, &latency);
update_sum(&cpu_data->sum_irq, &latency);
update_max(&cpu_data->max_irq, &latency);
- } else {
+ } else if (thread == 1) {
cpu_data->thread_count++;
cpu_data->cur_thread = latency;
update_min(&cpu_data->min_thread, &latency);
update_sum(&cpu_data->sum_thread, &latency);
update_max(&cpu_data->max_thread, &latency);
+ } else {
+ cpu_data->user_count++;
+ cpu_data->cur_user = latency;
+ update_min(&cpu_data->min_user, &latency);
+ update_sum(&cpu_data->sum_user, &latency);
+ update_max(&cpu_data->max_user, &latency);
}
}
@@ -150,9 +172,6 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
timerlat_top_update(top, cpu, thread, latency);
}
- if (!params->no_aa)
- timerlat_aa_handler(s, record, event, context);
-
return 0;
}
@@ -169,15 +188,25 @@ static void timerlat_top_header(struct osnoise_tool *top)
trace_seq_printf(s, "\033[2;37;40m");
trace_seq_printf(s, " Timer Latency ");
+ if (params->user_top)
+ trace_seq_printf(s, " ");
trace_seq_printf(s, "\033[0;0;0m");
trace_seq_printf(s, "\n");
- trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration,
+ trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration,
params->output_divisor == 1 ? "ns" : "us",
params->output_divisor == 1 ? "ns" : "us");
+ if (params->user_top) {
+ trace_seq_printf(s, " | Ret user Timer Latency (%s)",
+ params->output_divisor == 1 ? "ns" : "us");
+ }
+
+ trace_seq_printf(s, "\n");
trace_seq_printf(s, "\033[2;30;47m");
trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max");
+ if (params->user_top)
+ trace_seq_printf(s, " | cur min avg max");
trace_seq_printf(s, "\033[0;0;0m");
trace_seq_printf(s, "\n");
}
@@ -230,7 +259,27 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor);
trace_seq_printf(s, "%9llu ",
(cpu_data->sum_thread / cpu_data->thread_count) / divisor);
- trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor);
+ trace_seq_printf(s, "%9llu", cpu_data->max_thread / divisor);
+ }
+
+ if (!params->user_top) {
+ trace_seq_printf(s, "\n");
+ return;
+ }
+
+ trace_seq_printf(s, " |");
+
+ if (!cpu_data->user_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " -\n");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor);
+ trace_seq_printf(s, "%9llu ",
+ (cpu_data->sum_user / cpu_data->user_count) / divisor);
+ trace_seq_printf(s, "%9llu\n", cpu_data->max_user / divisor);
}
}
@@ -265,7 +314,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to
timerlat_top_header(top);
for (i = 0; i < nr_cpus; i++) {
- if (params->cpus && !params->monitored_cpus[i])
+ if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus))
continue;
timerlat_top_print(top, i);
}
@@ -284,8 +333,8 @@ static void timerlat_top_usage(char *usage)
static const char *const msg[] = {
"",
" usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\",
- " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\",
- " [-P priority] [--dma-latency us] [--aa-only us]",
+ " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
+ " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
@@ -295,6 +344,8 @@ static void timerlat_top_usage(char *usage)
" -T/--thread us: stop trace if the thread latency is higher than the argument in us",
" -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
" -c/--cpus cpus: run the tracer only on the given cpus",
+ " -H/--house-keeping cpus: run rtla control threads only on the given cpus",
+ " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
" -d/--duration time[m|h|d]: duration of the session in seconds",
" -D/--debug: print debug info",
" --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
@@ -312,6 +363,7 @@ static void timerlat_top_usage(char *usage)
" f:prio - use SCHED_FIFO with prio",
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
+ " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
NULL,
};
@@ -352,10 +404,12 @@ static struct timerlat_top_params
static struct option long_options[] = {
{"auto", required_argument, 0, 'a'},
{"cpus", required_argument, 0, 'c'},
+ {"cgroup", optional_argument, 0, 'C'},
{"debug", no_argument, 0, 'D'},
{"duration", required_argument, 0, 'd'},
{"event", required_argument, 0, 'e'},
{"help", no_argument, 0, 'h'},
+ {"house-keeping", required_argument, 0, 'H'},
{"irq", required_argument, 0, 'i'},
{"nano", no_argument, 0, 'n'},
{"period", required_argument, 0, 'p'},
@@ -364,6 +418,7 @@ static struct timerlat_top_params
{"stack", required_argument, 0, 's'},
{"thread", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
+ {"user-threads", no_argument, 0, 'u'},
{"trigger", required_argument, 0, '0'},
{"filter", required_argument, 0, '1'},
{"dma-latency", required_argument, 0, '2'},
@@ -376,7 +431,7 @@ static struct timerlat_top_params
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:345:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:u0:1:2:345:",
long_options, &option_index);
/* detect the end of the options. */
@@ -412,11 +467,21 @@ static struct timerlat_top_params
params->aa_only = 1;
break;
case 'c':
- retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ retval = parse_cpu_set(optarg, &params->monitored_cpus);
if (retval)
timerlat_top_usage("\nInvalid -c cpu list\n");
params->cpus = optarg;
break;
+ case 'C':
+ params->cgroup = 1;
+ if (!optarg) {
+ /* no argument: the threads will inherit rtla's cgroup */
+ params->cgroup_name = NULL;
+ } else if (*optarg == '=') {
+ /* "-C=name": skip the '=' */
+ params->cgroup_name = ++optarg;
+ } else {
+ /* "--cgroup=name": optarg is already the name */
+ params->cgroup_name = optarg;
+ }
+ break;
case 'D':
config_debug = 1;
break;
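A note on the '-C' handling above: getopt_long() only attaches an optional argument when it is glued to the option itself, which is why the parser has to deal with a leading '='. A minimal, self-contained sketch of what optarg contains for each spelling (illustrative only, not part of the patch):

#include <getopt.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
	static struct option long_options[] = {
		{"cgroup", optional_argument, 0, 'C'},
		{0, 0, 0, 0}
	};
	int c;

	/*
	 * "-C"            -> optarg is NULL
	 * "-C=name"       -> optarg is "=name" (the '=' is part of the text)
	 * "--cgroup=name" -> optarg is "name"
	 */
	while ((c = getopt_long(argc, argv, "C::", long_options, NULL)) != -1) {
		if (c == 'C')
			printf("optarg: %s\n", optarg ? optarg : "(none)");
	}

	return 0;
}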
@@ -440,6 +505,14 @@ static struct timerlat_top_params
case '?':
timerlat_top_usage(NULL);
break;
+ case 'H':
+ params->hk_cpus = 1;
+ retval = parse_cpu_set(optarg, &params->hk_cpu_set);
+ if (retval) {
+ err_msg("Error parsing house keeping CPUs\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
case 'i':
params->stop_us = get_llong_from_str(optarg);
break;
@@ -474,6 +547,9 @@ static struct timerlat_top_params
params->trace_output = "timerlat_trace.txt";
break;
+ case 'u':
+ params->user_top = true;
+ break;
case '0': /* trigger */
if (params->events) {
retval = trace_event_add_trigger(params->events, optarg);
@@ -538,6 +614,7 @@ static int
timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params)
{
int retval;
+ int i;
if (!params->sleep_time)
params->sleep_time = 1;
@@ -548,6 +625,9 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *
err_msg("Failed to apply CPUs config\n");
goto out_err;
}
+ } else {
+ for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++)
+ CPU_SET(i, &params->monitored_cpus);
}
if (params->stop_us) {
@@ -584,6 +664,32 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *
}
}
+ if (params->hk_cpus) {
+ retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
+ &params->hk_cpu_set);
+ if (retval == -1) {
+ err_msg("Failed to set rtla to the house keeping CPUs\n");
+ goto out_err;
+ }
+ } else if (params->cpus) {
+ /*
+ * Even if the user does not set a house-keeping CPU, try to
+ * move rtla to a CPU set different from the one where the
+ * user set the workload to run.
+ *
+ * No need to check results as this is an automatic attempt.
+ */
+ auto_house_keeping(&params->monitored_cpus);
+ }
+
+ if (params->user_top) {
+ retval = osnoise_set_workload(top->context, 0);
+ if (retval) {
+ err_msg("Failed to set OSNOISE_WORKLOAD option\n");
+ goto out_err;
+ }
+ }
+
return 0;
out_err:
@@ -598,7 +704,6 @@ static struct osnoise_tool
{
struct osnoise_tool *top;
int nr_cpus;
- int retval;
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
@@ -615,16 +720,6 @@ static struct osnoise_tool
tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
timerlat_top_handler, top);
- /*
- * If no auto analysis, we are ready.
- */
- if (params->no_aa)
- return top;
-
- retval = timerlat_aa_init(top, nr_cpus, params->dump_tasks);
- if (retval)
- goto out_err;
-
return top;
out_err:
@@ -655,9 +750,12 @@ int timerlat_top_main(int argc, char *argv[])
{
struct timerlat_top_params *params;
struct osnoise_tool *record = NULL;
+ struct timerlat_u_params params_u;
struct osnoise_tool *top = NULL;
+ struct osnoise_tool *aa = NULL;
struct trace_instance *trace;
int dma_latency_fd = -1;
+ pthread_t timerlat_u;
int return_value = 1;
char *max_lat;
int retval;
@@ -694,6 +792,14 @@ int timerlat_top_main(int argc, char *argv[])
}
}
+ if (params->cgroup && !params->user_top) {
+ retval = set_comm_cgroup("timerlat/", params->cgroup_name);
+ if (!retval) {
+ err_msg("Failed to move threads to cgroup\n");
+ goto out_free;
+ }
+ }
+
if (params->dma_latency >= 0) {
dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
if (dma_latency_fd < 0) {
@@ -702,8 +808,6 @@ int timerlat_top_main(int argc, char *argv[])
}
}
- trace_instance_start(trace);
-
if (params->trace_output) {
record = osnoise_init_trace_tool("timerlat");
if (!record) {
@@ -716,13 +820,70 @@ int timerlat_top_main(int argc, char *argv[])
if (retval)
goto out_top;
}
+ }
- trace_instance_start(&record->trace);
+ if (!params->no_aa) {
+ if (params->aa_only) {
+ /* as top is not used for display, use it for aa */
+ aa = top;
+ } else {
+ /* otherwise, a new instance is needed */
+ aa = osnoise_init_tool("timerlat_aa");
+ if (!aa)
+ goto out_top;
+ }
+
+ retval = timerlat_aa_init(aa, params->dump_tasks);
+ if (retval) {
+ err_msg("Failed to enable the auto analysis instance\n");
+ goto out_top;
+ }
+
+ /* if it is reusing the main instance, there is no need to enable it again */
+ if (aa != top) {
+ retval = enable_timerlat(&aa->trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_top;
+ }
+ }
}
+ /*
+ * Start the tracers here, after having set all instances.
+ *
+ * Let the trace instance start first, in case a stop-tracing
+ * condition hits while the other instances are being enabled.
+ * The trace instance is the one with the most valuable information.
+ */
+ if (params->trace_output)
+ trace_instance_start(&record->trace);
+ if (!params->no_aa && aa != top)
+ trace_instance_start(&aa->trace);
+ trace_instance_start(trace);
+
top->start_time = time(NULL);
timerlat_top_set_signals(params);
+ if (params->user_top) {
+ /* set to 0 when rtla asks the user-space threads to stop */
+ params_u.should_run = 1;
+ /* set to 1 by the dispatcher once all threads have left */
+ params_u.stopped_running = 0;
+
+ params_u.set = &params->monitored_cpus;
+ if (params->set_sched)
+ params_u.sched_param = &params->sched_param;
+ else
+ params_u.sched_param = NULL;
+
+ params_u.cgroup_name = params->cgroup_name;
+
+ retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, &params_u);
+ if (retval)
+ err_msg("Error creating timerlat user-space threads\n");
+ }
+
while (!stop_tracing) {
sleep(params->sleep_time);
@@ -746,6 +907,18 @@ int timerlat_top_main(int argc, char *argv[])
if (trace_is_off(&top->trace, &record->trace))
break;
+ /* are any user-space threads still running? */
+ if (params->user_top) {
+ if (params_u.stopped_running) {
+ debug_msg("timerlat user space threads stopped!\n");
+ break;
+ }
+ }
+ }
+
+ if (params->user_top && !params_u.stopped_running) {
+ params_u.should_run = 0;
+ sleep(1);
}
timerlat_print_stats(params, top);
@@ -775,13 +948,15 @@ int timerlat_top_main(int argc, char *argv[])
}
out_top:
+ timerlat_aa_destroy();
if (dma_latency_fd >= 0)
close(dma_latency_fd);
trace_events_destroy(&record->trace, params->events);
params->events = NULL;
out_free:
timerlat_free_top(top->data);
- timerlat_aa_destroy();
+ if (aa && aa != top)
+ osnoise_destroy_tool(aa);
osnoise_destroy_tool(record);
osnoise_destroy_tool(top);
free(params);
diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c
new file mode 100644
index 000000000000..05e310696dd5
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_u.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <tracefs.h>
+#include <pthread.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+
+#include "utils.h"
+#include "timerlat_u.h"
+
+/*
+ * This is the user-space main function for the timerlatu/ threads.
+ *
+ * It is as simple as this:
+ * - set affinity
+ * - set priority
+ * - open tracer fd
+ * - spin
+ * - close
+ */
+static int timerlat_u_main(int cpu, struct timerlat_u_params *params)
+{
+ struct sched_param sp = { .sched_priority = 95 };
+ char buffer[1024];
+ int timerlat_fd;
+ cpu_set_t set;
+ int retval;
+
+ /*
+ * All of this is just setting up the tool.
+ */
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+
+ retval = sched_setaffinity(gettid(), sizeof(set), &set);
+ if (retval == -1) {
+ err_msg("Error setting user thread affinity\n");
+ exit(1);
+ }
+
+ if (!params->sched_param) {
+ retval = sched_setscheduler(0, SCHED_FIFO, &sp);
+ if (retval < 0) {
+ err_msg("Error setting timerlat u default priority: %s\n", strerror(errno));
+ exit(1);
+ }
+ } else {
+ retval = __set_sched_attr(getpid(), params->sched_param);
+ if (retval) {
+ /* __set_sched_attr prints an error message, so no need to print another one */
+ exit(0);
+ }
+ }
+
+ if (params->cgroup_name) {
+ retval = set_pid_cgroup(gettid(), params->cgroup_name);
+ if (!retval) {
+ err_msg("Error setting timerlat u cgroup pid\n");
+ pthread_exit(&retval);
+ }
+ }
+
+ /*
+ * This is the tool's loop. If you want to use it as a base for your own tool...
+ * go ahead.
+ */
+ snprintf(buffer, sizeof(buffer), "osnoise/per_cpu/cpu%d/timerlat_fd", cpu);
+
+ timerlat_fd = tracefs_instance_file_open(NULL, buffer, O_RDONLY);
+ if (timerlat_fd < 0) {
+ err_msg("Error opening %s:%s\n", buffer, strerror(errno));
+ exit(1);
+ }
+
+ debug_msg("User-space timerlat pid %d on cpu %d\n", gettid(), cpu);
+
+ /* TODO: add a should-continue flag set by a signal handler */
+ while (true) {
+ retval = read(timerlat_fd, buffer, 1024);
+ if (retval < 0)
+ break;
+ }
+
+ close(timerlat_fd);
+
+ debug_msg("Leaving timerlat pid %d on cpu %d\n", gettid(), cpu);
+ exit(0);
+}
+
+/*
+ * timerlat_u_send_kill - send a kill signal to all processes
+ *
+ * Return the number of processes that received the kill.
+ */
+static int timerlat_u_send_kill(pid_t *procs, int nr_cpus)
+{
+ int killed = 0;
+ int i, retval;
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (!procs[i])
+ continue;
+ retval = kill(procs[i], SIGKILL);
+ if (!retval)
+ killed++;
+ else
+ err_msg("Error killing child process %d\n", procs[i]);
+ }
+
+ return killed;
+}
+
+/**
+ * timerlat_u_dispatcher - dispatch one timerlatu/ process per monitored CPU
+ *
+ * This is a thread main that will fork one new process for each monitored
+ * CPU. It will wait for:
+ *
+ * - rtla telling it to kill the child processes
+ * - some child process dying, in which case it cleans up all the processes
+ *
+ * whichever comes first. A standalone sketch of this fork/waitpid pattern
+ * follows the file.
+ *
+ */
+void *timerlat_u_dispatcher(void *data)
+{
+ int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ struct timerlat_u_params *params = data;
+ char proc_name[128];
+ int procs_count = 0;
+ int retval = 1;
+ pid_t *procs;
+ int wstatus;
+ pid_t pid;
+ int i;
+
+ debug_msg("Dispatching timerlat u procs\n");
+
+ procs = calloc(nr_cpus, sizeof(pid_t));
+ if (!procs)
+ pthread_exit(&retval);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->set && !CPU_ISSET(i, params->set))
+ continue;
+
+ pid = fork();
+
+ /* child */
+ if (!pid) {
+
+ /*
+ * rename the process
+ */
+ snprintf(proc_name, sizeof(proc_name), "timerlatu/%d", i);
+ pthread_setname_np(pthread_self(), proc_name);
+ prctl(PR_SET_NAME, (unsigned long)proc_name, 0, 0, 0);
+
+ timerlat_u_main(i, params);
+ /* timerlat_u_main() should exit(); this is just a safety net */
+ pthread_exit(&retval);
+ }
+
+ /* parent */
+ if (pid == -1) {
+ timerlat_u_send_kill(procs, nr_cpus);
+ debug_msg("Failed to create child processes");
+ pthread_exit(&retval);
+ }
+
+ procs_count++;
+ procs[i] = pid;
+ }
+
+ while (params->should_run) {
+ /* check if processes died */
+ pid = waitpid(-1, &wstatus, WNOHANG);
+ if (pid != 0) {
+ for (i = 0; i < nr_cpus; i++) {
+ if (procs[i] == pid) {
+ procs[i] = 0;
+ procs_count--;
+ }
+ }
+ break;
+ }
+
+ sleep(1);
+ }
+
+ timerlat_u_send_kill(procs, nr_cpus);
+
+ while (procs_count) {
+ pid = waitpid(-1, &wstatus, 0);
+ if (pid == -1) {
+ err_msg("Failed to monitor child processes");
+ pthread_exit(&retval);
+ }
+ for (i = 0; i < nr_cpus; i++) {
+ if (procs[i] == pid) {
+ procs[i] = 0;
+ procs_count--;
+ }
+ }
+ }
+
+ params->stopped_running = 1;
+
+ free(procs);
+ retval = 0;
+ pthread_exit(&retval);
+}
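A standalone sketch of the dispatcher's fork/waitpid pattern referenced above (illustrative only: NR_WORKERS and the sleeping children stand in for the per-CPU timerlat_u_main() processes, and error handling is elided):

#define _GNU_SOURCE
#include <signal.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define NR_WORKERS 4

int main(void)
{
	pid_t procs[NR_WORKERS];
	pid_t pid;
	int i;

	/* fork one worker per slot, as the dispatcher does per CPU */
	for (i = 0; i < NR_WORKERS; i++) {
		pid = fork();
		if (!pid) {		/* child: stand-in for timerlat_u_main() */
			sleep(2 + i);
			exit(0);
		}
		if (pid == -1)
			exit(1);	/* error handling elided */
		procs[i] = pid;
	}

	/* poll until the first child dies, without blocking */
	while ((pid = waitpid(-1, NULL, WNOHANG)) == 0)
		sleep(1);

	/* the first child died and is already reaped: kill the survivors... */
	for (i = 0; i < NR_WORKERS; i++)
		if (procs[i] != pid)
			kill(procs[i], SIGKILL);

	/* ...and reap them; waitpid() returns -1 (ECHILD) when none remain */
	while (waitpid(-1, NULL, 0) > 0)
		;

	return 0;
}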
diff --git a/tools/tracing/rtla/src/timerlat_u.h b/tools/tracing/rtla/src/timerlat_u.h
new file mode 100644
index 000000000000..661511908957
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_u.h
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+struct timerlat_u_params {
+ /* timerlat -> timerlat_u: user-space threads can keep running */
+ int should_run;
+ /* timerlat_u -> timerlat: all timerlat_u threads left, no reason to continue */
+ int stopped_running;
+
+ /* threads config */
+ cpu_set_t *set;
+ char *cgroup_name;
+ struct sched_attr *sched_param;
+};
+
+void *timerlat_u_dispatcher(void *data);
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index 663a047f794d..623a38908ed5 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -3,6 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
+#define _GNU_SOURCE
#include <dirent.h>
#include <stdarg.h>
#include <stdlib.h>
@@ -88,27 +89,24 @@ void get_duration(time_t start_time, char *output, int output_size)
}
/*
- * parse_cpu_list - parse a cpu_list filling a char vector with cpus set
+ * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
*
- * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char
- * in the monitored_cpus.
+ * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the
+ * corresponding bits in the cpu_set_t argument.
*
- * XXX: convert to a bitmask.
+ * Returns 0 on success, 1 on error.
*/
-int parse_cpu_list(char *cpu_list, char **monitored_cpus)
+int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
- char *mon_cpus;
const char *p;
int end_cpu;
int nr_cpus;
int cpu;
int i;
- nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ CPU_ZERO(set);
- mon_cpus = calloc(nr_cpus, sizeof(char));
- if (!mon_cpus)
- goto err;
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
for (p = cpu_list; *p; ) {
cpu = atoi(p);
@@ -128,12 +126,12 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus)
end_cpu = cpu;
if (cpu == end_cpu) {
- debug_msg("cpu_list: adding cpu %d\n", cpu);
- mon_cpus[cpu] = 1;
+ debug_msg("cpu_set: adding cpu %d\n", cpu);
+ CPU_SET(cpu, set);
} else {
for (i = cpu; i <= end_cpu; i++) {
- debug_msg("cpu_list: adding cpu %d\n", i);
- mon_cpus[i] = 1;
+ debug_msg("cpu_set: adding cpu %d\n", i);
+ CPU_SET(i, set);
}
}
@@ -141,12 +139,9 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus)
p++;
}
- *monitored_cpus = mon_cpus;
-
return 0;
-
err:
- debug_msg("Error parsing the cpu list %s", cpu_list);
+ debug_msg("Error parsing the cpu set %s\n", cpu_list);
return 1;
}
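For reference, a self-contained sketch of the cpu-list syntax and of how a caller consumes the resulting cpu_set_t; demo_parse_cpu_set() below is a simplified, illustrative re-implementation, not the rtla function:

#define _GNU_SOURCE
#include <ctype.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

/* parse "1-3,5" style lists; returns 0 on success and 1 on error,
 * matching the rtla convention */
static int demo_parse_cpu_set(const char *p, cpu_set_t *set)
{
	int cpu, end_cpu, i;

	CPU_ZERO(set);

	while (*p) {
		if (!isdigit((unsigned char)*p))
			return 1;
		cpu = end_cpu = atoi(p);
		while (isdigit((unsigned char)*p))
			p++;

		if (*p == '-') {	/* a range, like "1-3" */
			if (!isdigit((unsigned char)*++p))
				return 1;
			end_cpu = atoi(p);
			while (isdigit((unsigned char)*p))
				p++;
		}

		if (end_cpu < cpu || end_cpu >= CPU_SETSIZE)
			return 1;

		for (i = cpu; i <= end_cpu; i++)
			CPU_SET(i, set);

		if (*p == ',')
			p++;
	}

	return 0;
}

int main(void)
{
	cpu_set_t set;
	int i;

	if (demo_parse_cpu_set("1-3,5", &set))
		return 1;

	for (i = 0; i < CPU_SETSIZE; i++)
		if (CPU_ISSET(i, &set))
			printf("cpu %d is set\n", i);	/* 1, 2, 3, 5 */

	return 0;
}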
@@ -529,3 +524,296 @@ int set_cpu_dma_latency(int32_t latency)
return fd;
}
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+/*
+ * find_mount - find the mount point of a given fs
+ *
+ * Returns 0 if the mount point is not found; otherwise, returns 1 and
+ * fills mp with the mount point.
+ */
+static int find_mount(const char *fs, char *mp, int sizeof_mp)
+{
+ char mount_point[MAX_PATH];
+ char type[100];
+ int found = 0;
+ FILE *fp;
+
+ fp = fopen("/proc/mounts", "r");
+ if (!fp)
+ return 0;
+
+ while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
+ if (strcmp(type, fs) == 0) {
+ found = 1;
+ break;
+ }
+ }
+ fclose(fp);
+
+ if (!found)
+ return 0;
+
+ memset(mp, 0, sizeof_mp);
+ strncpy(mp, mount_point, sizeof_mp - 1);
+
+ debug_msg("Fs %s found at %s\n", fs, mp);
+ return 1;
+}
+
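The _STR()/STR() pair above is the standard two-level stringification trick, used by find_mount() to embed MAX_PATH in the fscanf() format as a field width. A minimal sketch (WIDTH and the strings are illustrative):

#include <stdio.h>

#define _STR(x) #x
#define STR(x) _STR(x)
#define WIDTH 8

int main(void)
{
	char word[WIDTH + 1];

	/* STR(WIDTH) expands to "8"; _STR(WIDTH) alone would give "WIDTH" */
	printf("format: \"%s\"\n", "%" STR(WIDTH) "s");

	/* the format string "%8s" is assembled at compile time */
	if (sscanf("housekeeping", "%" STR(WIDTH) "s", word) == 1)
		printf("read: %s\n", word);	/* prints "housekee" */

	return 0;
}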
+/*
+ * get_self_cgroup - get the current thread cgroup path
+ *
+ * Parses the /proc/$$/cgroup file to get the thread's cgroup. An example of a line to parse:
+ *
+ * 0::/user.slice/user-0.slice/session-3.scope'\n'
+ *
+ * This function is interested in the content after the second : and before the '\n'.
+ *
+ * Returns 1 if a string was found, 0 otherwise.
+ */
+static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
+{
+ char path[MAX_PATH], *start;
+ int fd, retval;
+
+ snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return 0;
+
+ retval = read(fd, path, MAX_PATH);
+
+ close(fd);
+
+ if (retval <= 0)
+ return 0;
+
+ start = path;
+
+ start = strstr(start, ":");
+ if (!start)
+ return 0;
+
+ /* skip ":" */
+ start++;
+
+ start = strstr(start, ":");
+ if (!start)
+ return 0;
+
+ /* skip ":" */
+ start++;
+
+ if (strlen(start) >= sizeof_self_cg)
+ return 0;
+
+ snprintf(self_cg, sizeof_self_cg, "%s", start);
+
+ /* Swap '\n' with '\0' */
+ start = strstr(self_cg, "\n");
+
+ /* there must be '\n' */
+ if (!start)
+ return 0;
+
+ /* ok, it found a string after the second : and before the \n */
+ *start = '\0';
+
+ return 1;
+}
+
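A self-contained sketch of the same extraction on the example line from the comment above; it uses strchr() where the function uses strstr(), and the hard-coded line is illustrative:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* the example line from the comment above */
	char line[] = "0::/user.slice/user-0.slice/session-3.scope\n";
	char *p, *nl;

	p = strchr(line, ':');		/* first ':' */
	if (!p)
		return 1;
	p = strchr(p + 1, ':');		/* second ':' */
	if (!p)
		return 1;
	p++;				/* the cgroup path starts here */

	nl = strchr(p, '\n');		/* swap '\n' with '\0' */
	if (!nl)
		return 1;
	*nl = '\0';

	printf("%s\n", p);	/* /user.slice/user-0.slice/session-3.scope */
	return 0;
}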
+/*
+ * set_pid_cgroup - Set the cgroup of a given pid
+ *
+ * If the cgroup argument is not NULL, the pid will move to the given cgroup.
+ * Otherwise, the cgroup of the calling (i.e., rtla) thread will be used.
+ *
+ * Supports cgroup v2.
+ *
+ * Returns 1 on success, 0 otherwise.
+ */
+int set_pid_cgroup(pid_t pid, const char *cgroup)
+{
+ char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
+ char cgroup_procs[MAX_PATH];
+ char pid_str[24];
+ int retval;
+ int cg_fd;
+
+ retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
+ if (!retval) {
+ err_msg("Did not find cgroupv2 mount point\n");
+ return 0;
+ }
+
+ if (!cgroup) {
+ retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
+ sizeof(cgroup_path) - strlen(cgroup_path));
+ if (!retval) {
+ err_msg("Did not find self cgroup\n");
+ return 0;
+ }
+ } else {
+ snprintf(&cgroup_path[strlen(cgroup_path)],
+ sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
+ }
+
+ snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
+
+ debug_msg("Using cgroup path at: %s\n", cgroup_procs);
+
+ cg_fd = open(cgroup_procs, O_RDWR);
+ if (cg_fd < 0)
+ return 0;
+
+ snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
+
+ retval = write(cg_fd, pid_str, strlen(pid_str));
+ if (retval < 0)
+ err_msg("Error setting cgroup attributes for pid:%s - %s\n",
+ pid_str, strerror(errno));
+ else
+ debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
+
+ close(cg_fd);
+
+ return (retval >= 0);
+}
+
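In essence, set_pid_cgroup() writes the pid into the cgroup's cgroup.procs file. A minimal sketch, assuming cgroup2 is mounted at /sys/fs/cgroup and that a "my_cgroup" directory already exists (both assumptions; the real code discovers the mount point via /proc/mounts, and the write usually requires root):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char pid_str[24];
	int fd;

	/* assumed path: cgroup2 at /sys/fs/cgroup, cgroup "my_cgroup" exists */
	fd = open("/sys/fs/cgroup/my_cgroup/cgroup.procs", O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* writing a pid to cgroup.procs moves that process into the cgroup */
	snprintf(pid_str, sizeof(pid_str), "%d\n", getpid());
	if (write(fd, pid_str, strlen(pid_str)) < 0)
		perror("write");

	close(fd);
	return 0;
}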
+/**
+ * set_comm_cgroup - Set the cgroup of threads whose comm starts with comm_prefix
+ *
+ * If the cgroup argument is not NULL, the threads will move to the given cgroup.
+ * Otherwise, the cgroup of the calling (i.e., rtla) thread will be used.
+ *
+ * Supports cgroup v2.
+ *
+ * Returns 1 on success, 0 otherwise.
+ */
+int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
+{
+ char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
+ char cgroup_procs[MAX_PATH];
+ struct dirent *proc_entry;
+ DIR *procfs;
+ int retval;
+ int cg_fd;
+
+ if (strlen(comm_prefix) >= MAX_PATH) {
+ err_msg("Command prefix is too long: %d < strlen(%s)\n",
+ MAX_PATH, comm_prefix);
+ return 0;
+ }
+
+ retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
+ if (!retval) {
+ err_msg("Did not find cgroupv2 mount point\n");
+ return 0;
+ }
+
+ if (!cgroup) {
+ retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
+ sizeof(cgroup_path) - strlen(cgroup_path));
+ if (!retval) {
+ err_msg("Did not find self cgroup\n");
+ return 0;
+ }
+ } else {
+ snprintf(&cgroup_path[strlen(cgroup_path)],
+ sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
+ }
+
+ snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
+
+ debug_msg("Using cgroup path at: %s\n", cgroup_procs);
+
+ cg_fd = open(cgroup_procs, O_RDWR);
+ if (cg_fd < 0)
+ return 0;
+
+ procfs = opendir("/proc");
+ if (!procfs) {
+ err_msg("Could not open procfs\n");
+ goto out_cg;
+ }
+
+ while ((proc_entry = readdir(procfs))) {
+
+ retval = procfs_is_workload_pid(comm_prefix, proc_entry);
+ if (!retval)
+ continue;
+
+ retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
+ if (retval < 0) {
+ err_msg("Error setting cgroup attributes for pid:%s - %s\n",
+ proc_entry->d_name, strerror(errno));
+ goto out_procfs;
+ }
+
+ debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
+ }
+
+ closedir(procfs);
+ close(cg_fd);
+ return 1;
+
+out_procfs:
+ closedir(procfs);
+out_cg:
+ close(cg_fd);
+ return 0;
+}
+
+/**
+ * auto_house_keeping - Automatically move rtla off the monitored CPUs
+ *
+ * Try to move rtla away from the monitored CPUs, if possible.
+ *
+ * Returns 1 on success, 0 otherwise.
+ */
+int auto_house_keeping(cpu_set_t *monitored_cpus)
+{
+ cpu_set_t rtla_cpus, house_keeping_cpus;
+ int retval;
+
+ /* first get the CPUs in which rtla can actually run. */
+ retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
+ if (retval == -1) {
+ debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
+ return 0;
+ }
+
+ /* then check if the existing setup is already good. */
+ CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
+ if (!CPU_COUNT(&house_keeping_cpus)) {
+ debug_msg("rtla and the monitored CPUs do not share CPUs.");
+ debug_msg("Skipping auto house-keeping\n");
+ return 1;
+ }
+
+ /* remove the intersection */
+ CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
+
+ /* get only those that rtla can run */
+ CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);
+
+ /* is there any cpu left? */
+ if (!CPU_COUNT(&house_keeping_cpus)) {
+ debug_msg("Could not find any CPU for auto house-keeping\n");
+ return 0;
+ }
+
+ retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
+ if (retval == -1) {
+ debug_msg("Could not set affinity for auto house-keeping\n");
+ return 0;
+ }
+
+ debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");
+
+ return 1;
+}
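A self-contained sketch of the set arithmetic above, with hard-coded example sets (CPUs 0-3 allowed, CPUs 2-3 monitored, both illustrative); the house-keeping set ends up as the allowed CPUs minus the monitored ones:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t allowed, monitored, hk;
	int i;

	CPU_ZERO(&allowed);
	CPU_ZERO(&monitored);
	for (i = 0; i < 4; i++)
		CPU_SET(i, &allowed);	/* rtla may run on CPUs 0-3 */
	CPU_SET(2, &monitored);		/* the workload runs on CPUs 2-3 */
	CPU_SET(3, &monitored);

	/* do rtla and the workload share CPUs? */
	CPU_AND(&hk, &allowed, &monitored);
	printf("shared CPUs: %d\n", CPU_COUNT(&hk));	/* 2, so move rtla */

	/* symmetric difference, then intersect with allowed: allowed \ monitored */
	CPU_XOR(&hk, &allowed, &monitored);
	CPU_AND(&hk, &hk, &allowed);

	for (i = 0; i < CPU_SETSIZE; i++)
		if (CPU_ISSET(i, &hk))
			printf("house-keeping cpu %d\n", i);	/* 0 and 1 */

	return 0;
}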
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
index 90e4f52a030b..04ed1e650495 100644
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+
#include <stdint.h>
#include <time.h>
+#include <sched.h>
/*
* '18446744073709551615\0'
@@ -54,8 +56,13 @@ struct sched_attr {
};
int parse_prio(char *arg, struct sched_attr *sched_param);
+int parse_cpu_set(char *cpu_list, cpu_set_t *set);
+int __set_sched_attr(int pid, struct sched_attr *attr);
int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr);
+int set_comm_cgroup(const char *comm_prefix, const char *cgroup);
+int set_pid_cgroup(pid_t pid, const char *cgroup);
int set_cpu_dma_latency(int32_t latency);
+int auto_house_keeping(cpu_set_t *monitored_cpus);
#define ns_to_usf(x) (((double)x/1000))
#define ns_to_per(total, part) ((part * 100) / (double)total)
diff --git a/tools/usb/usbip/configure.ac b/tools/usb/usbip/configure.ac
index 607d05c5ccfd..8debf934f8b7 100644
--- a/tools/usb/usbip/configure.ac
+++ b/tools/usb/usbip/configure.ac
@@ -94,11 +94,11 @@ AC_SUBST([USBIDS_DIR])
AC_MSG_CHECKING([whether to use fortify])
AC_ARG_WITH([fortify],
[AS_HELP_STRING([--with-fortify],
- [use _FORTIFY_SROUCE option when compiling)])],
+ [use _FORTIFY_SOURCE=2 option when compiling)])],
dnl [ACTION-IF-GIVEN]
[if test "$withval" = "yes"; then
AC_MSG_RESULT([yes])
- CFLAGS="$CFLAGS -D_FORTIFY_SOURCE -O"
+ CFLAGS="$CFLAGS -D_FORTIFY_SOURCE=2 -O"
else
AC_MSG_RESULT([no])
CFLAGS="$CFLAGS -U_FORTIFY_SOURCE"
diff --git a/tools/usb/usbip/src/usbip_attach.c b/tools/usb/usbip/src/usbip_attach.c
index b4aeb9f1f493..531a415538f9 100644
--- a/tools/usb/usbip/src/usbip_attach.c
+++ b/tools/usb/usbip/src/usbip_attach.c
@@ -86,7 +86,7 @@ static int import_device(int sockfd, struct usbip_usb_device *udev)
rc = usbip_vhci_driver_open();
if (rc < 0) {
- err("open vhci_driver");
+ err("open vhci_driver (is vhci_hcd loaded?)");
goto err_out;
}
diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
index aec993159036..b29101986b5a 100644
--- a/tools/usb/usbip/src/usbip_detach.c
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -50,7 +50,7 @@ static int detach_port(char *port)
ret = usbip_vhci_driver_open();
if (ret < 0) {
- err("open vhci_driver");
+ err("open vhci_driver (is vhci_hcd loaded?)");
return -1;
}
diff --git a/tools/usb/usbip/src/usbip_port.c b/tools/usb/usbip/src/usbip_port.c
index 4d14387df13d..21a20e378419 100644
--- a/tools/usb/usbip/src/usbip_port.c
+++ b/tools/usb/usbip/src/usbip_port.c
@@ -18,7 +18,7 @@ static int list_imported_devices(void)
ret = usbip_vhci_driver_open();
if (ret < 0) {
- err("open vhci_driver");
+ err("open vhci_driver (is vhci_hcd loaded?)");
goto err_names_free;
}
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index 7b7139d97d74..d128925980e0 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -4,7 +4,18 @@ test: virtio_test vringh_test
virtio_test: virtio_ring.o virtio_test.o
vringh_test: vringh_test.o vringh.o virtio_ring.o
-CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -mfunction-return=thunk -fcf-protection=none -mindirect-branch-register
+try-run = $(shell set -e; \
+ if ($(1)) >/dev/null 2>&1; \
+ then echo "$(2)"; \
+ else echo "$(3)"; \
+ fi)
+
+__cc-option = $(call try-run,\
+ $(1) -Werror $(2) -c -x c /dev/null -o /dev/null,$(2),)
+cc-option = $(call __cc-option, $(CC),$(1))
+
+CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h $(call cc-option,-mfunction-return=thunk) $(call cc-option,-fcf-protection=none) $(call cc-option,-mindirect-branch-register)
+
CFLAGS += -pthread
LDFLAGS += -pthread
vpath %.c ../../drivers/virtio ../../drivers/vhost
diff --git a/tools/workqueue/wq_monitor.py b/tools/workqueue/wq_monitor.py
new file mode 100644
index 000000000000..6e258d123e8c
--- /dev/null
+++ b/tools/workqueue/wq_monitor.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
+# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
+
+desc = """
+This is a drgn script to monitor workqueues. For more info on drgn, visit
+https://github.com/osandov/drgn.
+
+ total Total number of work items executed by the workqueue.
+
+ infl The number of currently in-flight work items.
+
+ CPUtime Total CPU time consumed by the workqueue in seconds. This is
+ sampled from scheduler ticks and only provides ballpark
+ measurement. "nohz_full=" CPUs are excluded from measurement.
+
+ CPUitsv The number of times a concurrency-managed work item hogged CPU
+ longer than the threshold (workqueue.cpu_intensive_thresh_us)
+ and got excluded from concurrency management to avoid stalling
+ other work items.
+
+ CMwake The number of concurrency-management wake-ups while executing a
+ work item of the workqueue.
+
+ mayday The number of times the rescuer was requested while waiting for
+ new worker creation.
+
+ rescued The number of work items executed by the rescuer.
+"""
+
+import sys
+import signal
+import os
+import re
+import time
+import json
+
+import drgn
+from drgn.helpers.linux.list import list_for_each_entry, list_empty
+from drgn.helpers.linux.cpumask import for_each_possible_cpu
+
+import argparse
+parser = argparse.ArgumentParser(description=desc,
+ formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument('workqueue', metavar='REGEX', nargs='*',
+ help='Target workqueue name patterns (all if empty)')
+parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
+ help='Monitoring interval (0 to print once and exit)')
+parser.add_argument('-j', '--json', action='store_true',
+ help='Output in json')
+args = parser.parse_args()
+
+def err(s):
+ print(s, file=sys.stderr, flush=True)
+ sys.exit(1)
+
+workqueues = prog['workqueues']
+
+WQ_UNBOUND = prog['WQ_UNBOUND']
+WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
+
+PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
+PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
+PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME'] # total CPU time consumed
+PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
+PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
+PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
+PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
+PWQ_NR_STATS = prog['PWQ_NR_STATS']
+
+class WqStats:
+ def __init__(self, wq):
+ self.name = wq.name.string_().decode()
+ self.unbound = wq.flags & WQ_UNBOUND != 0
+ self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
+ self.stats = [0] * PWQ_NR_STATS
+ for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
+ for i in range(PWQ_NR_STATS):
+ self.stats[i] += int(pwq.stats[i])
+
+ def dict(self, now):
+ return { 'timestamp' : now,
+ 'name' : self.name,
+ 'unbound' : self.unbound,
+ 'mem_reclaim' : self.mem_reclaim,
+ 'started' : self.stats[PWQ_STAT_STARTED],
+ 'completed' : self.stats[PWQ_STAT_COMPLETED],
+ 'cpu_time' : self.stats[PWQ_STAT_CPU_TIME],
+ 'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE],
+ 'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
+ 'mayday' : self.stats[PWQ_STAT_MAYDAY],
+ 'rescued' : self.stats[PWQ_STAT_RESCUED], }
+
+ def table_header_str():
+ return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
+ f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
+
+ def table_row_str(self):
+ cpu_intensive = '-'
+ cm_wakeup = '-'
+ mayday = '-'
+ rescued = '-'
+
+ if not self.unbound:
+ cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
+ cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])
+
+ if self.mem_reclaim:
+ mayday = str(self.stats[PWQ_STAT_MAYDAY])
+ rescued = str(self.stats[PWQ_STAT_RESCUED])
+
+ out = f'{self.name[-24:]:24} ' \
+ f'{self.stats[PWQ_STAT_STARTED]:8} ' \
+ f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
+ f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
+ f'{cpu_intensive:>7} ' \
+ f'{cm_wakeup:>7} ' \
+ f'{mayday:>7} ' \
+ f'{rescued:>7} '
+ return out.rstrip()
+
+exit_req = False
+
+def sigint_handler(signr, frame):
+ global exit_req
+ exit_req = True
+
+def main():
+ # handle args
+ table_fmt = not args.json
+ interval = args.interval
+
+ re_str = None
+ if args.workqueue:
+ for r in args.workqueue:
+ if re_str is None:
+ re_str = r
+ else:
+ re_str += '|' + r
+
+ filter_re = re.compile(re_str) if re_str else None
+
+ # monitoring loop
+ signal.signal(signal.SIGINT, sigint_handler)
+
+ while not exit_req:
+ now = time.time()
+
+ if table_fmt:
+ print()
+ print(WqStats.table_header_str())
+
+ for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
+ stats = WqStats(wq)
+ if filter_re and not filter_re.search(stats.name):
+ continue
+ if table_fmt:
+ print(stats.table_row_str())
+ else:
+ print(json.dumps(stats.dict(now)))
+
+ if interval == 0:
+ break
+ time.sleep(interval)
+
+if __name__ == "__main__":
+ main()