aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile3
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/powerpc/include/uapi/asm/perf_regs.h20
-rw-r--r--tools/arch/riscv/include/uapi/asm/unistd.h2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h4
-rw-r--r--tools/arch/x86/include/asm/msr-index.h26
-rw-r--r--tools/bootconfig/samples/bad-override.bconf3
-rw-r--r--tools/bootconfig/samples/bad-override2.bconf3
-rw-r--r--tools/bootconfig/samples/good-override.bconf6
-rwxr-xr-xtools/bootconfig/test-bootconfig.sh13
-rw-r--r--tools/bpf/Makefile9
-rw-r--r--tools/bpf/bpftool/.gitignore5
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst5
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst18
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst21
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst8
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst13
-rw-r--r--tools/bpf/bpftool/Makefile69
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool24
-rw-r--r--tools/bpf/bpftool/btf.c62
-rw-r--r--tools/bpf/bpftool/common.c483
-rw-r--r--tools/bpf/bpftool/feature.c12
-rw-r--r--tools/bpf/bpftool/gen.c28
-rw-r--r--tools/bpf/bpftool/iter.c36
-rw-r--r--tools/bpf/bpftool/link.c48
-rw-r--r--tools/bpf/bpftool/main.c12
-rw-r--r--tools/bpf/bpftool/main.h129
-rw-r--r--tools/bpf/bpftool/map.c167
-rw-r--r--tools/bpf/bpftool/pids.c231
-rw-r--r--tools/bpf/bpftool/prog.c197
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c81
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.h12
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c7
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.h46
-rw-r--r--tools/bpf/resolve_btfids/.gitignore4
-rw-r--r--tools/bpf/resolve_btfids/Build10
-rw-r--r--tools/bpf/resolve_btfids/Makefile91
-rw-r--r--tools/bpf/resolve_btfids/main.c666
-rw-r--r--tools/build/Build.include3
-rw-r--r--tools/build/Makefile.feature2
-rw-r--r--tools/build/feature/Makefile6
-rw-r--r--tools/build/feature/test-clang-bpf-co-re.c9
-rw-r--r--tools/build/feature/test-clang-bpf-global-var.c4
-rw-r--r--tools/cgroup/memcg_slabinfo.py226
-rw-r--r--tools/gpio/gpio-event-mon.c3
-rw-r--r--tools/gpio/gpio-utils.c4
-rw-r--r--tools/gpio/lsgpio.c3
-rw-r--r--tools/hv/hv_kvp_daemon.c2
-rw-r--r--tools/include/linux/btf_ids.h130
-rw-r--r--tools/include/linux/compiler.h4
-rw-r--r--tools/include/linux/jhash.h2
-rw-r--r--tools/include/uapi/asm-generic/unistd.h6
-rw-r--r--tools/include/uapi/drm/i915_drm.h4
-rw-r--r--tools/include/uapi/linux/bpf.h394
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/if_xdp.h5
-rw-r--r--tools/include/uapi/linux/in.h1
-rw-r--r--tools/include/uapi/linux/perf_event.h26
-rw-r--r--tools/lib/api/fd/array.c23
-rw-r--r--tools/lib/api/fd/array.h16
-rw-r--r--tools/lib/bpf/bpf.c14
-rw-r--r--tools/lib/bpf/bpf.h8
-rw-r--r--tools/lib/bpf/bpf_core_read.h8
-rw-r--r--tools/lib/bpf/bpf_endian.h43
-rw-r--r--tools/lib/bpf/bpf_helpers.h3
-rw-r--r--tools/lib/bpf/bpf_tracing.h4
-rw-r--r--tools/lib/bpf/btf.c127
-rw-r--r--tools/lib/bpf/btf.h17
-rw-r--r--tools/lib/bpf/btf_dump.c12
-rw-r--r--tools/lib/bpf/libbpf.c851
-rw-r--r--tools/lib/bpf/libbpf.h41
-rw-r--r--tools/lib/bpf/libbpf.map27
-rw-r--r--tools/lib/bpf/libbpf_probes.c3
-rw-r--r--tools/lib/perf/evlist.c6
-rw-r--r--tools/lib/perf/include/internal/evlist.h2
-rw-r--r--tools/lib/perf/include/perf/event.h9
-rw-r--r--tools/lib/rbtree.c2
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-plugins.txt25
-rw-r--r--tools/lib/traceevent/event-parse-local.h22
-rw-r--r--tools/lib/traceevent/event-parse.c1004
-rw-r--r--tools/lib/traceevent/event-parse.h36
-rw-r--r--tools/lib/traceevent/event-plugin.c285
-rw-r--r--tools/lib/traceevent/kbuffer.h17
-rw-r--r--tools/lib/traceevent/plugins/Build2
-rw-r--r--tools/lib/traceevent/plugins/Makefile2
-rw-r--r--tools/lib/traceevent/plugins/plugin_function.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_futex.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_hrtimer.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_jbd2.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_kmem.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_kvm.c42
-rw-r--r--tools/lib/traceevent/plugins/plugin_mac80211.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_sched_switch.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_tlb.c66
-rw-r--r--tools/perf/Documentation/itrace.txt14
-rw-r--r--tools/perf/Documentation/perf-bench.txt11
-rw-r--r--tools/perf/Documentation/perf-data.txt3
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt63
-rw-r--r--tools/perf/Documentation/perf-list.txt1
-rw-r--r--tools/perf/Documentation/perf-record.txt44
-rw-r--r--tools/perf/Documentation/perf-script.txt4
-rw-r--r--tools/perf/Documentation/perf-stat.txt44
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt13
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c9
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c6
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl4
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h8
-rw-r--r--tools/perf/arch/powerpc/util/book3s_hcalls.h2
-rw-r--r--tools/perf/arch/powerpc/util/header.c9
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c55
-rw-r--r--tools/perf/arch/powerpc/util/utils_header.h15
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl4
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl5
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c4
-rw-r--r--tools/perf/bench/Build2
-rw-r--r--tools/perf/bench/bench.h2
-rw-r--r--tools/perf/bench/find-bit-bench.c135
-rw-r--r--tools/perf/bench/syscall.c81
-rw-r--r--tools/perf/builtin-bench.c9
-rw-r--r--tools/perf/builtin-c2c.c2
-rw-r--r--tools/perf/builtin-data.c1
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-kmem.c3
-rw-r--r--tools/perf/builtin-kvm.c2
-rw-r--r--tools/perf/builtin-record.c265
-rw-r--r--tools/perf/builtin-report.c9
-rw-r--r--tools/perf/builtin-script.c233
-rw-r--r--tools/perf/builtin-stat.c200
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/builtin-trace.c9
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json35
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/attr/README1
-rw-r--r--tools/perf/tests/attr/test-record-pfm-period9
-rw-r--r--tools/perf/tests/builtin-test.c4
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/expr.c15
-rw-r--r--tools/perf/tests/fdarray.c22
-rw-r--r--tools/perf/tests/parse-events.c42
-rw-r--r--tools/perf/tests/parse-metric.c352
-rw-r--r--tools/perf/tests/perf-record.c4
-rw-r--r--tools/perf/tests/pmu-events.c133
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rw-r--r--tools/perf/util/Build63
-rw-r--r--tools/perf/util/annotate.c15
-rw-r--r--tools/perf/util/auxtrace.c50
-rw-r--r--tools/perf/util/auxtrace.h31
-rw-r--r--tools/perf/util/clockid.c119
-rw-r--r--tools/perf/util/clockid.h11
-rw-r--r--tools/perf/util/data-convert-bt.c57
-rw-r--r--tools/perf/util/data-convert.h1
-rw-r--r--tools/perf/util/dso.c3
-rw-r--r--tools/perf/util/dso.h1
-rw-r--r--tools/perf/util/env.h14
-rw-r--r--tools/perf/util/event.c60
-rw-r--r--tools/perf/util/event.h7
-rw-r--r--tools/perf/util/evlist.c183
-rw-r--r--tools/perf/util/evlist.h59
-rw-r--r--tools/perf/util/evsel.c23
-rw-r--r--tools/perf/util/expr.c156
-rw-r--r--tools/perf/util/expr.h34
-rw-r--r--tools/perf/util/expr.l3
-rw-r--r--tools/perf/util/expr.y33
-rw-r--r--tools/perf/util/header.c121
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c214
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h1
-rw-r--r--tools/perf/util/intel-pt.c122
-rw-r--r--tools/perf/util/jitdump.c31
-rw-r--r--tools/perf/util/machine.c49
-rw-r--r--tools/perf/util/machine.h3
-rw-r--r--tools/perf/util/map.c5
-rw-r--r--tools/perf/util/map.h3
-rw-r--r--tools/perf/util/metricgroup.c549
-rw-r--r--tools/perf/util/metricgroup.h16
-rw-r--r--tools/perf/util/parse-events.c87
-rw-r--r--tools/perf/util/parse-events.h16
-rw-r--r--tools/perf/util/parse-events.l28
-rw-r--r--tools/perf/util/parse-events.y41
-rw-r--r--tools/perf/util/perf_api_probe.c10
-rw-r--r--tools/perf/util/perf_api_probe.h1
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c1
-rw-r--r--tools/perf/util/pmu.c11
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/probe-event.c18
-rw-r--r--tools/perf/util/probe-finder.c5
-rw-r--r--tools/perf/util/record.h5
-rw-r--r--tools/perf/util/session.c39
-rw-r--r--tools/perf/util/stat-shadow.c81
-rw-r--r--tools/perf/util/stat.h7
-rw-r--r--tools/perf/util/symbol.c3
-rw-r--r--tools/perf/util/tool.h3
-rw-r--r--tools/testing/ktest/examples/README2
-rw-r--r--tools/testing/ktest/examples/crosstests.conf2
-rwxr-xr-xtools/testing/ktest/ktest.pl103
-rw-r--r--tools/testing/ktest/sample.conf18
-rwxr-xr-xtools/testing/kunit/kunit.py24
-rw-r--r--tools/testing/kunit/kunit_kernel.py6
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py14
-rw-r--r--tools/testing/nvdimm/test/nfit.c367
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/bpf/Makefile73
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h14
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c14
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c187
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoload.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c564
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c417
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_retro.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/endian.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c129
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c1329
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/varlen.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c151
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h98
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c40
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c100
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c46
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c50
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c34
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c28
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c20
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c37
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c234
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c250
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c35
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c71
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c79
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h51
-rw-r--r--tools/testing/selftests/bpf/progs/btf_data.c50
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi.h13
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c33
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c57
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c57
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c27
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c686
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoload.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_retro.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_endian.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c641
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c158
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c36
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c21
-rw-r--r--tools/testing/selftests/bpf/progs/udp_limit.c61
-rw-r--r--tools/testing/selftests/bpf/settings1
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py2
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py2
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c17
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c15
-rwxr-xr-xtools/testing/selftests/bpf/test_kmod.sh12
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh2
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c21
-rw-r--r--tools/testing/selftests/bpf/test_progs.c66
-rw-r--r--tools/testing/selftests/bpf/test_progs.h4
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock_fields.c14
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c8
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c18
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c8
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c8
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c21
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh84
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c492
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c62
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr_mixing.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c38
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c53
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c450
-rw-r--r--tools/testing/selftests/clone3/clone3.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c3
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh129
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh22
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh108
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh92
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh76
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh786
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c284
-rw-r--r--tools/testing/selftests/firmware/settings8
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh6
-rw-r--r--tools/testing/selftests/kselftest.h106
-rw-r--r--tools/testing/selftests/kselftest/runner.sh8
-rw-r--r--tools/testing/selftests/kselftest_harness.h166
-rw-r--r--tools/testing/selftests/lib.mk20
-rw-r--r--tools/testing/selftests/livepatch/README16
-rw-r--r--tools/testing/selftests/livepatch/functions.sh40
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh84
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh6
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh12
-rwxr-xr-xtools/testing/selftests/livepatch/test-shadow-vars.sh85
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh21
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh6
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt1
-rw-r--r--tools/testing/selftests/mincore/.gitignore2
-rw-r--r--tools/testing/selftests/mincore/Makefile6
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c361
-rw-r--r--tools/testing/selftests/net/Makefile2
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py277
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh5
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh102
-rw-r--r--tools/testing/selftests/net/forwarding/ethtool_lib.sh17
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample3
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh198
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh492
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh333
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rw-r--r--tools/testing/selftests/net/mptcp/config2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh121
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c27
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh112
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh66
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c5
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh346
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh68
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c122
-rwxr-xr-xtools/testing/selftests/net/rxtimestamp.sh4
-rw-r--r--tools/testing/selftests/net/txtimestamp.c10
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh396
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/nft_meta.sh124
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c55
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c150
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c21
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-functions.sh11
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h77
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h136
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h6
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h29
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_denormal.c38
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c3
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c3
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_syscall.c7
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_preempt.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore5
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile18
-rw-r--r--tools/testing/selftests/powerpc/mm/bad_accesses.c28
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c294
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c333
-rw-r--r--tools/testing/selftests/powerpc/mm/prot_sao.c42
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c202
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_signal.c118
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c4
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c7
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h1
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c7
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c57
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c2
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c10
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c46
-rw-r--r--tools/testing/selftests/powerpc/utils.c65
-rw-r--r--tools/testing/selftests/ptp/testptp.c51
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c26
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c4
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/splice/Makefile4
-rw-r--r--tools/testing/selftests/splice/config1
-rw-r--r--tools/testing/selftests/splice/settings1
-rwxr-xr-xtools/testing/selftests/splice/short_splice_read.sh56
-rw-r--r--tools/testing/selftests/splice/splice_read.c57
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/tc-testing/Makefile (renamed from tools/testing/selftests/tc-testing/bpf/Makefile)9
-rw-r--r--tools/testing/selftests/tc-testing/action.c (renamed from tools/testing/selftests/tc-testing/bpf/action.c)0
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh5
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c1
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c129
-rw-r--r--tools/usb/usbip/doc/usbip.84
-rw-r--r--tools/usb/usbip/doc/usbipd.82
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_common.c2
-rwxr-xr-xtools/usb/usbip/vudc/vudc_server_example.sh2
-rw-r--r--tools/virtio/linux/virtio_config.h6
439 files changed, 23933 insertions, 3517 deletions
diff --git a/tools/Makefile b/tools/Makefile
index bd778812e915..85af6ebbce91 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -67,6 +67,9 @@ cpupower: FORCE
cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE
$(call descend,$@)
+bpf/%: FORCE
+ $(call descend,$@)
+
liblockdep: FORCE
$(call descend,lib/lockdep)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 264e266a85bf..c3af3f324c5a 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064dd8dc..bdf5f10f8b9f 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
- PERF_REG_POWERPC_MAX,
+ /* Extended registers */
+ PERF_REG_POWERPC_MMCR0,
+ PERF_REG_POWERPC_MMCR1,
+ PERF_REG_POWERPC_MMCR2,
+ PERF_REG_POWERPC_MMCR3,
+ PERF_REG_POWERPC_SIER2,
+ PERF_REG_POWERPC_SIER3,
+ /* Max regs without the extended regs */
+ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
+
+#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
+#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
+#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
+
+#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
+#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h
index 0e2eeeb1fd27..f506cca520b0 100644
--- a/tools/arch/riscv/include/uapi/asm/unistd.h
+++ b/tools/arch/riscv/include/uapi/asm/unistd.h
@@ -12,7 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef __LP64__
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 02dabc9e77b0..2901d5df4366 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -96,6 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
+/* free ( 3*32+17) */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -107,6 +108,7 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+/* free ( 3*32+29) */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
@@ -365,7 +367,9 @@
#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index e8370e64a155..2859ee4f39a8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -149,6 +149,10 @@
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+#define MSR_IA32_POWER_CTL_BIT_EE 19
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -158,7 +162,23 @@
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60)
#define LBR_INFO_CYCLES 0xffff
+#define LBR_INFO_BR_TYPE_OFFSET 56
+#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+
+#define MSR_ARCH_LBR_CTL 0x000014ce
+#define ARCH_LBR_CTL_LBREN BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET 1
+#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET 3
+#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET 16
+#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH 0x000014cf
+#define MSR_ARCH_LBR_FROM_0 0x00001500
+#define MSR_ARCH_LBR_TO_0 0x00001600
+#define MSR_ARCH_LBR_INFO_0 0x00001200
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_PEBS_DATA_CFG 0x000003f2
@@ -253,8 +273,6 @@
#define MSR_PEBS_FRONTEND 0x000003f7
-#define MSR_IA32_POWER_CTL 0x000001fc
-
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@@ -418,7 +436,6 @@
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
-#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
@@ -427,6 +444,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
@@ -466,6 +484,8 @@
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
diff --git a/tools/bootconfig/samples/bad-override.bconf b/tools/bootconfig/samples/bad-override.bconf
new file mode 100644
index 000000000000..fde6c561512e
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override.bconf
@@ -0,0 +1,3 @@
+key.subkey = value
+# We can not override pre-defined subkeys with value
+key := value
diff --git a/tools/bootconfig/samples/bad-override2.bconf b/tools/bootconfig/samples/bad-override2.bconf
new file mode 100644
index 000000000000..688587cb023c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override2.bconf
@@ -0,0 +1,3 @@
+key = value
+# We can not override pre-defined value with subkey
+key.subkey := value
diff --git a/tools/bootconfig/samples/good-override.bconf b/tools/bootconfig/samples/good-override.bconf
new file mode 100644
index 000000000000..7d31d5f8fbd8
--- /dev/null
+++ b/tools/bootconfig/samples/good-override.bconf
@@ -0,0 +1,6 @@
+# Override the value
+key.word = 1,2,4
+key.word := 2,3
+
+# No pre-defined key
+key.new.word := "new"
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 3c2ab9e75730..56284b98d8f0 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -117,6 +117,19 @@ xpass grep -q "bar" $OUTFILE
xpass grep -q "baz" $OUTFILE
xpass grep -q "qux" $OUTFILE
+echo "Override same-key values"
+cat > $TEMPCONF << EOF
+key = bar, baz
+key := qux
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q "bar" $OUTFILE
+xfail grep -q "baz" $OUTFILE
+xpass grep -q "qux" $OUTFILE
+
echo "Double/single quotes test"
echo "key = '\"string\"';" > $TEMPCONF
$BOOTCONF -a $TEMPCONF $INITRD
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 8a69258fd8aa..0a6d09a3e91f 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -124,5 +124,12 @@ runqslower_install:
runqslower_clean:
$(call descend,runqslower,clean)
+resolve_btfids:
+ $(call descend,resolve_btfids)
+
+resolve_btfids_clean:
+ $(call descend,resolve_btfids,clean)
+
.PHONY: all install clean bpftool bpftool_install bpftool_clean \
- runqslower runqslower_install runqslower_clean
+ runqslower runqslower_install runqslower_clean \
+ resolve_btfids resolve_btfids_clean
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 26cde83e1ca3..3e601bcfd461 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
*.d
-/_bpftool
+/bpftool-bootstrap
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
-profiler.skel.h
+/*.skel.h
+/vmlinux.h
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index ce3a724f50c1..896f4c6c2870 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -36,6 +36,11 @@ DESCRIPTION
otherwise list all BTF objects currently loaded on the
system.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BTF
+ objects. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool btf dump** *BTF_SRC*
Dump BTF entries from a given *BTF_SRC*.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 8dce698eab79..070ffacb42b5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -17,14 +17,15 @@ SYNOPSIS
ITER COMMANDS
===================
-| **bpftool** **iter pin** *OBJ* *PATH*
+| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*]
| **bpftool** **iter help**
|
| *OBJ* := /a/file/of/bpf_iter_target.o
+| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
DESCRIPTION
===========
- **bpftool iter pin** *OBJ* *PATH*
+ **bpftool iter pin** *OBJ* *PATH* [**map** *MAP*]
A bpf iterator combines a kernel iterating of
particular kernel data (e.g., tasks, bpf_maps, etc.)
and a bpf program called for each kernel data object
@@ -37,6 +38,12 @@ DESCRIPTION
character ('.'), which is reserved for future extensions
of *bpffs*.
+ A map element bpf iterator requires an additional parameter
+ *MAP* so that the bpf program can iterate over the elements
+ of that map. The user can have a bpf program run in the kernel
+ for each map element to do checking, filtering, aggregation,
+ etc. without copying data to user space.
+
User can then *cat PATH* to see the bpf iterator output.
**bpftool iter help**
@@ -64,6 +71,13 @@ EXAMPLES
Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
to /sys/fs/bpf/my_netlink
+**# bpftool iter pin bpf_iter_hashmap.o /sys/fs/bpf/my_hashmap map id 20**
+
+::
+
+ Create a file-based bpf iterator from bpf_iter_hashmap.o and map with
+ id 20, and pin it to /sys/fs/bpf/my_hashmap
+
SEE ALSO
========
**bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 0e43d7b06c11..4a52e7a93339 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -21,6 +21,7 @@ LINK COMMANDS
| **bpftool** **link { show | list }** [*LINK*]
| **bpftool** **link pin** *LINK* *FILE*
+| **bpftool** **link detach** *LINK*
| **bpftool** **link help**
|
| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
@@ -37,6 +38,11 @@ DESCRIPTION
zero or more named attributes, some of which depend on type
of link.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ links. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool link pin** *LINK* *FILE*
Pin link *LINK* as *FILE*.
@@ -44,6 +50,13 @@ DESCRIPTION
contain a dot character ('.'), which is reserved for future
extensions of *bpffs*.
+ **bpftool link detach** *LINK*
+ Force-detach link *LINK*. The BPF link and its underlying BPF
+ program will stay valid, but they will be detached from the
+ respective BPF hook, and the BPF link will transition into
+ a defunct state until the last open file descriptor for that
+ link is closed.
+
**bpftool link help**
Print short help message.
@@ -82,6 +95,7 @@ EXAMPLES
10: cgroup prog 25
cgroup_id 614 attach_type egress
+ pids test_progs(223)
**# bpftool --json --pretty link show**
@@ -91,7 +105,12 @@ EXAMPLES
"type": "cgroup",
"prog_id": 25,
"cgroup_id": 614,
- "attach_type": "egress"
+ "attach_type": "egress",
+ "pids": [{
+ "pid": 223,
+ "comm": "test_progs"
+ }
+ ]
}
]
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 70c78faa47ab..41e2a74252d0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -62,6 +62,11 @@ DESCRIPTION
Output will start with map ID followed by map type and
zero or more named attributes (depending on kernel version).
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ maps. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
@@ -180,7 +185,8 @@ EXAMPLES
::
10: hash name some_map flags 0x0
- key 4B value 8B max_entries 2048 memlock 167936B
+ key 4B value 8B max_entries 2048 memlock 167936B
+ pids systemd(1)
The following three commands are equivalent:
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 2b254959d488..82e356b664e8 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -45,7 +45,7 @@ PROG COMMANDS
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** |
-| **struct_ops** | **fentry** | **fexit** | **freplace**
+| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
@@ -75,6 +75,11 @@ DESCRIPTION
program run. Activation or deactivation of the feature is
performed via the **kernel.bpf_stats_enabled** sysctl knob.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ programs. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
Dump eBPF instructions of the programs from the kernel. By
default, eBPF will be disassembled and printed to standard
@@ -243,6 +248,7 @@ EXAMPLES
10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10
loaded_at 2017-09-29T20:11:00+0000 uid 0
xlated 528B jited 370B memlock 4096B map_ids 10
+ pids systemd(1)
**# bpftool --json --pretty prog show**
@@ -262,6 +268,11 @@ EXAMPLES
"bytes_jited": 370,
"bytes_memlock": 4096,
"map_ids": [10
+ ],
+ "pids": [{
+ "pid": 1,
+ "comm": "systemd"
+ }
]
}
]
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e85f101be85..8462690a039b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -40,8 +40,9 @@ bash_compdir ?= /usr/share/bash-completion/completions
CFLAGS += -O2
CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
-CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
+CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+ -I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/kernel/bpf/ \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
@@ -58,12 +59,13 @@ endif
INSTALL ?= install
RM ?= rm -f
CLANG ?= clang
+LLVM_STRIP ?= llvm-strip
FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
- clang-bpf-global-var
+ clang-bpf-co-re
FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
- clang-bpf-global-var
+ clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -116,40 +118,60 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
SRCS += $(BFD_SRCS)
endif
+BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+
+BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
-_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
-ifeq ($(feature-clang-bpf-global-var),1)
- __OBJS = $(OBJS)
-else
- __OBJS = $(_OBJS)
-endif
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-$(OUTPUT)_prog.o: prog.c
- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
+ifeq ($(feature-clang-bpf-co-re),1)
-$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+BUILD_BPF_SKELS := 1
-skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+ifeq ($(VMLINUX_H),)
+ $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
$(QUIET_CLANG)$(CLANG) \
+ -I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
- -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
- -g -O2 -target bpf -c $< -o $@
+ -I$(LIBBPF_PATH) \
+ -I$(srctree)/tools/lib \
+ -g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@
-profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
- $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
+ $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
-$(OUTPUT)prog.o: prog.c profiler.skel.h
- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
+$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
+
+$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
+
+endif
+endif
+
+CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
$(OUTPUT)feature.o: | zdep
-$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
@@ -157,7 +179,7 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
- $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
+ $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
$(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
@@ -192,6 +214,7 @@ FORCE:
zdep:
@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
+.SECONDARY:
.PHONY: all FORCE clean install uninstall zdep
.PHONY: doc doc-clean doc-install doc-uninstall
.DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 25b25aca1112..f53ed2f1a4aa 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -479,7 +479,7 @@ _bpftool()
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt struct_ops \
- fentry fexit freplace" -- \
+ fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
;;
@@ -615,7 +615,23 @@ _bpftool()
iter)
case $command in
pin)
- _filedir
+ case $prev in
+ $command)
+ _filedir
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ _bpftool_get_map_names
+ ;;
+ pinned)
+ _filedir
+ ;;
+ *)
+ _bpftool_one_of_list $MAP_TYPE
+ ;;
+ esac
return 0
;;
*)
@@ -1106,7 +1122,7 @@ _bpftool()
;;
link)
case $command in
- show|list|pin)
+ show|list|pin|detach)
case $prev in
id)
_bpftool_get_link_ids
@@ -1123,7 +1139,7 @@ _bpftool()
COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
return 0
;;
- pin)
+ pin|detach)
if [[ $prev == "$command" ]]; then
COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
else
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index faac8189b285..8ab142ff5eac 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -422,54 +422,6 @@ done:
return err;
}
-static struct btf *btf__parse_raw(const char *file)
-{
- struct btf *btf;
- struct stat st;
- __u8 *buf;
- FILE *f;
-
- if (stat(file, &st))
- return NULL;
-
- f = fopen(file, "rb");
- if (!f)
- return NULL;
-
- buf = malloc(st.st_size);
- if (!buf) {
- btf = ERR_PTR(-ENOMEM);
- goto exit_close;
- }
-
- if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
- btf = ERR_PTR(-EINVAL);
- goto exit_free;
- }
-
- btf = btf__new(buf, st.st_size);
-
-exit_free:
- free(buf);
-exit_close:
- fclose(f);
- return btf;
-}
-
-static bool is_btf_raw(const char *file)
-{
- __u16 magic = 0;
- int fd, nb_read;
-
- fd = open(file, O_RDONLY);
- if (fd < 0)
- return false;
-
- nb_read = read(fd, &magic, sizeof(magic));
- close(fd);
- return nb_read == sizeof(magic) && magic == BTF_MAGIC;
-}
-
static int do_dump(int argc, char **argv)
{
struct btf *btf = NULL;
@@ -547,11 +499,7 @@ static int do_dump(int argc, char **argv)
}
NEXT_ARG();
} else if (is_prefix(src, "file")) {
- if (is_btf_raw(*argv))
- btf = btf__parse_raw(*argv);
- else
- btf = btf__parse_elf(*argv, NULL);
-
+ btf = btf__parse(*argv, NULL);
if (IS_ERR(btf)) {
err = -PTR_ERR(btf);
btf = NULL;
@@ -596,7 +544,7 @@ static int do_dump(int argc, char **argv)
goto done;
}
if (!btf) {
- err = ENOENT;
+ err = -ENOENT;
p_err("can't find btf with ID (%u)", btf_id);
goto done;
}
@@ -809,6 +757,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
printf("%s%u", n++ == 0 ? " map_ids " : ",",
obj->obj_id);
}
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
}
@@ -841,6 +790,9 @@ show_btf_json(struct bpf_btf_info *info, int fd,
jsonw_uint(json_wtr, obj->obj_id);
}
jsonw_end_array(json_wtr); /* map_ids */
+
+ emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
+
jsonw_end_object(json_wtr); /* btf object */
}
@@ -893,6 +845,7 @@ static int do_show(int argc, char **argv)
close(fd);
return err;
}
+ build_obj_refs_table(&refs_table, BPF_OBJ_BTF);
if (fd >= 0) {
err = show_btf(fd, &btf_prog_table, &btf_map_table);
@@ -939,6 +892,7 @@ static int do_show(int argc, char **argv)
exit_free:
delete_btf_table(&btf_prog_table);
delete_btf_table(&btf_map_table);
+ delete_obj_refs_table(&refs_table);
return err;
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c47bdc65de8e..65303664417e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
-#include <fts.h>
+#include <ftw.h>
#include <libgen.h>
#include <mntent.h>
#include <stdbool.h>
@@ -29,6 +30,44 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+ [BPF_CGROUP_INET_INGRESS] = "ingress",
+ [BPF_CGROUP_INET_EGRESS] = "egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+ [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release",
+ [BPF_CGROUP_SOCK_OPS] = "sock_ops",
+ [BPF_CGROUP_DEVICE] = "device",
+ [BPF_CGROUP_INET4_BIND] = "bind4",
+ [BPF_CGROUP_INET6_BIND] = "bind6",
+ [BPF_CGROUP_INET4_CONNECT] = "connect4",
+ [BPF_CGROUP_INET6_CONNECT] = "connect6",
+ [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+ [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
+ [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+ [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+ [BPF_CGROUP_SYSCTL] = "sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
+ [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+ [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
+
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "raw_tp",
+ [BPF_TRACE_FENTRY] = "fentry",
+ [BPF_TRACE_FEXIT] = "fexit",
+ [BPF_MODIFY_RETURN] = "mod_ret",
+ [BPF_LSM_MAC] = "lsm_mac",
+ [BPF_SK_LOOKUP] = "sk_lookup",
+};
+
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -123,24 +162,35 @@ int mount_tracefs(const char *target)
return err;
}
-int open_obj_pinned(char *path, bool quiet)
+int open_obj_pinned(const char *path, bool quiet)
{
- int fd;
+ char *pname;
+ int fd = -1;
+
+ pname = strdup(path);
+ if (!pname) {
+ if (!quiet)
+ p_err("mem alloc failed");
+ goto out_ret;
+ }
- fd = bpf_obj_get(path);
+ fd = bpf_obj_get(pname);
if (fd < 0) {
if (!quiet)
- p_err("bpf obj get (%s): %s", path,
- errno == EACCES && !is_bpffs(dirname(path)) ?
+ p_err("bpf obj get (%s): %s", pname,
+ errno == EACCES && !is_bpffs(dirname(pname)) ?
"directory not in bpf file system (bpffs)" :
strerror(errno));
- return -1;
+ goto out_free;
}
+out_free:
+ free(pname);
+out_ret:
return fd;
}
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
{
enum bpf_obj_type type;
int fd;
@@ -330,71 +380,82 @@ void print_hex_data_json(uint8_t *data, size_t len)
jsonw_end_array(json_wtr);
}
+/* Extra params for the nftw() callback, passed via file-scope statics */
+static struct pinned_obj_table *build_fn_table;
+static enum bpf_obj_type build_fn_type;
+
+static int do_build_table_cb(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ struct bpf_prog_info pinned_info;
+ __u32 len = sizeof(pinned_info);
+ struct pinned_obj *obj_node;
+ enum bpf_obj_type objtype;
+ int fd, err = 0;
+
+ if (typeflag != FTW_F)
+ goto out_ret;
+
+ fd = open_obj_pinned(fpath, true);
+ if (fd < 0)
+ goto out_ret;
+
+ objtype = get_fd_type(fd);
+ if (objtype != build_fn_type)
+ goto out_close;
+
+ memset(&pinned_info, 0, sizeof(pinned_info));
+ if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+ goto out_close;
+
+ obj_node = calloc(1, sizeof(*obj_node));
+ if (!obj_node) {
+ err = -1;
+ goto out_close;
+ }
+
+ obj_node->id = pinned_info.id;
+ obj_node->path = strdup(fpath);
+ if (!obj_node->path) {
+ err = -1;
+ free(obj_node);
+ goto out_close;
+ }
+
+ hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
+out_close:
+ close(fd);
+out_ret:
+ return err;
+}
+
int build_pinned_obj_table(struct pinned_obj_table *tab,
enum bpf_obj_type type)
{
- struct bpf_prog_info pinned_info = {};
- struct pinned_obj *obj_node = NULL;
- __u32 len = sizeof(pinned_info);
struct mntent *mntent = NULL;
- enum bpf_obj_type objtype;
FILE *mntfile = NULL;
- FTSENT *ftse = NULL;
- FTS *fts = NULL;
- int fd, err;
+ int flags = FTW_PHYS;
+ int nopenfd = 16;
+ int err = 0;
mntfile = setmntent("/proc/mounts", "r");
if (!mntfile)
return -1;
+ build_fn_table = tab;
+ build_fn_type = type;
+
while ((mntent = getmntent(mntfile))) {
- char *path[] = { mntent->mnt_dir, NULL };
+ char *path = mntent->mnt_dir;
if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
continue;
-
- fts = fts_open(path, 0, NULL);
- if (!fts)
- continue;
-
- while ((ftse = fts_read(fts))) {
- if (!(ftse->fts_info & FTS_F))
- continue;
- fd = open_obj_pinned(ftse->fts_path, true);
- if (fd < 0)
- continue;
-
- objtype = get_fd_type(fd);
- if (objtype != type) {
- close(fd);
- continue;
- }
- memset(&pinned_info, 0, sizeof(pinned_info));
- err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
- if (err) {
- close(fd);
- continue;
- }
-
- obj_node = malloc(sizeof(*obj_node));
- if (!obj_node) {
- close(fd);
- fts_close(fts);
- fclose(mntfile);
- return -1;
- }
-
- memset(obj_node, 0, sizeof(*obj_node));
- obj_node->id = pinned_info.id;
- obj_node->path = strdup(ftse->fts_path);
- hash_add(tab->table, &obj_node->hash, obj_node->id);
-
- close(fd);
- }
- fts_close(fts);
+ err = nftw(path, do_build_table_cb, nopenfd, flags);
+ if (err)
+ break;
}
fclose(mntfile);
- return 0;
+ return err;
}
void delete_pinned_obj_table(struct pinned_obj_table *tab)
@@ -581,3 +642,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
{
return vfprintf(stderr, format, args);
}
+
+static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
+{
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get prog by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get prog info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
+ (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+
+int prog_parse_fds(int *argc, char ***argv, int **fds)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ (*fds)[0] = bpf_prog_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
+ p_err("get by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "tag")) {
+ unsigned char tag[BPF_TAG_SIZE];
+
+ NEXT_ARGP();
+
+ if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+ tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+ != BPF_TAG_SIZE) {
+ p_err("can't parse tag");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(tag, fds, true);
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(name, fds, false);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
+ }
+
+ p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = prog_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several programs match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
+static int map_fd_by_name(char *name, int **fds)
+{
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get map info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+
+int map_parse_fds(int *argc, char ***argv, int **fds)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ (*fds)[0] = bpf_map_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
+ p_err("get map by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return map_fd_by_name(name, fds);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
+ }
+
+ p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int map_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = map_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several maps match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+ int err;
+ int fd;
+
+ fd = map_parse_fd(argc, argv);
+ if (fd < 0)
+ return -1;
+
+ err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ return err;
+ }
+
+ return fd;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 768bf77df886..a43a6f10b564 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -504,6 +504,10 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
supported_types[prog_type] |= res;
+ if (!prog_type_name[prog_type]) {
+ p_info("program type name not found (type %d)", prog_type);
+ return;
+ }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(prog_type_name[prog_type]) > maxlen) {
p_info("program type name too long");
@@ -533,6 +537,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
* check required for unprivileged users
*/
+ if (!map_type_name[map_type]) {
+ p_info("map type name not found (type %d)", map_type);
+ return;
+ }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(map_type_name[map_type]) > maxlen) {
p_info("map type name too long");
@@ -695,7 +703,7 @@ section_program_types(bool *supported_types, const char *define_prefix,
"/*** eBPF program types ***/",
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
probe_prog_type(i, supported_types, define_prefix, ifindex);
print_end_section();
@@ -741,7 +749,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
define_prefix, define_prefix, define_prefix,
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
probe_helpers_for_progtype(i, supported_types[i], define_prefix,
ifindex);
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 10de76b296ba..8a4c2b3b0cd6 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -88,7 +88,7 @@ static const char *get_map_ident(const struct bpf_map *map)
return NULL;
}
-static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args)
+static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args)
{
vprintf(fmt, args);
}
@@ -104,17 +104,20 @@ static int codegen_datasec_def(struct bpf_object *obj,
int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec);
const char *sec_ident;
char var_ident[256];
+ bool strip_mods = false;
- if (strcmp(sec_name, ".data") == 0)
+ if (strcmp(sec_name, ".data") == 0) {
sec_ident = "data";
- else if (strcmp(sec_name, ".bss") == 0)
+ } else if (strcmp(sec_name, ".bss") == 0) {
sec_ident = "bss";
- else if (strcmp(sec_name, ".rodata") == 0)
+ } else if (strcmp(sec_name, ".rodata") == 0) {
sec_ident = "rodata";
- else if (strcmp(sec_name, ".kconfig") == 0)
+ strip_mods = true;
+ } else if (strcmp(sec_name, ".kconfig") == 0) {
sec_ident = "kconfig";
- else
+ } else {
return 0;
+ }
printf(" struct %s__%s {\n", obj_name, sec_ident);
for (i = 0; i < vlen; i++, sec_var++) {
@@ -123,16 +126,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
.field_name = var_ident,
.indent_level = 2,
+ .strip_mods = strip_mods,
);
int need_off = sec_var->offset, align_off, align;
__u32 var_type_id = var->type;
- const struct btf_type *t;
-
- t = btf__type_by_id(btf, var_type_id);
- while (btf_is_mod(t)) {
- var_type_id = t->type;
- t = btf__type_by_id(btf, var_type_id);
- }
if (off > need_off) {
p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
@@ -305,8 +302,11 @@ static int do_skeleton(int argc, char **argv)
opts.object_name = obj_name;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
if (IS_ERR(obj)) {
+ char err_buf[256];
+
+ libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
+ p_err("failed to open BPF object file: %s", err_buf);
obj = NULL;
- p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
goto out;
}
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 33240fcc6319..3b1aad7535dd 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -2,6 +2,7 @@
// Copyright (C) 2020 Facebook
#define _GNU_SOURCE
+#include <unistd.h>
#include <linux/err.h>
#include <bpf/libbpf.h>
@@ -9,11 +10,13 @@
static int do_pin(int argc, char **argv)
{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
+ union bpf_iter_link_info linfo;
const char *objfile, *path;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link;
- int err;
+ int err = -1, map_fd = -1;
if (!REQ_ARGS(2))
usage();
@@ -21,10 +24,31 @@ static int do_pin(int argc, char **argv)
objfile = GET_ARG();
path = GET_ARG();
+ /* optional arguments */
+ if (argc) {
+ if (is_prefix(*argv, "map")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(2)) {
+ p_err("incorrect map spec");
+ return -1;
+ }
+
+ map_fd = map_parse_fd(&argc, &argv);
+ if (map_fd < 0)
+ return -1;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ iter_opts.link_info = &linfo;
+ iter_opts.link_info_len = sizeof(linfo);
+ }
+ }
+
obj = bpf_object__open(objfile);
if (IS_ERR(obj)) {
p_err("can't open objfile %s", objfile);
- return -1;
+ goto close_map_fd;
}
err = bpf_object__load(obj);
@@ -39,7 +63,7 @@ static int do_pin(int argc, char **argv)
goto close_obj;
}
- link = bpf_program__attach_iter(prog, NULL);
+ link = bpf_program__attach_iter(prog, &iter_opts);
if (IS_ERR(link)) {
err = PTR_ERR(link);
p_err("attach_iter failed for program %s",
@@ -62,14 +86,18 @@ close_link:
bpf_link__destroy(link);
close_obj:
bpf_object__close(obj);
+close_map_fd:
+ if (map_fd >= 0)
+ close(map_fd);
return err;
}
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %1$s %2$s pin OBJ PATH\n"
+ "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
" %1$s %2$s help\n"
+ " " HELP_SPEC_MAP "\n"
"",
bin_name, "iter");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index fca57ee8fafe..1b793759170e 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -22,6 +22,8 @@ static const char * const link_type_name[] = {
static int link_parse_fd(int *argc, char ***argv)
{
+ int fd;
+
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@@ -35,7 +37,10 @@ static int link_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
- return bpf_link_get_fd_by_id(id);
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("failed to get link with ID %d: %s", id, strerror(errno));
+ return fd;
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -108,7 +113,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < ARRAY_SIZE(prog_type_name))
+ if (prog_info.type < prog_type_name_size)
jsonw_string_field(json_wtr, "prog_type",
prog_type_name[prog_info.type]);
else
@@ -143,6 +148,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
}
jsonw_end_array(json_wtr);
}
+
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
return 0;
@@ -184,7 +192,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < ARRAY_SIZE(prog_type_name))
+ if (prog_info.type < prog_type_name_size)
printf("\n\tprog_type %s ",
prog_type_name[prog_info.type]);
else
@@ -212,6 +220,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
printf("\n\tpinned %s", obj->path);
}
}
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
@@ -257,6 +266,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+ build_obj_refs_table(&refs_table, BPF_OBJ_LINK);
if (argc == 2) {
fd = link_parse_fd(&argc, &argv);
@@ -296,6 +306,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return errno == ENOENT ? 0 : -1;
}
@@ -309,6 +321,34 @@ static int do_pin(int argc, char **argv)
return err;
}
+/*
+ * "link detach LINK" sub-command: resolve the LINK specifier to a file
+ * descriptor with link_parse_fd() and ask the kernel to detach that link.
+ *
+ * Expects exactly two arguments (the LINK specifier). Returns 0 on success
+ * and 1 on any failure. In JSON mode a successful detach emits a JSON null.
+ */
+static int do_detach(int argc, char **argv)
+{
+	int err, fd;
+
+	if (argc != 2) {
+		/*
+		 * No trailing newline in the message, matching the other
+		 * p_err() call below.
+		 */
+		p_err("link specifier is invalid or missing");
+		return 1;
+	}
+
+	fd = link_parse_fd(&argc, &argv);
+	if (fd < 0)
+		return 1;
+
+	/* latch errno before close() gets a chance to overwrite it */
+	err = bpf_link_detach(fd);
+	if (err)
+		err = -errno;
+	close(fd);
+	if (err) {
+		p_err("failed link detach: %s", strerror(-err));
+		return 1;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+
+	return 0;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -319,6 +359,7 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s { show | list } [LINK]\n"
" %1$s %2$s pin LINK FILE\n"
+ " %1$s %2$s detach LINK\n"
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
@@ -334,6 +375,7 @@ static const struct cmd cmds[] = {
{ "list", do_show },
{ "help", do_help },
{ "pin", do_pin },
+ { "detach", do_detach },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 46bd716a9d86..4a191fcbeb82 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,6 +31,7 @@ bool relaxed_maps;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
struct pinned_obj_table link_table;
+struct obj_refs_table refs_table;
static void __noreturn clean_and_exit(int i)
{
@@ -92,9 +93,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
if (argc < 1 && cmds[0].func)
return cmds[0].func(argc, argv);
- for (i = 0; cmds[i].func; i++)
- if (is_prefix(*argv, cmds[i].cmd))
+ for (i = 0; cmds[i].cmd; i++) {
+ if (is_prefix(*argv, cmds[i].cmd)) {
+ if (!cmds[i].func) {
+ p_err("command '%s' is not supported in bootstrap mode",
+ cmds[i].cmd);
+ return -1;
+ }
return cmds[i].func(argc - 1, argv + 1);
+ }
+ }
help(argc - 1, argv + 1);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 5cdf0bc049bd..e3a79b5a9960 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -56,82 +56,21 @@
#define HELP_SPEC_LINK \
"LINK := { id LINK_ID | pinned FILE }"
-static const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
- [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
- [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
- [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
- [BPF_PROG_TYPE_XDP] = "xdp",
- [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
- [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
- [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
- [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
- [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
- [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
- [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
- [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
- [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
- [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
- [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
- [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
- [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
- [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
- [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
- [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
- [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
- [BPF_PROG_TYPE_TRACING] = "tracing",
- [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_PROG_TYPE_EXT] = "ext",
-};
+extern const char * const prog_type_name[];
+extern const size_t prog_type_name_size;
-static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
- [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
- [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
- [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-
- [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
- [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
- [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
- [BPF_LIRC_MODE2] = "lirc_mode2",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_TRACE_RAW_TP] = "raw_tp",
- [BPF_TRACE_FENTRY] = "fentry",
- [BPF_TRACE_FEXIT] = "fexit",
- [BPF_MODIFY_RETURN] = "mod_ret",
- [BPF_LSM_MAC] = "lsm_mac",
-};
+extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
extern const char * const map_type_name[];
extern const size_t map_type_name_size;
+/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
enum bpf_obj_type {
BPF_OBJ_UNKNOWN,
BPF_OBJ_PROG,
BPF_OBJ_MAP,
BPF_OBJ_LINK,
+ BPF_OBJ_BTF,
};
extern const char *bin_name;
@@ -139,12 +78,14 @@ extern const char *bin_name;
extern json_writer_t *json_wtr;
extern bool json_output;
extern bool show_pinned;
+extern bool show_pids;
extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
extern struct pinned_obj_table link_table;
+extern struct obj_refs_table refs_table;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -168,12 +109,35 @@ struct pinned_obj {
struct hlist_node hash;
};
+struct obj_refs_table { /* hash table of struct obj_refs entries, hashed by object ID */
+ DECLARE_HASHTABLE(table, 16);
+};
+
+struct obj_ref { /* one process that references an object */
+ int pid; /* copied from pid_iter_entry.pid */
+ char comm[16]; /* filled by memcpy(sizeof(comm)) from pid_iter_entry.comm, so the two sizes must match */
+};
+
+struct obj_refs { /* all processes recorded for a single object ID */
+ struct hlist_node node; /* linkage into obj_refs_table.table */
+ __u32 id; /* ID of the referenced object */
+ int ref_cnt; /* number of valid elements in refs[] */
+ struct obj_ref *refs; /* heap array, grown one element at a time by add_ref() in pids.c */
+};
+
struct btf;
struct bpf_line_info;
int build_pinned_obj_table(struct pinned_obj_table *table,
enum bpf_obj_type type);
void delete_pinned_obj_table(struct pinned_obj_table *tab);
+__weak int build_obj_refs_table(struct obj_refs_table *table,
+ enum bpf_obj_type type);
+__weak void delete_obj_refs_table(struct obj_refs_table *table);
+__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+ json_writer_t *json_wtr);
+__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id,
+ const char *prefix);
void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
@@ -188,29 +152,34 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path, bool quiet);
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int open_obj_pinned(const char *path, bool quiet);
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
-int do_prog(int argc, char **arg);
-int do_map(int argc, char **arg);
-int do_link(int argc, char **arg);
-int do_event_pipe(int argc, char **argv);
-int do_cgroup(int argc, char **arg);
-int do_perf(int argc, char **arg);
-int do_net(int argc, char **arg);
-int do_tracelog(int argc, char **arg);
-int do_feature(int argc, char **argv);
-int do_btf(int argc, char **argv);
+/* commands available in bootstrap mode */
int do_gen(int argc, char **argv);
-int do_struct_ops(int argc, char **argv);
-int do_iter(int argc, char **argv);
+int do_btf(int argc, char **argv);
+
+/* non-bootstrap only commands */
+int do_prog(int argc, char **arg) __weak;
+int do_map(int argc, char **arg) __weak;
+int do_link(int argc, char **arg) __weak;
+int do_event_pipe(int argc, char **argv) __weak;
+int do_cgroup(int argc, char **arg) __weak;
+int do_perf(int argc, char **arg) __weak;
+int do_net(int argc, char **arg) __weak;
+int do_tracelog(int argc, char **arg) __weak;
+int do_feature(int argc, char **argv) __weak;
+int do_struct_ops(int argc, char **argv) __weak;
+int do_iter(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
+int prog_parse_fds(int *argc, char ***argv, int **fds);
int map_parse_fd(int *argc, char ***argv);
+int map_parse_fds(int *argc, char ***argv, int **fds);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
struct bpf_prog_linfo;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 1d3b60651078..3a27d31a1856 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -93,162 +93,6 @@ static void *alloc_value(struct bpf_map_info *info)
return malloc(info->value_size);
}
-static int map_fd_by_name(char *name, int **fds)
-{
- unsigned int id = 0;
- int fd, nb_fds = 0;
- void *tmp;
- int err;
-
- while (true) {
- struct bpf_map_info info = {};
- __u32 len = sizeof(info);
-
- err = bpf_map_get_next_id(id, &id);
- if (err) {
- if (errno != ENOENT) {
- p_err("%s", strerror(errno));
- goto err_close_fds;
- }
- return nb_fds;
- }
-
- fd = bpf_map_get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't get map by id (%u): %s",
- id, strerror(errno));
- goto err_close_fds;
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get map info (%u): %s",
- id, strerror(errno));
- goto err_close_fd;
- }
-
- if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
- close(fd);
- continue;
- }
-
- if (nb_fds > 0) {
- tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
- if (!tmp) {
- p_err("failed to realloc");
- goto err_close_fd;
- }
- *fds = tmp;
- }
- (*fds)[nb_fds++] = fd;
- }
-
-err_close_fd:
- close(fd);
-err_close_fds:
- while (--nb_fds >= 0)
- close((*fds)[nb_fds]);
- return -1;
-}
-
-static int map_parse_fds(int *argc, char ***argv, int **fds)
-{
- if (is_prefix(**argv, "id")) {
- unsigned int id;
- char *endptr;
-
- NEXT_ARGP();
-
- id = strtoul(**argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", **argv);
- return -1;
- }
- NEXT_ARGP();
-
- (*fds)[0] = bpf_map_get_fd_by_id(id);
- if ((*fds)[0] < 0) {
- p_err("get map by id (%u): %s", id, strerror(errno));
- return -1;
- }
- return 1;
- } else if (is_prefix(**argv, "name")) {
- char *name;
-
- NEXT_ARGP();
-
- name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
- p_err("can't parse name");
- return -1;
- }
- NEXT_ARGP();
-
- return map_fd_by_name(name, fds);
- } else if (is_prefix(**argv, "pinned")) {
- char *path;
-
- NEXT_ARGP();
-
- path = **argv;
- NEXT_ARGP();
-
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
- if ((*fds)[0] < 0)
- return -1;
- return 1;
- }
-
- p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
- return -1;
-}
-
-int map_parse_fd(int *argc, char ***argv)
-{
- int *fds = NULL;
- int nb_fds, fd;
-
- fds = malloc(sizeof(int));
- if (!fds) {
- p_err("mem alloc failed");
- return -1;
- }
- nb_fds = map_parse_fds(argc, argv, &fds);
- if (nb_fds != 1) {
- if (nb_fds > 1) {
- p_err("several maps match this handle");
- while (nb_fds--)
- close(fds[nb_fds]);
- }
- fd = -1;
- goto exit_free;
- }
-
- fd = fds[0];
-exit_free:
- free(fds);
- return fd;
-}
-
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
-{
- int err;
- int fd;
-
- fd = map_parse_fd(argc, argv);
- if (fd < 0)
- return -1;
-
- err = bpf_obj_get_info_by_fd(fd, info, info_len);
- if (err) {
- p_err("can't get map info: %s", strerror(errno));
- close(fd);
- return err;
- }
-
- return fd;
-}
-
static int do_dump_btf(const struct btf_dumper *d,
struct bpf_map_info *map_info, void *key,
void *value)
@@ -629,7 +473,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
- if (prog_type < ARRAY_SIZE(prog_type_name))
+ if (prog_type < prog_type_name_size)
jsonw_string_field(json_wtr, "owner_prog_type",
prog_type_name[prog_type]);
else
@@ -666,6 +510,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_end_array(json_wtr);
}
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
return 0;
@@ -712,7 +558,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
- if (prog_type < ARRAY_SIZE(prog_type_name))
+ if (prog_type < prog_type_name_size)
printf("owner_prog_type %s ",
prog_type_name[prog_type]);
else
@@ -753,6 +599,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (frozen)
printf("%sfrozen", info->btf_id ? " " : "");
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
printf("\n");
return 0;
}
@@ -811,6 +659,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
+ build_obj_refs_table(&refs_table, BPF_OBJ_MAP);
if (argc == 2)
return do_show_subset(argc, argv);
@@ -854,6 +703,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return errno == ENOENT ? 0 : -1;
}
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
new file mode 100644
index 000000000000..e3b116325403
--- /dev/null
+++ b/tools/bpf/bpftool/pids.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+
+#include "main.h"
+#include "skeleton/pid_iter.h"
+
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) /* stub used when BPFTOOL_WITHOUT_SKELETONS is defined */
+{
+ return -ENOTSUP; /* the real implementation below needs pid_iter.skel.h */
+}
+void delete_obj_refs_table(struct obj_refs_table *table) {} /* no-op stub: nothing was built */
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} /* no-op stub: prints nothing */
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {} /* no-op stub: emits nothing */
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "pid_iter.skel.h"
+
+/*
+ * Record in @table that the process described by @e references object e->id.
+ *
+ * If the object already has an entry, the (pid, comm) pair is appended to its
+ * array unless that pid is already listed. Otherwise a new entry holding a
+ * single reference is created and hashed by object ID. Allocation failures
+ * are reported with p_err() and the record is dropped.
+ */
+static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e)
+{
+	struct obj_refs *cur, *refs = NULL;
+	struct obj_ref *slot;
+	void *grown;
+	int idx;
+
+	/* look for an existing entry for this object ID */
+	hash_for_each_possible(table->table, cur, node, e->id) {
+		if (cur->id == e->id) {
+			refs = cur;
+			break;
+		}
+	}
+
+	if (refs) {
+		/* each PID is listed at most once per object */
+		for (idx = 0; idx < refs->ref_cnt; idx++) {
+			if (refs->refs[idx].pid == e->pid)
+				return;
+		}
+
+		grown = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*slot));
+		if (!grown) {
+			p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...",
+			      e->id, e->pid, e->comm);
+			return;
+		}
+		refs->refs = grown;
+
+		slot = &refs->refs[refs->ref_cnt++];
+		slot->pid = e->pid;
+		memcpy(slot->comm, e->comm, sizeof(slot->comm));
+		return;
+	}
+
+	/* first reference seen for this object: create its entry */
+	refs = calloc(1, sizeof(*refs));
+	if (!refs) {
+		p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+		      e->id, e->pid, e->comm);
+		return;
+	}
+
+	refs->id = e->id;
+	refs->refs = malloc(sizeof(*refs->refs));
+	if (!refs->refs) {
+		free(refs);
+		p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+		      e->id, e->pid, e->comm);
+		return;
+	}
+
+	slot = &refs->refs[0];
+	slot->pid = e->pid;
+	memcpy(slot->comm, e->comm, sizeof(slot->comm));
+	refs->ref_cnt = 1;
+	hash_add(table->table, &refs->node, e->id);
+}
+
+static int __printf(2, 0)
+libbpf_print_none(__maybe_unused enum libbpf_print_level level,
+ __maybe_unused const char *format,
+ __maybe_unused va_list args)
+{
+ return 0; /* drop the message; installed via libbpf_set_print() around pid_iter_bpf__load() */
+}
+
+/*
+ * Fill @table with the processes that hold references to BPF objects of the
+ * given @type. The pid_iter BPF iterator is loaded and attached, an iterator
+ * session is opened on it, and every struct pid_iter_entry record read from
+ * that session is passed to add_ref().
+ *
+ * A failure to load the iterator program is treated as "not supported by this
+ * kernel": libbpf output is silenced for the load, @table stays empty and 0 is
+ * returned. Returns 0 on success and a negative value on any other failure,
+ * after reporting it with p_err().
+ */
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+	char buf[4096];
+	struct pid_iter_bpf *skel;
+	struct pid_iter_entry *e;
+	int err, ret, fd = -1, i;
+	libbpf_print_fn_t default_print;
+
+	hash_init(table->table);
+	set_max_rlimit();
+
+	skel = pid_iter_bpf__open();
+	if (!skel) {
+		p_err("failed to open PID iterator skeleton");
+		return -1;
+	}
+
+	/* tell the BPF side which kind of object to look for */
+	skel->rodata->obj_type = type;
+
+	/* we don't want output polluted with libbpf errors if bpf_iter is not
+	 * supported
+	 */
+	default_print = libbpf_set_print(libbpf_print_none);
+	err = pid_iter_bpf__load(skel);
+	libbpf_set_print(default_print);
+	if (err) {
+		/* too bad, kernel doesn't support BPF iterators yet */
+		err = 0;
+		goto out;
+	}
+	err = pid_iter_bpf__attach(skel);
+	if (err) {
+		/* if we loaded above successfully, attach has to succeed */
+		p_err("failed to attach PID iterator: %d", err);
+		goto out;
+	}
+
+	fd = bpf_iter_create(bpf_link__fd(skel->links.iter));
+	if (fd < 0) {
+		err = -errno;
+		p_err("failed to create PID iterator session: %d", err);
+		goto out;
+	}
+
+	while (true) {
+		ret = read(fd, buf, sizeof(buf));
+		if (ret < 0) {
+			/* EAGAIN/EINTR are transient: retry the read instead
+			 * of abandoning the whole PID collection
+			 */
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+			err = -errno;
+			p_err("failed to read PID iterator output: %d", err);
+			goto out;
+		}
+		if (ret == 0)
+			break;
+		/* the stream must be a whole number of fixed-size records */
+		if (ret % sizeof(*e)) {
+			err = -EINVAL;
+			p_err("invalid PID iterator output format");
+			goto out;
+		}
+		ret /= sizeof(*e);
+
+		e = (void *)buf;
+		for (i = 0; i < ret; i++, e++) {
+			add_ref(table, e);
+		}
+	}
+	err = 0;
+out:
+	if (fd >= 0)
+		close(fd);
+	pid_iter_bpf__destroy(skel);
+	return err;
+}
+
+void delete_obj_refs_table(struct obj_refs_table *table) /* free every entry that add_ref() put into table */
+{
+ struct obj_refs *refs;
+ struct hlist_node *tmp;
+ unsigned int bkt;
+
+ hash_for_each_safe(table->table, bkt, tmp, refs, node) { /* _safe variant: entries are unlinked and freed while iterating */
+ hash_del(&refs->node);
+ free(refs->refs); /* the per-object array of referencing processes */
+ free(refs);
+ }
+}
+
+/*
+ * Write a "pids" name followed by a JSON array to @json_writer, with one
+ * {"pid": ..., "comm": ...} object per process recorded in @table for object
+ * @id. Nothing is written when the table is empty, when @id has no entry, or
+ * when its entry holds no references.
+ */
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+			json_writer_t *json_writer)
+{
+	struct obj_refs *cur, *match = NULL;
+	int idx;
+
+	if (hash_empty(table->table))
+		return;
+
+	/* only the first entry carrying this ID is considered */
+	hash_for_each_possible(table->table, cur, node, id) {
+		if (cur->id == id) {
+			match = cur;
+			break;
+		}
+	}
+	if (!match || match->ref_cnt == 0)
+		return;
+
+	jsonw_name(json_writer, "pids");
+	jsonw_start_array(json_writer);
+	for (idx = 0; idx < match->ref_cnt; idx++) {
+		struct obj_ref *proc = &match->refs[idx];
+
+		jsonw_start_object(json_writer);
+		jsonw_int_field(json_writer, "pid", proc->pid);
+		jsonw_string_field(json_writer, "comm", proc->comm);
+		jsonw_end_object(json_writer);
+	}
+	jsonw_end_array(json_writer);
+}
+
+/*
+ * Print @prefix followed by a comma-separated "comm(pid)" list of the
+ * processes recorded in @table for object @id. Nothing is printed, not even
+ * @prefix, when the table is empty, when @id has no entry, or when its entry
+ * holds no references.
+ */
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix)
+{
+	struct obj_refs *cur, *match = NULL;
+	int idx;
+
+	if (hash_empty(table->table))
+		return;
+
+	/* only the first entry carrying this ID is considered */
+	hash_for_each_possible(table->table, cur, node, id) {
+		if (cur->id == id) {
+			match = cur;
+			break;
+		}
+	}
+	if (!match || match->ref_cnt == 0)
+		return;
+
+	printf("%s", prefix);
+	for (idx = 0; idx < match->ref_cnt; idx++) {
+		struct obj_ref *proc = &match->refs[idx];
+		const char *sep = idx == 0 ? "" : ", ";
+
+		printf("%s%s(%d)", sep, proc->comm, proc->pid);
+	}
+}
+
+
+#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a5eff83496f2..158995d853b0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,42 @@
#include "main.h"
#include "xlated_dumper.h"
+const char * const prog_type_name[] = { /* program type -> display name, indexed by BPF_PROG_TYPE_* value */
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_LSM] = "lsm",
+ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+};
+
+const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); /* element count for users that only see the unsized extern declaration in main.h */
+
enum dump_mode {
DUMP_JITED,
DUMP_XLATED,
@@ -86,158 +122,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
strftime(buf, size, "%FT%T%z", &load_tm);
}
-static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
-{
- unsigned int id = 0;
- int fd, nb_fds = 0;
- void *tmp;
- int err;
-
- while (true) {
- struct bpf_prog_info info = {};
- __u32 len = sizeof(info);
-
- err = bpf_prog_get_next_id(id, &id);
- if (err) {
- if (errno != ENOENT) {
- p_err("%s", strerror(errno));
- goto err_close_fds;
- }
- return nb_fds;
- }
-
- fd = bpf_prog_get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't get prog by id (%u): %s",
- id, strerror(errno));
- goto err_close_fds;
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get prog info (%u): %s",
- id, strerror(errno));
- goto err_close_fd;
- }
-
- if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
- (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
- close(fd);
- continue;
- }
-
- if (nb_fds > 0) {
- tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
- if (!tmp) {
- p_err("failed to realloc");
- goto err_close_fd;
- }
- *fds = tmp;
- }
- (*fds)[nb_fds++] = fd;
- }
-
-err_close_fd:
- close(fd);
-err_close_fds:
- while (--nb_fds >= 0)
- close((*fds)[nb_fds]);
- return -1;
-}
-
-static int prog_parse_fds(int *argc, char ***argv, int **fds)
-{
- if (is_prefix(**argv, "id")) {
- unsigned int id;
- char *endptr;
-
- NEXT_ARGP();
-
- id = strtoul(**argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", **argv);
- return -1;
- }
- NEXT_ARGP();
-
- (*fds)[0] = bpf_prog_get_fd_by_id(id);
- if ((*fds)[0] < 0) {
- p_err("get by id (%u): %s", id, strerror(errno));
- return -1;
- }
- return 1;
- } else if (is_prefix(**argv, "tag")) {
- unsigned char tag[BPF_TAG_SIZE];
-
- NEXT_ARGP();
-
- if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
- tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
- != BPF_TAG_SIZE) {
- p_err("can't parse tag");
- return -1;
- }
- NEXT_ARGP();
-
- return prog_fd_by_nametag(tag, fds, true);
- } else if (is_prefix(**argv, "name")) {
- char *name;
-
- NEXT_ARGP();
-
- name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
- p_err("can't parse name");
- return -1;
- }
- NEXT_ARGP();
-
- return prog_fd_by_nametag(name, fds, false);
- } else if (is_prefix(**argv, "pinned")) {
- char *path;
-
- NEXT_ARGP();
-
- path = **argv;
- NEXT_ARGP();
-
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
- if ((*fds)[0] < 0)
- return -1;
- return 1;
- }
-
- p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
- return -1;
-}
-
-int prog_parse_fd(int *argc, char ***argv)
-{
- int *fds = NULL;
- int nb_fds, fd;
-
- fds = malloc(sizeof(int));
- if (!fds) {
- p_err("mem alloc failed");
- return -1;
- }
- nb_fds = prog_parse_fds(argc, argv, &fds);
- if (nb_fds != 1) {
- if (nb_fds > 1) {
- p_err("several programs match this handle");
- while (nb_fds--)
- close(fds[nb_fds]);
- }
- fd = -1;
- goto exit_free;
- }
-
- fd = fds[0];
-exit_free:
- free(fds);
- return fd;
-}
-
static void show_prog_maps(int fd, __u32 num_maps)
{
struct bpf_prog_info info = {};
@@ -342,6 +226,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_end_array(json_wtr);
}
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
}
@@ -408,6 +294,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
if (info->btf_id)
printf("\n\tbtf_id %d", info->btf_id);
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
printf("\n");
}
@@ -473,6 +361,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
+ build_obj_refs_table(&refs_table, BPF_OBJ_PROG);
if (argc == 2)
return do_show_subset(argc, argv);
@@ -514,6 +403,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return err;
}
@@ -2016,7 +1907,7 @@ static int do_help(int argc, char **argv)
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt |\n"
- " struct_ops | fentry | fexit | freplace }\n"
+ " struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
new file mode 100644
index 000000000000..d9b420972934
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "pid_iter.h"
+
+/* keep in sync with the definition in main.h */
+enum bpf_obj_type {
+ BPF_OBJ_UNKNOWN, /* initial value of obj_type; iter() emits nothing for it */
+ BPF_OBJ_PROG, /* files whose f_op is bpf_prog_fops */
+ BPF_OBJ_MAP, /* files whose f_op is bpf_map_fops */
+ BPF_OBJ_LINK, /* files whose f_op is bpf_link_fops */
+ BPF_OBJ_BTF, /* files whose f_op is btf_fops */
+};
+
+extern const void bpf_link_fops __ksym;
+extern const void bpf_map_fops __ksym;
+extern const void bpf_prog_fops __ksym;
+extern const void btf_fops __ksym;
+
+const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN;
+
+static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type) /* read the ID of kernel object ent, interpreted according to type */
+{
+ switch (type) {
+ case BPF_OBJ_PROG:
+ return BPF_CORE_READ((struct bpf_prog *)ent, aux, id); /* the only case that reads through a nested member (aux) */
+ case BPF_OBJ_MAP:
+ return BPF_CORE_READ((struct bpf_map *)ent, id);
+ case BPF_OBJ_BTF:
+ return BPF_CORE_READ((struct btf *)ent, id);
+ case BPF_OBJ_LINK:
+ return BPF_CORE_READ((struct bpf_link *)ent, id);
+ default:
+ return 0; /* unrecognized type: report ID 0 */
+ }
+}
+
+SEC("iter/task_file") /* iterator program over (task, file) pairs */
+int iter(struct bpf_iter__task_file *ctx)
+{
+ struct file *file = ctx->file;
+ struct task_struct *task = ctx->task;
+ struct pid_iter_entry e;
+ const void *fops;
+
+ if (!file || !task)
+ return 0; /* skip invocations that carry no file or no task */
+
+ switch (obj_type) { /* choose the file_operations symbol that identifies the requested object type */
+ case BPF_OBJ_PROG:
+ fops = &bpf_prog_fops;
+ break;
+ case BPF_OBJ_MAP:
+ fops = &bpf_map_fops;
+ break;
+ case BPF_OBJ_BTF:
+ fops = &btf_fops;
+ break;
+ case BPF_OBJ_LINK:
+ fops = &bpf_link_fops;
+ break;
+ default:
+ return 0; /* BPF_OBJ_UNKNOWN or any other value: emit nothing */
+ }
+
+ if (file->f_op != fops)
+ return 0; /* this file is not an object of the requested type */
+
+ e.pid = task->tgid; /* the thread-group ID is what gets reported as the PID */
+ e.id = get_obj_id(file->private_data, obj_type); /* private_data is treated as the object itself */
+ bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
+ task->group_leader->comm); /* name is taken from the group leader, not from task itself */
+ bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); /* one fixed-size record; pids.c parses the stream as an array of struct pid_iter_entry */
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h
new file mode 100644
index 000000000000..5692cf257adb
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2020 Facebook */
+#ifndef __PID_ITER_H
+#define __PID_ITER_H
+
+struct pid_iter_entry { /* record written by the pid_iter BPF program and parsed by pids.c */
+ __u32 id; /* ID of the BPF object, as returned by get_obj_id() */
+ int pid; /* task->tgid of the process holding the file */
+ char comm[16]; /* comm of the task's group leader */
+};
+
+#endif
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 20034c12f7c5..4e3512f700c0 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,7 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2020 Facebook
-#include "profiler.h"
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -116,4 +115,4 @@ int BPF_PROG(fexit_XXX)
return 0;
}
-char LICENSE[] SEC("license") = "GPL";
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
deleted file mode 100644
index 1f767e9510f7..000000000000
--- a/tools/bpf/bpftool/skeleton/profiler.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __PROFILER_H
-#define __PROFILER_H
-
-/* useful typedefs from vmlinux.h */
-
-typedef signed char __s8;
-typedef unsigned char __u8;
-typedef short int __s16;
-typedef short unsigned int __u16;
-typedef int __s32;
-typedef unsigned int __u32;
-typedef long long int __s64;
-typedef long long unsigned int __u64;
-
-typedef __s8 s8;
-typedef __u8 u8;
-typedef __s16 s16;
-typedef __u16 u16;
-typedef __s32 s32;
-typedef __u32 u32;
-typedef __s64 s64;
-typedef __u64 u64;
-
-enum {
- false = 0,
- true = 1,
-};
-
-#ifdef __CHECKER__
-#define __bitwise__ __attribute__((bitwise))
-#else
-#define __bitwise__
-#endif
-
-typedef __u16 __bitwise__ __le16;
-typedef __u16 __bitwise__ __be16;
-typedef __u32 __bitwise__ __le32;
-typedef __u32 __bitwise__ __be32;
-typedef __u64 __bitwise__ __le64;
-typedef __u64 __bitwise__ __be64;
-
-typedef __u16 __bitwise__ __sum16;
-typedef __u32 __bitwise__ __wsum;
-
-#endif /* __PROFILER_H */
diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
new file mode 100644
index 000000000000..a026df7dc280
--- /dev/null
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -0,0 +1,4 @@
+/FEATURE-DUMP.libbpf
+/bpf_helper_defs.h
+/fixdep
+/resolve_btfids
diff --git a/tools/bpf/resolve_btfids/Build b/tools/bpf/resolve_btfids/Build
new file mode 100644
index 000000000000..ae82da03f9bf
--- /dev/null
+++ b/tools/bpf/resolve_btfids/Build
@@ -0,0 +1,10 @@
+resolve_btfids-y += main.o
+resolve_btfids-y += rbtree.o
+resolve_btfids-y += zalloc.o
+resolve_btfids-y += string.o
+resolve_btfids-y += ctype.o
+resolve_btfids-y += str_error_r.o
+
+$(OUTPUT)%.o: ../../lib/%.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
new file mode 100644
index 000000000000..a88cd4426398
--- /dev/null
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: GPL-2.0-only
+include ../../scripts/Makefile.include
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+ifeq ($(V),1)
+ Q =
+ msg =
+else
+ Q = @
+ msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+ MAKEFLAGS=--no-print-directory
+endif
+
+# always use the host compiler
+ifneq ($(LLVM),)
+HOSTAR ?= llvm-ar
+HOSTCC ?= clang
+HOSTLD ?= ld.lld
+else
+HOSTAR ?= ar
+HOSTCC ?= gcc
+HOSTLD ?= ld
+endif
+AR = $(HOSTAR)
+CC = $(HOSTCC)
+LD = $(HOSTLD)
+
+OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
+
+LIBBPF_SRC := $(srctree)/tools/lib/bpf/
+SUBCMD_SRC := $(srctree)/tools/lib/subcmd/
+
+BPFOBJ := $(OUTPUT)/libbpf.a
+SUBCMDOBJ := $(OUTPUT)/libsubcmd.a
+
+BINARY := $(OUTPUT)/resolve_btfids
+BINARY_IN := $(BINARY)-in.o
+
+all: $(BINARY)
+
+$(OUTPUT):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $(OUTPUT)
+
+$(SUBCMDOBJ): fixdep FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT)
+
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+
+CFLAGS := -g \
+ -I$(srctree)/tools/include \
+ -I$(srctree)/tools/include/uapi \
+ -I$(LIBBPF_SRC) \
+ -I$(SUBCMD_SRC)
+
+LIBS = -lelf -lz
+
+export srctree OUTPUT CFLAGS Q
+include $(srctree)/tools/build/Makefile.include
+
+$(BINARY_IN): fixdep FORCE
+ $(Q)$(MAKE) $(build)=resolve_btfids
+
+$(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN)
+ $(call msg,LINK,$@)
+ $(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
+
+libsubcmd-clean:
+ $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) clean
+
+libbpf-clean:
+ $(Q)$(MAKE) -C $(LIBBPF_SRC) OUTPUT=$(OUTPUT) clean
+
+clean: libsubcmd-clean libbpf-clean fixdep-clean
+ $(call msg,CLEAN,$(BINARY))
+ $(Q)$(RM) -f $(BINARY); \
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+tags:
+ $(call msg,GEN,,tags)
+ $(Q)ctags -R . $(LIBBPF_SRC) $(SUBCMD_SRC)
+
+FORCE:
+
+# Command-only targets: never treat a same-named file as up to date.
+.PHONY: all FORCE clean tags libsubcmd-clean libbpf-clean
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
new file mode 100644
index 000000000000..4d9ecb975862
--- /dev/null
+++ b/tools/bpf/resolve_btfids/main.c
@@ -0,0 +1,666 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * resolve_btfids scans Elf object for .BTF_ids section and resolves
+ * its symbols with BTF ID values.
+ *
+ * Each symbol points to 4 bytes data and is expected to have
+ * following name syntax:
+ *
+ * __BTF_ID__<type>__<symbol>[__<id>]
+ *
+ * type is:
+ *
+ * func - lookup BTF_KIND_FUNC symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__func__vfs_close__1:
+ * .zero 4
+ *
+ * struct - lookup BTF_KIND_STRUCT symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__struct__sk_buff__1:
+ * .zero 4
+ *
+ * union - lookup BTF_KIND_UNION symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__union__thread_union__1:
+ * .zero 4
+ *
+ * typedef - lookup BTF_KIND_TYPEDEF symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__typedef__pid_t__1:
+ * .zero 4
+ *
+ * set - store the number of IDs in the list (derived from the symbol
+ * size) into the first 4 bytes and sort the ID list that follows
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__func__vfs_getattr__3:
+ * .zero 4
+ * __BTF_ID__func__vfs_fallocate__4:
+ * .zero 4
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/rbtree.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include <btf.h>
+#include <libbpf.h>
+#include <parse-options.h>
+
+#define BTF_IDS_SECTION ".BTF_ids"
+#define BTF_ID "__BTF_ID__"
+
+#define BTF_STRUCT "struct"
+#define BTF_UNION "union"
+#define BTF_TYPEDEF "typedef"
+#define BTF_FUNC "func"
+#define BTF_SET "set"
+
+#define ADDR_CNT 100
+
+/*
+ * One collected __BTF_ID__ symbol name. Nodes live in a per-kind rb-tree
+ * and are shared by every symbol that carries the same name.
+ */
+struct btf_id {
+ struct rb_node rb_node;
+ /* symbol name with the kind prefix and the unique-id suffix cut off */
+ char *name;
+ union {
+ /* BTF type ID found for this name (0 while unresolved) */
+ int id;
+ /* for set symbols: number of IDs in the list */
+ int cnt;
+ };
+ /* number of used entries in addr[] */
+ int addr_cnt;
+ /* st_value of every symbol collected under this name */
+ Elf64_Addr addr[ADDR_CNT];
+};
+
+/* Everything the tool knows about the ELF object being processed. */
+struct object {
+ /* path of the ELF object given on the command line */
+ const char *path;
+ /* optional path given with --btf; when NULL, BTF is parsed from path */
+ const char *btf;
+
+ /* libelf state gathered by elf_collect() */
+ struct {
+ int fd;
+ Elf *elf;
+ /* data of the SHT_SYMTAB section */
+ Elf_Data *symbols;
+ /* data of the .BTF_ids section */
+ Elf_Data *idlist;
+ /* section indexes, -1 when the section was not found */
+ int symbols_shndx;
+ int idlist_shndx;
+ /* sh_link of the symbol table, used to look up symbol names */
+ size_t strtabidx;
+ /* sh_addr of the .BTF_ids section */
+ unsigned long idlist_addr;
+ } efile;
+
+ /* one tree of struct btf_id nodes per symbol kind */
+ struct rb_root sets;
+ struct rb_root structs;
+ struct rb_root unions;
+ struct rb_root typedefs;
+ struct rb_root funcs;
+
+ /* number of symbols seen per kind, incremented in symbols_collect() */
+ int nr_funcs;
+ int nr_structs;
+ int nr_unions;
+ int nr_typedefs;
+};
+
+static int verbose;
+
+/*
+ * Print a printf-style message to stderr when the verbosity @var is at
+ * least @level.
+ *
+ * Returns the vfprintf() result, or 0 when the message is suppressed.
+ */
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;	/* nothing printed unless the level check passes */
+
+	if (var >= level) {
+		va_start(args, fmt);
+		ret = vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+	return ret;
+}
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_debug(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+ eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+/* Tell whether @name is non-NULL and starts with the BTF_ID prefix. */
+static bool is_btf_id(const char *name)
+{
+	const size_t prefix_len = sizeof(BTF_ID) - 1;
+
+	if (!name)
+		return false;
+
+	return strncmp(name, BTF_ID, prefix_len) == 0;
+}
+
+/*
+ * Look up the node named @name in @root.
+ *
+ * The walk goes left when the stored name compares lower than @name,
+ * which is the same ordering btf_id__add() uses when inserting.
+ *
+ * Returns the matching node or NULL when the name is not in the tree.
+ */
+static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct btf_id *entry = rb_entry(node, struct btf_id, rb_node);
+		int order = strcmp(entry->name, name);
+
+		if (order == 0)
+			return entry;
+
+		node = order < 0 ? node->rb_left : node->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Insert a node named @name into @root.
+ *
+ * When the name is already present, the existing node is returned, or
+ * NULL if @unique is set. A new node stores the @name pointer itself,
+ * not a copy.
+ *
+ * Returns the node, or NULL on duplicate (@unique) or allocation failure.
+ */
+static struct btf_id*
+btf_id__add(struct rb_root *root, char *name, bool unique)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct btf_id *node;
+
+	while (*link) {
+		int order;
+
+		parent = *link;
+		node = rb_entry(parent, struct btf_id, rb_node);
+		order = strcmp(node->name, name);
+		if (order == 0)
+			return unique ? NULL : node;
+
+		link = order < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	node = zalloc(sizeof(*node));
+	if (!node)
+		return NULL;
+
+	pr_debug("adding symbol %s\n", name);
+	node->name = name;
+	rb_link_node(&node->rb_node, parent, link);
+	rb_insert_color(&node->rb_node, root);
+	return node;
+}
+
+/*
+ * Extract the plain symbol name from the tail of a __BTF_ID__ symbol.
+ *
+ * @prefix_end points just past the kind, i.e. at "__<symbol>__<id>".
+ *
+ * Returns a newly allocated copy of <symbol> that the caller must free,
+ * or NULL when the tail is too short, has no "__<id>" suffix, or the
+ * allocation fails.
+ */
+static char *get_id(const char *prefix_end)
+{
+	/*
+	 * __BTF_ID__func__vfs_truncate__0
+	 * prefix_end =  ^
+	 */
+	const size_t sep_len = sizeof("__") - 1;
+	char *p, *id;
+
+	/* There must be something after the "__" separator. */
+	if (strlen(prefix_end) <= sep_len)
+		return NULL;
+
+	id = strdup(prefix_end + sep_len);
+	if (!id)
+		return NULL;
+
+	/*
+	 * __BTF_ID__func__vfs_truncate__0
+	 * id =            ^
+	 *
+	 * cut the unique id part; the last '_' must exist and must be
+	 * preceded by another '_' inside the buffer
+	 */
+	p = strrchr(id, '_');
+	if (!p || p == id || p[-1] != '_') {
+		free(id);
+		return NULL;
+	}
+	p[-1] = '\0';
+	return id;
+}
+
+/*
+ * Add the symbol whose kind-stripped name starts at @name + @size to @root.
+ *
+ * Returns the node for the symbol name (new or already present), or NULL
+ * when the name cannot be parsed or a node cannot be allocated.
+ */
+static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
+{
+	struct btf_id *node;
+	char *id;
+
+	id = get_id(name + size);
+	if (!id) {
+		pr_err("FAILED to parse symbol name: %s\n", name);
+		return NULL;
+	}
+
+	node = btf_id__add(root, id, false);
+
+	/*
+	 * The tree keeps @id only when a new node was created with it;
+	 * on a duplicate name or allocation failure the copy is ours to free.
+	 */
+	if (!node || node->name != id)
+		free(id);
+
+	return node;
+}
+
+/*
+ * Open the ELF object at obj->path for update and record its symbol
+ * table and .BTF_ids section in obj->efile.
+ *
+ * The descriptor and ELF handle are stored in @obj as soon as they exist,
+ * so the caller's cleanup releases them on every failure path.
+ *
+ * Returns 0 on success, -1 on failure. Sections that are not present
+ * leave their *_shndx fields untouched.
+ */
+static int elf_collect(struct object *obj)
+{
+	Elf_Scn *scn = NULL;
+	size_t shdrstrndx;
+	int idx = 0;
+	Elf *elf;
+	int fd;
+
+	fd = open(obj->path, O_RDWR, 0666);
+	if (fd == -1) {
+		pr_err("FAILED cannot open %s: %s\n",
+			obj->path, strerror(errno));
+		return -1;
+	}
+
+	/* Hand the descriptor over before anything else can fail. */
+	obj->efile.fd = fd;
+
+	elf_version(EV_CURRENT);
+
+	elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
+	if (!elf) {
+		pr_err("FAILED cannot create ELF descriptor: %s\n",
+			elf_errmsg(-1));
+		return -1;
+	}
+
+	obj->efile.elf = elf;
+
+	elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT);
+
+	if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) {
+		pr_err("FAILED cannot get shdr str ndx\n");
+		return -1;
+	}
+
+	/*
+	 * Scan all the ELF sections and save the data of the
+	 * .BTF_ids section and of the symbol table.
+	 */
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		Elf_Data *data;
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_err("FAILED get section(%d) header\n", idx);
+			return -1;
+		}
+
+		name = elf_strptr(elf, shdrstrndx, sh.sh_name);
+		if (!name) {
+			pr_err("FAILED get section(%d) name\n", idx);
+			return -1;
+		}
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			pr_err("FAILED to get section(%d) data from %s\n",
+				idx, name);
+			return -1;
+		}
+
+		/* size is passed as unsigned long, so print it with %lu */
+		pr_debug2("section(%d) %s, size %lu, link %d, flags %lx, type=%d\n",
+			  idx, name, (unsigned long) data->d_size,
+			  (int) sh.sh_link, (unsigned long) sh.sh_flags,
+			  (int) sh.sh_type);
+
+		if (sh.sh_type == SHT_SYMTAB) {
+			obj->efile.symbols = data;
+			obj->efile.symbols_shndx = idx;
+			obj->efile.strtabidx = sh.sh_link;
+		} else if (!strcmp(name, BTF_IDS_SECTION)) {
+			obj->efile.idlist = data;
+			obj->efile.idlist_shndx = idx;
+			obj->efile.idlist_addr = sh.sh_addr;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the symbol table and file every __BTF_ID__* symbol that lives in
+ * the .BTF_ids section into the rb-tree of its kind, recording the
+ * symbol's address.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+static int symbols_collect(struct object *obj)
+{
+	Elf_Scn *scn = NULL;
+	GElf_Shdr sh;
+	char *name;
+	int n, i;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
+	if (!scn)
+		return -1;
+
+	if (gelf_getshdr(scn, &sh) != &sh)
+		return -1;
+
+	/* A zero entry size would make the division below undefined. */
+	if (!sh.sh_entsize)
+		return -1;
+
+	n = sh.sh_size / sh.sh_entsize;
+
+	/*
+	 * Scan symbols and look for the ones starting with
+	 * __BTF_ID__* over .BTF_ids section.
+	 */
+	for (i = 0; i < n; i++) {
+		struct btf_id *id;
+		GElf_Sym sym;
+		char *prefix;
+
+		if (!gelf_getsym(obj->efile.symbols, i, &sym))
+			return -1;
+
+		if (sym.st_shndx != obj->efile.idlist_shndx)
+			continue;
+
+		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				  sym.st_name);
+
+		if (!is_btf_id(name))
+			continue;
+
+		/*
+		 * __BTF_ID__TYPE__vfs_truncate__0
+		 * prefix =  ^
+		 */
+		prefix = name + sizeof(BTF_ID) - 1;
+
+		/* struct */
+		if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
+			obj->nr_structs++;
+			id = add_symbol(&obj->structs, prefix, sizeof(BTF_STRUCT) - 1);
+		/* union */
+		} else if (!strncmp(prefix, BTF_UNION, sizeof(BTF_UNION) - 1)) {
+			obj->nr_unions++;
+			id = add_symbol(&obj->unions, prefix, sizeof(BTF_UNION) - 1);
+		/* typedef */
+		} else if (!strncmp(prefix, BTF_TYPEDEF, sizeof(BTF_TYPEDEF) - 1)) {
+			obj->nr_typedefs++;
+			id = add_symbol(&obj->typedefs, prefix, sizeof(BTF_TYPEDEF) - 1);
+		/* func */
+		} else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) {
+			obj->nr_funcs++;
+			id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
+		/* set */
+		} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
+			id = add_symbol(&obj->sets, prefix, sizeof(BTF_SET) - 1);
+			/*
+			 * SET objects store list's count, which is encoded
+			 * in symbol's size, together with 'cnt' field hence
+			 * that - 1.
+			 */
+			if (id)
+				id->cnt = sym.st_size / sizeof(int) - 1;
+		} else {
+			pr_err("FAILED unsupported prefix %s\n", prefix);
+			return -1;
+		}
+
+		if (!id)
+			return -ENOMEM;
+
+		if (id->addr_cnt >= ADDR_CNT) {
+			pr_err("FAILED symbol %s crossed the number of allowed lists\n",
+				id->name);
+			return -1;
+		}
+		id->addr[id->addr_cnt++] = sym.st_value;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse BTF (from obj->btf when given, else from obj->path) and store
+ * the type ID of every func/struct/union/typedef whose name was
+ * collected from the .BTF_ids symbols.
+ *
+ * Returns 0 on success, -1 when BTF cannot be loaded or is malformed.
+ */
+static int symbols_resolve(struct object *obj)
+{
+	int nr_typedefs = obj->nr_typedefs;
+	int nr_structs  = obj->nr_structs;
+	int nr_unions   = obj->nr_unions;
+	int nr_funcs    = obj->nr_funcs;
+	const char *btf_path = obj->btf ?: obj->path;
+	int err, type_id;
+	struct btf *btf;
+	__u32 nr_types;
+
+	btf = btf__parse(btf_path, NULL);
+	err = libbpf_get_error(btf);
+	if (err) {
+		/* libbpf reports a negative errno; strerror() wants it positive */
+		pr_err("FAILED: load BTF from %s: %s\n",
+			btf_path, strerror(-err));
+		return -1;
+	}
+
+	err = -1;
+	nr_types = btf__get_nr_types(btf);
+
+	/*
+	 * Iterate all the BTF types and search for collected symbol IDs.
+	 */
+	for (type_id = 1; (__u32)type_id <= nr_types; type_id++) {
+		const struct btf_type *type;
+		struct rb_root *root;
+		struct btf_id *id;
+		const char *str;
+		int *remaining;
+
+		type = btf__type_by_id(btf, type_id);
+		if (!type) {
+			pr_err("FAILED: malformed BTF, can't resolve type for ID %d\n",
+				type_id);
+			goto out;
+		}
+
+		/* Only look at kinds that still have symbols left to resolve. */
+		if (btf_is_func(type) && nr_funcs) {
+			remaining = &nr_funcs;
+			root = &obj->funcs;
+		} else if (btf_is_struct(type) && nr_structs) {
+			remaining = &nr_structs;
+			root = &obj->structs;
+		} else if (btf_is_union(type) && nr_unions) {
+			remaining = &nr_unions;
+			root = &obj->unions;
+		} else if (btf_is_typedef(type) && nr_typedefs) {
+			remaining = &nr_typedefs;
+			root = &obj->typedefs;
+		} else
+			continue;
+
+		str = btf__name_by_offset(btf, type->name_off);
+		if (!str) {
+			pr_err("FAILED: malformed BTF, can't resolve name for ID %d\n",
+				type_id);
+			goto out;
+		}
+
+		id = btf_id__find(root, str);
+		if (id) {
+			id->id = type_id;
+			(*remaining)--;
+		}
+	}
+
+	err = 0;
+out:
+	btf__free(btf);
+	return err;
+}
+
+/*
+ * Write id->id into the .BTF_ids data at every address recorded for @id.
+ *
+ * Returns 0 on success, -EINVAL when the ID is still 0 (unresolved) and
+ * -1 when an address falls outside the section data.
+ */
+static int id_patch(struct object *obj, struct btf_id *id)
+{
+	Elf_Data *data = obj->efile.idlist;
+	int *ids = data->d_buf;
+	int n;
+
+	if (id->id == 0) {
+		pr_err("FAILED unresolved symbol %s\n", id->name);
+		return -EINVAL;
+	}
+
+	for (n = 0; n < id->addr_cnt; n++) {
+		unsigned long addr = id->addr[n];
+		/* byte offset of the symbol inside the section */
+		unsigned long offset = addr - obj->efile.idlist_addr;
+
+		pr_debug("patching addr %5lu: ID %7d [%s]\n",
+			 offset, id->id, id->name);
+
+		if (offset >= data->d_size) {
+			pr_err("FAILED patching index %lu out of bounds %lu\n",
+				offset, data->d_size);
+			return -1;
+		}
+
+		ids[offset / sizeof(int)] = id->id;
+	}
+
+	return 0;
+}
+
+/*
+ * Patch every node of @root into the .BTF_ids data, in tree order.
+ *
+ * Returns 0 on success, -1 as soon as one node fails to patch.
+ */
+static int __symbols_patch(struct object *obj, struct rb_root *root)
+{
+	struct rb_node *node;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		struct btf_id *entry = rb_entry(node, struct btf_id, rb_node);
+
+		if (id_patch(obj, entry))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * qsort() comparator ordering ints ascending.
+ *
+ * Uses comparisons instead of subtraction so the result cannot overflow
+ * for operands that are far apart.
+ */
+static int cmp_id(const void *pa, const void *pb)
+{
+	const int *a = pa, *b = pb;
+
+	return (*a > *b) - (*a < *b);
+}
+
+/*
+ * Sort the ID list of every set in place.
+ *
+ * The count is read from the int at the set symbol's address and the
+ * IDs that follow it are sorted with cmp_id().
+ *
+ * Returns 0 on success, -1 when a set name was seen more than once.
+ */
+static int sets_patch(struct object *obj)
+{
+	Elf_Data *data = obj->efile.idlist;
+	int *ids = data->d_buf;
+	struct rb_node *node;
+
+	for (node = rb_first(&obj->sets); node; node = rb_next(node)) {
+		struct btf_id *set = rb_entry(node, struct btf_id, rb_node);
+		unsigned long addr, slot;
+		int *members;
+		int cnt;
+
+		/* sets are unique */
+		if (set->addr_cnt != 1) {
+			pr_err("FAILED malformed data for set '%s'\n",
+				set->name);
+			return -1;
+		}
+
+		addr = set->addr[0];
+		slot = (addr - obj->efile.idlist_addr) / sizeof(int);
+
+		cnt = ids[slot];
+		members = &ids[slot] + 1;
+
+		pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
+			 (slot + 1) * sizeof(int), cnt, set->name);
+
+		qsort(members, cnt, sizeof(int), cmp_id);
+	}
+	return 0;
+}
+
+/*
+ * Patch all collected symbols into the .BTF_ids data, sort the sets and
+ * write the modified section back to the ELF file.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int symbols_patch(struct object *obj)
+{
+	struct rb_root *roots[] = {
+		&obj->structs,
+		&obj->unions,
+		&obj->typedefs,
+		&obj->funcs,
+		&obj->sets,
+	};
+	size_t i;
+	int err;
+
+	/* Same order as before; stop at the first tree that fails. */
+	for (i = 0; i < sizeof(roots) / sizeof(roots[0]); i++) {
+		if (__symbols_patch(obj, roots[i]))
+			return -1;
+	}
+
+	if (sets_patch(obj))
+		return -1;
+
+	elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
+
+	err = elf_update(obj->efile.elf, ELF_C_WRITE);
+	if (err < 0)
+		pr_err("FAILED elf_update(WRITE): %s\n",
+			elf_errmsg(-1));
+
+	pr_debug("update %s for %s\n",
+		 err < 0 ? "failed" : "ok", obj->path);
+
+	return err < 0 ? -1 : 0;
+}
+
+static const char * const resolve_btfids_usage[] = {
+ "resolve_btfids [<options>] <ELF object>",
+ NULL
+};
+
+/*
+ * Entry point: parse options, collect the .BTF_ids symbols of the given
+ * ELF object, resolve them against BTF and patch the IDs into the file.
+ *
+ * Returns 0 on success (or when the sections are missing and --no-fail
+ * was given), -1 on failure.
+ */
+int main(int argc, const char **argv)
+{
+	bool no_fail = false;
+	struct object obj = {
+		.efile = {
+			/* -1 so cleanup never closes an fd we did not open */
+			.fd = -1,
+			.idlist_shndx  = -1,
+			.symbols_shndx = -1,
+		},
+		.structs  = RB_ROOT,
+		.unions   = RB_ROOT,
+		.typedefs = RB_ROOT,
+		.funcs    = RB_ROOT,
+		.sets     = RB_ROOT,
+	};
+	struct option btfid_options[] = {
+		OPT_INCR('v', "verbose", &verbose,
+			 "be more verbose (show errors, etc)"),
+		OPT_STRING(0, "btf", &obj.btf, "BTF data",
+			   "BTF data"),
+		OPT_BOOLEAN(0, "no-fail", &no_fail,
+			   "do not fail if " BTF_IDS_SECTION " section is not found"),
+		OPT_END()
+	};
+	int err = -1;
+
+	argc = parse_options(argc, argv, btfid_options, resolve_btfids_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (argc != 1)
+		usage_with_options(resolve_btfids_usage, btfid_options);
+
+	obj.path = argv[0];
+
+	if (elf_collect(&obj))
+		goto out;
+
+	/*
+	 * We did not find .BTF_ids section or symbols section,
+	 * nothing to do..
+	 */
+	if (obj.efile.idlist_shndx == -1 ||
+	    obj.efile.symbols_shndx == -1) {
+		if (no_fail)
+			err = 0;
+		else
+			pr_err("FAILED to find needed sections\n");
+		/* leave through the common cleanup instead of returning */
+		goto out;
+	}
+
+	if (symbols_collect(&obj))
+		goto out;
+
+	if (symbols_resolve(&obj))
+		goto out;
+
+	if (symbols_patch(&obj))
+		goto out;
+
+	err = 0;
+out:
+	if (obj.efile.elf)
+		elf_end(obj.efile.elf);
+	if (obj.efile.fd >= 0)
+		close(obj.efile.fd);
+	return err;
+}
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 9ec01f4454f9..585486e40995 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -74,7 +74,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
# dependencies in the cmd file
if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \
@set -e; \
- $(echo-cmd) $(cmd_$(1)) && $(dep-cmd))
+ $(echo-cmd) $(cmd_$(1)); \
+ $(dep-cmd))
# if_changed - execute command if any prerequisite is newer than
# target, or command line has changed
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index cb152370fdef..774f0b0ca28a 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -8,7 +8,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b1f0321180f5..846ee1341a5c 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -68,14 +68,12 @@ FILES= \
test-llvm-version.bin \
test-libaio.bin \
test-libzstd.bin \
- test-clang-bpf-global-var.bin \
+ test-clang-bpf-co-re.bin \
test-file-handle.bin \
test-libpfm4.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
-CC ?= $(CROSS_COMPILE)gcc
-CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
CLANG ?= clang
@@ -325,7 +323,7 @@ $(OUTPUT)test-libaio.bin:
$(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
-$(OUTPUT)test-clang-bpf-global-var.bin:
+$(OUTPUT)test-clang-bpf-co-re.bin:
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
grep BTF_KIND_VAR
diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c
new file mode 100644
index 000000000000..cb5265bfdd83
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-co-re.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+struct test {
+ int a;
+ int b;
+} __attribute__((preserve_access_index));
+
+volatile struct test global_value_for_test = {};
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
deleted file mode 100644
index 221f1481d52e..000000000000
--- a/tools/build/feature/test-clang-bpf-global-var.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2020 Facebook
-
-volatile int global_value_for_test = 1;
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
new file mode 100644
index 000000000000..c4225ed63565
--- /dev/null
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2020 Roman Gushchin <guro@fb.com>
+# Copyright (C) 2020 Facebook
+
+from os import stat
+import argparse
+import sys
+
+from drgn.helpers.linux import list_for_each_entry, list_empty
+from drgn.helpers.linux import for_each_page
+from drgn.helpers.linux.cpumask import for_each_online_cpu
+from drgn.helpers.linux.percpu import per_cpu_ptr
+from drgn import container_of, FaultError, Object
+
+
+DESC = """
+This is a drgn script to provide slab statistics for memory cgroups.
+It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo
+interface of cgroup v1.
+For drgn, visit https://github.com/osandov/drgn.
+"""
+
+
+MEMCGS = {}
+
+OO_SHIFT = 16
+OO_MASK = ((1 << OO_SHIFT) - 1)
+
+
+def err(s):
+    """Print an error message to stderr and exit with status 1."""
+    message = 'slabinfo.py: error: %s' % s
+    print(message, file=sys.stderr, flush=True)
+    sys.exit(1)
+
+
+def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''):
+    """Recursively record every descendant memcg of css in MEMCGS.
+
+    MEMCGS is keyed by the kernfs node id of the child's cgroup.
+    """
+    children = css.children.address_of_()
+    if list_empty(children):
+        return
+
+    for child in list_for_each_entry('struct cgroup_subsys_state',
+                                     children,
+                                     'sibling'):
+        name = prefix + '/' + child.cgroup.kn.name.string_().decode('utf-8')
+        memcg = container_of(child, 'struct mem_cgroup', 'css')
+        MEMCGS[child.cgroup.kn.id.value_()] = memcg
+        find_memcg_ids(child, name)
+
+
+def is_root_cache(s):
+    """Return True when s has no (or a false) memcg_params.root_cache."""
+    try:
+        root = s.memcg_params.root_cache
+    except AttributeError:
+        # no memcg_params / root_cache attribute: treat as a root cache
+        return True
+    return not root
+
+
+def cache_name(s):
+    """Return the cache name, taken from the root cache for non-root caches."""
+    named = s if is_root_cache(s) else s.memcg_params.root_cache
+    return named.name.string_().decode('utf-8')
+
+
+# SLUB
+
+def oo_order(s):
+    """Return the bits of s.oo.x above OO_SHIFT."""
+    packed = s.oo.x
+    return packed >> OO_SHIFT
+
+
+def oo_objects(s):
+    """Return the low OO_SHIFT bits of s.oo.x."""
+    packed = s.oo.x
+    return packed & OO_MASK
+
+
+def count_partial(n, fn):
+    """Sum fn(page) over every page on the partial list of node n."""
+    total = 0
+    partial_list = n.partial.address_of_()
+    for page in list_for_each_entry('struct page', partial_list, 'lru'):
+        total = total + fn(page)
+    return total
+
+
+def count_free(page):
+    """Return the number of objects on the page that are not in use."""
+    total, used = page.objects, page.inuse
+    return total - used
+
+
+def slub_get_slabinfo(s, cfg):
+    """Collect slabinfo-style counters for cache s over cfg['nr_nodes'] nodes.
+
+    The tunables (limit, batchcount, shared, shared_avail) are reported
+    as 0.
+    """
+    slabs = 0
+    objects = 0
+    free = 0
+
+    for nid in range(cfg['nr_nodes']):
+        node = s.node[nid]
+        slabs += node.nr_slabs.counter.value_()
+        objects += node.total_objects.counter.value_()
+        free += count_partial(node, count_free)
+
+    sinfo = {}
+    sinfo['active_objs'] = objects - free
+    sinfo['num_objs'] = objects
+    sinfo['active_slabs'] = slabs
+    sinfo['num_slabs'] = slabs
+    sinfo['objects_per_slab'] = oo_objects(s)
+    sinfo['cache_order'] = oo_order(s)
+    sinfo['limit'] = 0
+    sinfo['batchcount'] = 0
+    sinfo['shared'] = 0
+    sinfo['shared_avail'] = 0
+    return sinfo
+
+
+def cache_show(s, cfg, objs):
+    """Print one slabinfo-formatted line for cache s.
+
+    When cfg['shared_slab_pages'] is set, objs replaces both object
+    counters in the output.
+    """
+    if cfg['allocator'] == 'SLUB':
+        sinfo = slub_get_slabinfo(s, cfg)
+    else:
+        err('SLAB isn\'t supported yet')
+
+    if cfg['shared_slab_pages']:
+        sinfo['active_objs'] = objs
+        sinfo['num_objs'] = objs
+
+    line_format = ('%-17s %6lu %6lu %6u %4u %4d'
+                   ' : tunables %4u %4u %4u'
+                   ' : slabdata %6lu %6lu %6lu')
+    fields = (
+        cache_name(s), sinfo['active_objs'], sinfo['num_objs'],
+        s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'],
+        sinfo['limit'], sinfo['batchcount'], sinfo['shared'],
+        sinfo['active_slabs'], sinfo['num_slabs'],
+        sinfo['shared_avail'])
+    print(line_format % fields)
+
+
+def detect_kernel_config():
+    """Probe the running kernel and return a config dict.
+
+    Keys: 'nr_nodes' (value of nr_online_nodes), 'allocator' ('SLUB' or
+    'SLAB', chosen from the second member of struct kmem_cache) and
+    'shared_slab_pages' (True when struct obj_cgroup exists).
+    """
+    cfg = {}
+
+    cfg['nr_nodes'] = prog['nr_online_nodes'].value_()
+
+    second_member = prog.type('struct kmem_cache').members[1][1]
+    if second_member == 'flags':
+        cfg['allocator'] = 'SLUB'
+    elif second_member == 'batchcount':
+        cfg['allocator'] = 'SLAB'
+    else:
+        err('Can\'t determine the slab allocator')
+
+    cfg['shared_slab_pages'] = False
+    try:
+        if prog.type('struct obj_cgroup'):
+            cfg['shared_slab_pages'] = True
+    except LookupError:
+        # type is unknown to this kernel; only swallow the lookup failure
+        # so that Ctrl-C and real errors still propagate
+        pass
+
+    return cfg
+
+
+def for_each_slab_page(prog):
+    """Yield every page whose flags have the PG_slab bit set.
+
+    Pages whose flags cannot be read (FaultError) are skipped.
+    """
+    slab_bit = 1 << prog.constant('PG_slab')
+    # looked up as before, although the value is not used below
+    head_bit = 1 << prog.constant('PG_head')
+
+    for page in for_each_page(prog):
+        try:
+            is_slab = page.flags.value_() & slab_bit
+        except FaultError:
+            continue
+        if is_slab:
+            yield page
+
+
+# Parse the CGROUP argument, find the matching memory cgroup and print a
+# slabinfo-style table for it.
+def main():
+ parser = argparse.ArgumentParser(description=DESC,
+ formatter_class=
+ argparse.RawTextHelpFormatter)
+ parser.add_argument('cgroup', metavar='CGROUP',
+ help='Target memory cgroup')
+ args = parser.parse_args()
+
+ # MEMCGS is keyed by kernfs node id; the inode number of the cgroup
+ # path is used as the lookup key
+ try:
+ cgroup_id = stat(args.cgroup).st_ino
+ find_memcg_ids()
+ memcg = MEMCGS[cgroup_id]
+ except KeyError:
+ err('Can\'t find the memory cgroup')
+
+ cfg = detect_kernel_config()
+
+ print('# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>'
+ ' : tunables <limit> <batchcount> <sharedfactor>'
+ ' : slabdata <active_slabs> <num_slabs> <sharedavail>')
+
+ if cfg['shared_slab_pages']:
+ # struct obj_cgroup exists: count objects per cache by walking
+ # all slab pages
+ obj_cgroups = set()
+ stats = {}
+ caches = {}
+
+ # find memcg pointers belonging to the specified cgroup
+ obj_cgroups.add(memcg.objcg.value_())
+ for ptr in list_for_each_entry('struct obj_cgroup',
+ memcg.objcg_list.address_of_(),
+ 'list'):
+ obj_cgroups.add(ptr.value_())
+
+ # look over all slab pages, belonging to non-root memcgs
+ # and look for objects belonging to the given memory cgroup
+ for page in for_each_slab_page(prog):
+ objcg_vec_raw = page.obj_cgroups.value_()
+ if objcg_vec_raw == 0:
+ continue
+ cache = page.slab_cache
+ if not cache:
+ continue
+ addr = cache.value_()
+ caches[addr] = cache
+ # clear the lowest bit to get the true obj_cgroups
+ objcg_vec = Object(prog, page.obj_cgroups.type_,
+ value=objcg_vec_raw & ~1)
+
+ if addr not in stats:
+ stats[addr] = 0
+
+ # one vector slot per object in the slab
+ for i in range(oo_objects(cache)):
+ if objcg_vec[i].value_() in obj_cgroups:
+ stats[addr] += 1
+
+ # only caches holding at least one object of this cgroup are shown
+ for addr in caches:
+ if stats[addr] > 0:
+ cache_show(caches[addr], cfg, stats[addr])
+
+ else:
+ # no struct obj_cgroup: walk the memcg's own kmem_caches list
+ for s in list_for_each_entry('struct kmem_cache',
+ memcg.kmem_caches.address_of_(),
+ 'memcg_params.kmem_caches_node'):
+ cache_show(s, cfg, None)
+
+
+main()
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 30ed0e06f52a..1a303a81aeef 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -45,7 +45,7 @@ int monitor_device(const char *device_name,
if (fd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
- goto exit_close_error;
+ goto exit_free_name;
}
req.lineoffset = line;
@@ -117,6 +117,7 @@ int monitor_device(const char *device_name,
exit_close_error:
if (close(fd) == -1)
perror("Failed to close GPIO character device file");
+exit_free_name:
free(chrdev_name);
return ret;
}
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 06003789e7c7..16a5d9cb9da2 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -75,7 +75,7 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
ret = -errno;
fprintf(stderr, "Failed to open %s, %s\n",
chrdev_name, strerror(errno));
- goto exit_close_error;
+ goto exit_free_name;
}
for (i = 0; i < nlines; i++)
@@ -94,9 +94,9 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
"GPIO_GET_LINEHANDLE_IOCTL", ret, strerror(errno));
}
-exit_close_error:
if (close(fd) == -1)
perror("Failed to close GPIO character device file");
+exit_free_name:
free(chrdev_name);
return ret < 0 ? ret : req.fd;
}
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index 8a71ad36f83b..b08d7a5e779b 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -94,7 +94,7 @@ int list_device(const char *device_name)
if (fd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
- goto exit_close_error;
+ goto exit_free_name;
}
/* Inspect this GPIO chip */
@@ -141,6 +141,7 @@ int list_device(const char *device_name)
exit_close_error:
if (close(fd) == -1)
perror("Failed to close GPIO character device file");
+exit_free_name:
free(chrdev_name);
return ret;
}
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index ee9c1bb2293e..1e6fd6ca513b 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -437,7 +437,7 @@ void kvp_get_os_info(void)
/*
* Parse the /etc/os-release file if present:
- * http://www.freedesktop.org/software/systemd/man/os-release.html
+ * https://www.freedesktop.org/software/systemd/man/os-release.html
*/
file = fopen("/etc/os-release", "r");
if (file != NULL) {
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
new file mode 100644
index 000000000000..4867d549e3c1
--- /dev/null
+++ b/tools/include/linux/btf_ids.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_BTF_IDS_H
+#define _LINUX_BTF_IDS_H
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+
+#include <linux/compiler.h> /* for __PASTE */
+
+/*
+ * Following macros help to define lists of BTF IDs placed
+ * in .BTF_ids section. They are initially filled with zeros
+ * (during compilation) and resolved later during the
+ * linking phase by resolve_btfids tool.
+ *
+ * Any change in list layout must be reflected in resolve_btfids
+ * tool logic.
+ */
+
+#define BTF_IDS_SECTION ".BTF_ids"
+
+#define ____BTF_ID(symbol) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".local " #symbol " ; \n" \
+".type " #symbol ", STT_OBJECT; \n" \
+".size " #symbol ", 4; \n" \
+#symbol ": \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#define __BTF_ID(symbol) \
+ ____BTF_ID(symbol)
+
+#define __ID(prefix) \
+ __PASTE(prefix, __COUNTER__)
+
+/*
+ * The BTF_ID defines unique symbol for each ID pointing
+ * to 4 zero bytes.
+ */
+#define BTF_ID(prefix, name) \
+ __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__))
+
+/*
+ * The BTF_ID_LIST macro defines pure (unsorted) list
+ * of BTF IDs, with following layout:
+ *
+ * BTF_ID_LIST(list1)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ *
+ * list1:
+ * __BTF_ID__type1__name1__1:
+ * .zero 4
+ * __BTF_ID__type2__name2__2:
+ * .zero 4
+ *
+ */
+#define __BTF_ID_LIST(name, scope) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+"." #scope " " #name "; \n" \
+#name ":; \n" \
+".popsection; \n"); \
+
+#define BTF_ID_LIST(name) \
+__BTF_ID_LIST(name, local) \
+extern u32 name[];
+
+#define BTF_ID_LIST_GLOBAL(name) \
+__BTF_ID_LIST(name, globl)
+
+/*
+ * The BTF_ID_UNUSED macro defines 4 zero bytes.
+ * It's used when we want to define 'unused' entry
+ * in BTF_ID_LIST, like:
+ *
+ * BTF_ID_LIST(bpf_skb_output_btf_ids)
+ * BTF_ID(struct, sk_buff)
+ * BTF_ID_UNUSED
+ * BTF_ID(struct, task_struct)
+ */
+
+#define BTF_ID_UNUSED \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#else
+
+#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID(prefix, name)
+#define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+
+#endif /* CONFIG_DEBUG_INFO_BTF */
+
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first member in their memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
+
+#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 2f2f4082225e..2b3f7353e891 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -199,4 +199,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
# define __fallthrough
#endif
+/* Indirect macros required for expanded argument pasting, e.g. __LINE__. */
+#define ___PASTE(a, b) a##b
+#define __PASTE(a, b) ___PASTE(a, b)
+
#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
index 348c6f47e4cc..af8d0fe1c6ce 100644
--- a/tools/include/linux/jhash.h
+++ b/tools/include/linux/jhash.h
@@ -5,7 +5,7 @@
*
* Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
*
- * http://burtleburtle.net/bob/hash/
+ * https://burtleburtle.net/bob/hash/
*
* These are the credits from Bob's sources:
*
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f4a01305d9a6..995b36c2ea7d 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -606,9 +606,9 @@ __SYSCALL(__NR_sendto, sys_sendto)
#define __NR_recvfrom 207
__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom)
#define __NR_setsockopt 208
-__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt)
+__SC_COMP(__NR_setsockopt, sys_setsockopt, sys_setsockopt)
#define __NR_getsockopt 209
-__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt)
+__SC_COMP(__NR_getsockopt, sys_getsockopt, sys_getsockopt)
#define __NR_shutdown 210
__SYSCALL(__NR_shutdown, sys_shutdown)
#define __NR_sendmsg 211
@@ -850,6 +850,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
#endif
+#define __NR_close_range 436
+__SYSCALL(__NR_close_range, sys_close_range)
#define __NR_openat2 437
__SYSCALL(__NR_openat2, sys_openat2)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..00546062e023 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -55,7 +55,7 @@ extern "C" {
* cause the related events to not be seen.
*
* I915_RESET_UEVENT - Event is generated just before an attempt to reset the
- * the GPU. The value supplied with the event is always 1. NOTE: Disable
+ * GPU. The value supplied with the event is always 1. NOTE: Disable
* reset via module parameter will cause this event to not be seen.
*/
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
@@ -1934,7 +1934,7 @@ enum drm_i915_perf_property_id {
/**
* The value specifies which set of OA unit metrics should be
- * be configured, defining the contents of any OA unit reports.
+ * configured, defining the contents of any OA unit reports.
*
* This property is available in perf revision 1.
*/
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8bd33050b7bb..0480f893facd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type */
};
+union bpf_iter_link_info {
+ struct {
+ __u32 map_fd;
+ } map;
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@@ -117,6 +123,7 @@ enum bpf_cmd {
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
+ BPF_LINK_DETACH,
};
enum bpf_map_type {
@@ -189,6 +196,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
+ BPF_PROG_TYPE_SK_LOOKUP,
};
enum bpf_attach_type {
@@ -226,6 +234,10 @@ enum bpf_attach_type {
BPF_CGROUP_INET4_GETSOCKNAME,
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
+ BPF_CGROUP_INET_SOCK_RELEASE,
+ BPF_XDP_CPUMAP,
+ BPF_SK_LOOKUP,
+ BPF_XDP,
__MAX_BPF_ATTACH_TYPE
};
@@ -238,6 +250,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_CGROUP = 3,
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
+ BPF_LINK_TYPE_XDP = 6,
MAX_BPF_LINK_TYPE,
};
@@ -603,9 +616,14 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
- __u32 target_fd; /* object to attach to */
+ union {
+ __u32 target_fd; /* object to attach to */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
+ __aligned_u64 iter_info; /* extra bpf_iter_link_info */
+ __u32 iter_info_len; /* iter_info length */
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
@@ -618,6 +636,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct {
+ __u32 link_fd;
+ } link_detach;
+
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
@@ -653,7 +675,7 @@ union bpf_attr {
* Map value associated to *key*, or **NULL** if no entry was
* found.
*
- * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
* Description
* Add or update the value of the entry associated to *key* in
* *map* with *value*. *flags* is one of:
@@ -671,13 +693,13 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * long bpf_map_delete_elem(struct bpf_map *map, const void *key)
* Description
* Delete entry with *key* from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
* kernel space address *unsafe_ptr* and store the data in *dst*.
@@ -695,7 +717,7 @@ union bpf_attr {
* Return
* Current *ktime*.
*
- * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
@@ -775,7 +797,7 @@ union bpf_attr {
* Return
* The SMP id of the processor running the program.
*
- * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. *flags* are a combination of
@@ -792,7 +814,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
* Description
* Recompute the layer 3 (e.g. IP) checksum for the packet
* associated to *skb*. Computation is incremental, so the helper
@@ -817,7 +839,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
* Description
* Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
* packet associated to *skb*. Computation is incremental, so the
@@ -849,7 +871,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
* Description
* This special helper is used to trigger a "tail call", or in
* other words, to jump into another eBPF program. The same stack
@@ -880,7 +902,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
* Description
* Clone and redirect the packet associated to *skb* to another
* net device of index *ifindex*. Both ingress and egress
@@ -916,7 +938,7 @@ union bpf_attr {
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
*
- * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * long bpf_get_current_comm(void *buf, u32 size_of_buf)
* Description
* Copy the **comm** attribute of the current task into *buf* of
* *size_of_buf*. The **comm** attribute contains the name of
@@ -953,7 +975,7 @@ union bpf_attr {
* Return
* The classid, or 0 for the default unconfigured classid.
*
- * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
* Description
* Push a *vlan_tci* (VLAN tag control information) of protocol
* *vlan_proto* to the packet associated to *skb*, then update
@@ -969,7 +991,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * long bpf_skb_vlan_pop(struct sk_buff *skb)
* Description
* Pop a VLAN header from the packet associated to *skb*.
*
@@ -981,7 +1003,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
* Description
* Get tunnel metadata. This helper takes a pointer *key* to an
* empty **struct bpf_tunnel_key** of **size**, that will be
@@ -1032,7 +1054,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
* Description
* Populate tunnel metadata for packet associated to *skb.* The
* tunnel metadata is set to the contents of *key*, of *size*. The
@@ -1098,7 +1120,7 @@ union bpf_attr {
* The value of the perf event counter read from the map, or a
* negative error code in case of failure.
*
- * int bpf_redirect(u32 ifindex, u64 flags)
+ * long bpf_redirect(u32 ifindex, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*.
* This helper is somewhat similar to **bpf_clone_redirect**\
@@ -1145,7 +1167,7 @@ union bpf_attr {
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*
- * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1190,7 +1212,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
* Description
* This helper was provided as an easy way to load data from a
* packet. It can be used to load *len* bytes from *offset* from
@@ -1207,7 +1229,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
* Description
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
@@ -1276,7 +1298,7 @@ union bpf_attr {
* The checksum result, or a negative error code in case of
* failure.
*
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Retrieve tunnel options metadata for the packet associated to
* *skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1294,7 +1316,7 @@ union bpf_attr {
* Return
* The size of the option data retrieved.
*
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Set tunnel options metadata for the packet associated to *skb*
* to the option data contained in the raw buffer *opt* of *size*.
@@ -1304,7 +1326,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
* Description
* Change the protocol of the *skb* to *proto*. Currently
* supported are transition from IPv4 to IPv6, and from IPv6 to
@@ -1331,7 +1353,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * long bpf_skb_change_type(struct sk_buff *skb, u32 type)
* Description
* Change the packet type for the packet associated to *skb*. This
* comes down to setting *skb*\ **->pkt_type** to *type*, except
@@ -1358,7 +1380,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
* Description
* Check whether *skb* is a descendant of the cgroup2 held by
* *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
@@ -1389,7 +1411,7 @@ union bpf_attr {
* Return
* A pointer to the current task struct.
*
- * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * long bpf_probe_write_user(void *dst, const void *src, u32 len)
* Description
* Attempt in a safe way to write *len* bytes from the buffer
* *src* to *dst* in memory. It only works for threads that are in
@@ -1408,7 +1430,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
* Description
* Check whether the probe is being run is the context of a given
* subset of the cgroup2 hierarchy. The cgroup2 to test is held by
@@ -1420,7 +1442,7 @@ union bpf_attr {
* * 1, if the *skb* task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
- * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
* Description
* Resize (trim or grow) the packet associated to *skb* to the
* new *len*. The *flags* are reserved for future usage, and must
@@ -1444,7 +1466,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * long bpf_skb_pull_data(struct sk_buff *skb, u32 len)
* Description
* Pull in non-linear data in case the *skb* is non-linear and not
* all of *len* are part of the linear section. Make *len* bytes
@@ -1500,7 +1522,7 @@ union bpf_attr {
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
*
- * int bpf_get_numa_node_id(void)
+ * long bpf_get_numa_node_id(void)
* Description
* Return the id of the current NUMA node. The primary use case
* for this helper is the selection of sockets for the local NUMA
@@ -1511,7 +1533,7 @@ union bpf_attr {
* Return
* The id of current NUMA node.
*
- * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
* Description
* Grows headroom of packet associated to *skb* and adjusts the
* offset of the MAC header accordingly, adding *len* bytes of
@@ -1532,7 +1554,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
* it is possible to use a negative value for *delta*. This helper
@@ -1547,7 +1569,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
@@ -1595,14 +1617,14 @@ union bpf_attr {
* is returned (note that **overflowuid** might also be the actual
* UID value for the socket).
*
- * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * long bpf_set_hash(struct sk_buff *skb, u32 hash)
* Description
* Set the full hash for *skb* (set the field *skb*\ **->hash**)
* to value *hash*.
* Return
* 0
*
- * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **setsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1621,16 +1643,19 @@ union bpf_attr {
*
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
- * **TCP_BPF_SNDCWND_CLAMP**.
+ * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
* Description
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
@@ -1676,7 +1701,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the endpoint referenced by *map* at
* index *key*. Depending on its type, this *map* can contain
@@ -1697,7 +1722,7 @@ union bpf_attr {
* **XDP_REDIRECT** on success, or the value of the two lower bits
* of the *flags* argument on error.
*
- * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1708,7 +1733,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a *map* referencing sockets. The
* *skops* is used as a new value for the entry associated to
@@ -1727,7 +1752,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust the address pointed by *xdp_md*\ **->data_meta** by
* *delta* (which can be positive or negative). Note that this
@@ -1756,7 +1781,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* Read the value of a perf event counter, and store it into *buf*
* of size *buf_size*. This helper relies on a *map* of type
@@ -1806,7 +1831,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* For en eBPF program attached to a perf event, retrieve the
* value of the event counter associated to *ctx* and store it in
@@ -1817,7 +1842,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **getsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1842,7 +1867,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * long bpf_override_return(struct pt_regs *regs, u64 rc)
* Description
* Used for error injection, this helper uses kprobes to override
* the return value of the probed function, and to set it to *rc*.
@@ -1867,7 +1892,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
* Description
* Attempt to set the value of the **bpf_sock_ops_cb_flags** field
* for the full TCP socket associated to *bpf_sock_ops* to
@@ -1911,7 +1936,7 @@ union bpf_attr {
* be set is returned (which comes down to 0 if all bits were set
* as required).
*
- * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
@@ -1925,7 +1950,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
* Description
* For socket policies, apply the verdict of the eBPF program to
* the next *bytes* (number of bytes) of message *msg*.
@@ -1959,7 +1984,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
* Description
* For socket policies, prevent the execution of the verdict eBPF
* program for message *msg* until *bytes* (byte number) have been
@@ -1977,7 +2002,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
* Description
* For socket policies, pull in non-linear data from user space
* for *msg* and set pointers *msg*\ **->data** and *msg*\
@@ -2008,7 +2033,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
* Description
* Bind the socket associated to *ctx* to the address pointed by
* *addr*, of length *addr_len*. This allows for making outgoing
@@ -2026,7 +2051,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
* possible to both shrink and grow the packet tail.
@@ -2040,7 +2065,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
* Description
* Retrieve the XFRM state (IP transform framework, see also
* **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
@@ -2056,7 +2081,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *ctx*, which is a pointer
@@ -2089,7 +2114,7 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
- * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
* This helper is similar to **bpf_skb_load_bytes**\ () in that
* it provides an easy way to load *len* bytes from *offset*
@@ -2111,7 +2136,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
* Description
* Do FIB lookup in kernel tables using parameters in *params*.
* If lookup is successful and result shows packet is to be
@@ -2142,7 +2167,7 @@ union bpf_attr {
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* packet is not forwarded or needs assist from full stack
*
- * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
* The *skops* is used as a new value for the entry associated to
@@ -2161,7 +2186,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
@@ -2175,7 +2200,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
@@ -2189,7 +2214,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
@@ -2226,7 +2251,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
@@ -2241,7 +2266,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
@@ -2257,7 +2282,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
@@ -2286,7 +2311,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_repeat(void *ctx)
+ * long bpf_rc_repeat(void *ctx)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded repeat key message. This delays
@@ -2305,7 +2330,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded key press with *scancode*,
@@ -2370,7 +2395,7 @@ union bpf_attr {
* Return
* A pointer to the local storage area.
*
- * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
* **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
@@ -2415,7 +2440,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -2452,7 +2477,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -2471,7 +2496,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * int bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(struct bpf_sock *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
@@ -2479,7 +2504,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
* Description
* Push an element *value* in *map*. *flags* is one of:
*
@@ -2489,19 +2514,19 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * long bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * long bpf_map_peek_elem(struct bpf_map *map, void *value)
* Description
* Get an element from *map* without removing it.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* For socket policies, insert *len* bytes into *msg* at offset
* *start*.
@@ -2517,7 +2542,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* Will remove *len* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
@@ -2529,7 +2554,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded pointer movement.
@@ -2543,7 +2568,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * long bpf_spin_lock(struct bpf_spin_lock *lock)
* Description
* Acquire a spinlock represented by the pointer *lock*, which is
* stored as part of a value of a map. Taking the lock allows to
@@ -2591,7 +2616,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * long bpf_spin_unlock(struct bpf_spin_lock *lock)
* Description
* Release the *lock* previously locked by a call to
* **bpf_spin_lock**\ (\ *lock*\ ).
@@ -2614,7 +2639,7 @@ union bpf_attr {
* A **struct bpf_tcp_sock** pointer on success, or **NULL** in
* case of failure.
*
- * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * long bpf_skb_ecn_set_ce(struct sk_buff *skb)
* Description
* Set ECN (Explicit Congestion Notification) field of IP header
* to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2651,7 +2676,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
@@ -2666,7 +2691,7 @@ union bpf_attr {
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
*
- * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
* Description
* Get name of sysctl in /proc/sys/ and copy it into provided by
* program buffer *buf* of size *buf_len*.
@@ -2682,7 +2707,7 @@ union bpf_attr {
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated name in this case).
*
- * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get current value of sysctl as it is presented in /proc/sys
* (incl. newline, etc), and copy it as a string into provided
@@ -2701,7 +2726,7 @@ union bpf_attr {
* **-EINVAL** if current value was unavailable, e.g. because
* sysctl is uninitialized and read returns -EIO for it.
*
- * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get new value being written by user space to sysctl (before
* the actual write happens) and copy it as a string into
@@ -2718,7 +2743,7 @@ union bpf_attr {
*
* **-EINVAL** if sysctl is being read.
*
- * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
* Description
* Override new value being written by user space to sysctl with
* value provided by program in buffer *buf* of size *buf_len*.
@@ -2735,7 +2760,7 @@ union bpf_attr {
*
* **-EINVAL** if sysctl is being read.
*
- * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to a long integer according to the given base
@@ -2759,7 +2784,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
- * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to an unsigned long integer according to the
@@ -2810,7 +2835,7 @@ union bpf_attr {
* **NULL** if not found or there was an error in adding
* a new bpf-local-storage.
*
- * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
* Description
* Delete a bpf-local-storage from a *sk*.
* Return
@@ -2818,7 +2843,7 @@ union bpf_attr {
*
* **-ENOENT** if the bpf-local-storage cannot be found.
*
- * int bpf_send_signal(u32 sig)
+ * long bpf_send_signal(u32 sig)
* Description
* Send signal *sig* to the process of the current task.
* The signal may be delivered to any of this process's threads.
@@ -2859,7 +2884,7 @@ union bpf_attr {
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*
- * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -2883,21 +2908,21 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
@@ -2941,7 +2966,7 @@ union bpf_attr {
* including the trailing NUL character. On error, a negative
* value.
*
- * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
@@ -2949,14 +2974,14 @@ union bpf_attr {
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*
- * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
* Description
* Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
* *rcv_nxt* is the ack_seq to be sent out.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_send_signal_thread(u32 sig)
+ * long bpf_send_signal_thread(u32 sig)
* Description
* Send signal *sig* to the thread corresponding to the current task.
* Return
@@ -2976,7 +3001,7 @@ union bpf_attr {
* Return
* The 64 bit jiffies
*
- * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
* Description
* For an eBPF program attached to a perf event, retrieve the
* branch records (**struct perf_branch_entry**) associated to *ctx*
@@ -2995,7 +3020,7 @@ union bpf_attr {
*
* **-ENOENT** if architecture does not support branch records.
*
- * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
@@ -3007,7 +3032,7 @@ union bpf_attr {
*
* **-ENOENT** if pidns does not exists for the current task.
*
- * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -3062,8 +3087,12 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ * **BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
* will cause *skb* to be delivered to the specified socket.
@@ -3089,6 +3118,56 @@ union bpf_attr {
* **-ESOCKTNOSUPPORT** if the socket type is not supported
* (reuseport).
*
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ * Select the *sk* as a result of a socket lookup.
+ *
+ * For the operation to succeed, the passed socket must be compatible
+ * with the packet description provided by the *ctx* object.
+ *
+ * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ * be an exact match. While IP family (**AF_INET** or
+ * **AF_INET6**) must be compatible, that is IPv6 sockets
+ * that are not v6-only can be selected for IPv4 packets.
+ *
+ * Only TCP listeners and UDP unconnected sockets can be
+ * selected. *sk* can also be NULL to reset any previous
+ * selection.
+ *
+ * *flags* argument can be a combination of the following values:
+ *
+ * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ * socket selection, potentially done by a BPF program
+ * that ran before us.
+ *
+ * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ * load-balancing within reuseport group for the socket
+ * being selected.
+ *
+ * On success *ctx->sk* will point to the selected socket.
+ *
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ * not compatible with packet family (*ctx->family*).
+ *
+ * * **-EEXIST** if socket has been already selected,
+ * potentially by another program, and
+ * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ * * **-EINVAL** if unsupported flags were specified.
+ *
+ * * **-EPROTOTYPE** if socket L4 protocol
+ * (*sk->protocol*) doesn't match packet protocol
+ * (*ctx->protocol*).
+ *
+ * * **-ESOCKTNOSUPPORT** if socket is not in allowed
+ * state (TCP listening or UDP unconnected).
+ *
* u64 bpf_ktime_get_boot_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
@@ -3097,7 +3176,7 @@ union bpf_attr {
* Return
* Current *ktime*.
*
- * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
* Description
* **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
* out the format string.
@@ -3126,7 +3205,7 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
- * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * long bpf_seq_write(struct seq_file *m, const void *data, u32 len)
* Description
* **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
* The *m* represents the seq_file. The *data* and *len* represent the
@@ -3168,7 +3247,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
@@ -3222,7 +3301,7 @@ union bpf_attr {
* Return
* Requested value, or 0, if *flags* are not recognized.
*
- * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * long bpf_csum_level(struct sk_buff *skb, u64 level)
* Description
* Change the skbs checksum level by one layer up or down, or
* reset it entirely to none in order to have the stack perform
@@ -3253,6 +3332,69 @@ union bpf_attr {
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
* is returned or the error code -EACCES in case the skb is not
* subject to CHECKSUM_UNNECESSARY.
+ *
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *task*, which is a valid
+ * pointer to struct task_struct. To store the stacktrace, the
+ * bpf program provides *buf* with a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_task_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to a sufficiently large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3390,7 +3532,14 @@ union bpf_attr {
FN(ringbuf_submit), \
FN(ringbuf_discard), \
FN(ringbuf_query), \
- FN(csum_level),
+ FN(csum_level), \
+ FN(skc_to_tcp6_sock), \
+ FN(skc_to_tcp_sock), \
+ FN(skc_to_tcp_timewait_sock), \
+ FN(skc_to_tcp_request_sock), \
+ FN(skc_to_udp6_sock), \
+ FN(get_task_stack), \
+ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3532,6 +3681,12 @@ enum {
BPF_RINGBUF_HDR_SZ = 8,
};
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+ BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0),
+ BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1),
+};
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@@ -3775,6 +3930,19 @@ struct bpf_devmap_val {
} bpf_prog;
};
+/* CPUMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+ __u32 qsize; /* queue size to remote target CPU */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
enum sk_action {
SK_DROP = 0,
SK_PASS,
@@ -3907,12 +4075,15 @@ struct bpf_link_info {
__u32 netns_ino;
__u32 attach_type;
} netns;
+ struct {
+ __u32 ifindex;
+ } xdp;
};
} __attribute__((aligned(8)));
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
@@ -4261,4 +4432,19 @@ struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+ __u32 family; /* Protocol family (AF_INET, AF_INET6) */
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+ __u32 remote_ip4; /* Network byte order */
+ __u32 remote_ip6[4]; /* Network byte order */
+ __u32 remote_port; /* Network byte order */
+ __u32 local_ip4; /* Network byte order */
+ __u32 local_ip6[4]; /* Network byte order */
+ __u32 local_port; /* Host byte order */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index cafedbbfefbe..781e482dc499 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -344,6 +344,7 @@ enum {
IFLA_BRPORT_ISOLATED,
IFLA_BRPORT_BACKUP_PORT,
IFLA_BRPORT_MRP_RING_OPEN,
+ IFLA_BRPORT_MRP_IN_OPEN,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index be328c59389d..a78a8096f4ce 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -73,9 +73,12 @@ struct xdp_umem_reg {
};
struct xdp_statistics {
- __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_dropped; /* Dropped for other reasons */
__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 rx_ring_full; /* Dropped due to rx ring being full */
+ __u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */
+ __u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */
};
struct xdp_options {
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 8533bf07450f..3d0d8231dc19 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -123,6 +123,7 @@ struct in_addr {
#define IP_CHECKSUM 23
#define IP_BIND_ADDRESS_NO_PORT 24
#define IP_RECVFRAGSIZE 25
+#define IP_RECVERR_RFC4884 26
/* IP_MTU_DISCOVER values */
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 21a1edd08cbe..077e7ee69e3d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -383,7 +383,8 @@ struct perf_event_attr {
bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
- __reserved_1 : 31;
+ text_poke : 1, /* include text poke events */
+ __reserved_1 : 30;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -1041,12 +1042,35 @@ enum perf_event_type {
*/
PERF_RECORD_CGROUP = 19,
+ /*
+ * Records changes to kernel text i.e. self-modified code. 'old_len' is
+ * the number of old bytes, 'new_len' is the number of new bytes. Either
+ * 'old_len' or 'new_len' may be zero to indicate, for example, the
+ * addition or removal of a trampoline. 'bytes' contains the old bytes
+ * followed immediately by the new bytes.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 addr;
+ * u16 old_len;
+ * u16 new_len;
+ * u8 bytes[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_TEXT_POKE = 20,
+
PERF_RECORD_MAX, /* non-ABI */
};
enum perf_record_ksymbol_type {
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
+ /*
+ * Out of line code such as kprobe-replaced instructions or optimized
+ * kprobes or ftrace trampolines.
+ */
+ PERF_RECORD_KSYMBOL_TYPE_OOL = 2,
PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
};
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 58d44d5eee31..5e6cb9debe37 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -8,6 +8,7 @@
#include <poll.h>
#include <stdlib.h>
#include <unistd.h>
+#include <string.h>
void fdarray__init(struct fdarray *fda, int nr_autogrow)
{
@@ -19,7 +20,7 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow)
int fdarray__grow(struct fdarray *fda, int nr)
{
- void *priv;
+ struct priv *priv;
int nr_alloc = fda->nr_alloc + nr;
size_t psize = sizeof(fda->priv[0]) * nr_alloc;
size_t size = sizeof(struct pollfd) * nr_alloc;
@@ -34,6 +35,9 @@ int fdarray__grow(struct fdarray *fda, int nr)
return -ENOMEM;
}
+ memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr);
+ memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr);
+
fda->nr_alloc = nr_alloc;
fda->entries = entries;
fda->priv = priv;
@@ -69,7 +73,7 @@ void fdarray__delete(struct fdarray *fda)
free(fda);
}
-int fdarray__add(struct fdarray *fda, int fd, short revents)
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags)
{
int pos = fda->nr;
@@ -79,6 +83,7 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
fda->entries[fda->nr].fd = fd;
fda->entries[fda->nr].events = revents;
+ fda->priv[fda->nr].flags = flags;
fda->nr++;
return pos;
}
@@ -93,22 +98,22 @@ int fdarray__filter(struct fdarray *fda, short revents,
return 0;
for (fd = 0; fd < fda->nr; ++fd) {
+ if (!fda->entries[fd].events)
+ continue;
+
if (fda->entries[fd].revents & revents) {
if (entry_destructor)
entry_destructor(fda, fd, arg);
+ fda->entries[fd].revents = fda->entries[fd].events = 0;
continue;
}
- if (fd != nr) {
- fda->entries[nr] = fda->entries[fd];
- fda->priv[nr] = fda->priv[fd];
- }
-
- ++nr;
+ if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable))
+ ++nr;
}
- return fda->nr = nr;
+ return nr;
}
int fdarray__poll(struct fdarray *fda, int timeout)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index b39557d1a88f..7fcf21a33c0c 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -21,19 +21,27 @@ struct fdarray {
int nr_alloc;
int nr_autogrow;
struct pollfd *entries;
- union {
- int idx;
- void *ptr;
+ struct priv {
+ union {
+ int idx;
+ void *ptr;
+ };
+ unsigned int flags;
} *priv;
};
+enum fdarray_flags {
+ fdarray_flag__default = 0x00000000,
+ fdarray_flag__nonfilterable = 0x00000001
+};
+
void fdarray__init(struct fdarray *fda, int nr_autogrow);
void fdarray__exit(struct fdarray *fda);
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
void fdarray__delete(struct fdarray *fda);
-int fdarray__add(struct fdarray *fda, int fd, short revents);
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a7329b671c41..0750681057c2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -598,10 +598,24 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.prog_fd = prog_fd;
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
+ attr.link_create.flags = OPTS_GET(opts, flags, 0);
+ attr.link_create.iter_info =
+ ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
+int bpf_link_detach(int link_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_detach.link_fd = link_fd;
+
+ return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+}
+
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index dbef24ebcfcb..015d13f25fcc 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,15 +168,21 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ union bpf_iter_link_info *iter_info;
+ __u32 iter_info_len;
};
-#define bpf_link_create_opts__last_field sz
+#define bpf_link_create_opts__last_field iter_info_len
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts);
+LIBBPF_API int bpf_link_detach(int link_fd);
+
struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7009dc90e012..eae5cccff761 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -217,7 +217,7 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ_INTO(dst, src, a, ...) \
({ \
- ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \
+ ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \
})
/*
@@ -227,7 +227,7 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \
({ \
- ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+ ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
})
/*
@@ -254,8 +254,8 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ(src, a, ...) \
({ \
- ___type(src, a, ##__VA_ARGS__) __r; \
- BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
__r; \
})
diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
index fbe28008450f..ec9db4feca9f 100644
--- a/tools/lib/bpf/bpf_endian.h
+++ b/tools/lib/bpf/bpf_endian.h
@@ -2,8 +2,35 @@
#ifndef __BPF_ENDIAN__
#define __BPF_ENDIAN__
-#include <linux/stddef.h>
-#include <linux/swab.h>
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)( \
+ ___bpf_mvb(x, 16, 0, 1) | \
+ ___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)( \
+ ___bpf_mvb(x, 32, 0, 3) | \
+ ___bpf_mvb(x, 32, 1, 2) | \
+ ___bpf_mvb(x, 32, 2, 1) | \
+ ___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)( \
+ ___bpf_mvb(x, 64, 0, 7) | \
+ ___bpf_mvb(x, 64, 1, 6) | \
+ ___bpf_mvb(x, 64, 2, 5) | \
+ ___bpf_mvb(x, 64, 3, 4) | \
+ ___bpf_mvb(x, 64, 4, 3) | \
+ ___bpf_mvb(x, 64, 5, 2) | \
+ ___bpf_mvb(x, 64, 6, 1) | \
+ ___bpf_mvb(x, 64, 7, 0)))
/* LLVM's BPF target selects the endianness of the CPU
* it compiles on, or the user specifies (bpfel/bpfeb),
@@ -23,16 +50,16 @@
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define __bpf_ntohs(x) __builtin_bswap16(x)
# define __bpf_htons(x) __builtin_bswap16(x)
-# define __bpf_constant_ntohs(x) ___constant_swab16(x)
-# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_constant_ntohs(x) ___bpf_swab16(x)
+# define __bpf_constant_htons(x) ___bpf_swab16(x)
# define __bpf_ntohl(x) __builtin_bswap32(x)
# define __bpf_htonl(x) __builtin_bswap32(x)
-# define __bpf_constant_ntohl(x) ___constant_swab32(x)
-# define __bpf_constant_htonl(x) ___constant_swab32(x)
+# define __bpf_constant_ntohl(x) ___bpf_swab32(x)
+# define __bpf_constant_htonl(x) ___bpf_swab32(x)
# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
-# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
-# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
+# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define __bpf_ntohs(x) (x)
# define __bpf_htons(x) (x)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f67dce2af802..bc14db706b88 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -40,7 +40,7 @@
* Helper macro to manipulate data structures
*/
#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
#endif
#ifndef container_of
#define container_of(ptr, type, member) \
@@ -75,5 +75,6 @@ enum libbpf_tristate {
};
#define __kconfig __attribute__((section(".kconfig")))
+#define __ksym __attribute__((section(".ksyms")))
#endif
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 58eceb884df3..eebf020cbe3e 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -215,7 +215,7 @@ struct pt_regs;
#define PT_REGS_PARM5(x) ((x)->regs[8])
#define PT_REGS_RET(x) ((x)->regs[31])
#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_RC(x) ((x)->regs[2])
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
@@ -226,7 +226,7 @@ struct pt_regs;
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index bfef3d606b54..4843e44916f7 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -386,10 +386,10 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
void btf__free(struct btf *btf)
{
- if (!btf)
+ if (IS_ERR_OR_NULL(btf))
return;
- if (btf->fd != -1)
+ if (btf->fd >= 0)
close(btf->fd);
free(btf->data);
@@ -397,7 +397,7 @@ void btf__free(struct btf *btf)
free(btf);
}
-struct btf *btf__new(__u8 *data, __u32 size)
+struct btf *btf__new(const void *data, __u32 size)
{
struct btf *btf;
int err;
@@ -562,6 +562,83 @@ done:
return btf;
}
+struct btf *btf__parse_raw(const char *path)
+{
+ struct btf *btf = NULL;
+ void *data = NULL;
+ FILE *f = NULL;
+ __u16 magic;
+ int err = 0;
+ long sz;
+
+ f = fopen(path, "rb");
+ if (!f) {
+ err = -errno;
+ goto err_out;
+ }
+
+ /* check BTF magic */
+ if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
+ err = -EIO;
+ goto err_out;
+ }
+ if (magic != BTF_MAGIC) {
+ /* definitely not a raw BTF */
+ err = -EPROTO;
+ goto err_out;
+ }
+
+ /* get file size */
+ if (fseek(f, 0, SEEK_END)) {
+ err = -errno;
+ goto err_out;
+ }
+ sz = ftell(f);
+ if (sz < 0) {
+ err = -errno;
+ goto err_out;
+ }
+ /* rewind to the start */
+ if (fseek(f, 0, SEEK_SET)) {
+ err = -errno;
+ goto err_out;
+ }
+
+ /* pre-alloc memory and read all of BTF data */
+ data = malloc(sz);
+ if (!data) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ if (fread(data, 1, sz, f) < sz) {
+ err = -EIO;
+ goto err_out;
+ }
+
+ /* finally parse BTF data */
+ btf = btf__new(data, sz);
+
+err_out:
+ free(data);
+ if (f)
+ fclose(f);
+ return err ? ERR_PTR(err) : btf;
+}
+
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+ struct btf *btf;
+
+ if (btf_ext)
+ *btf_ext = NULL;
+
+ btf = btf__parse_raw(path);
+ if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+ return btf;
+
+ return btf__parse_elf(path, btf_ext);
+}
+
static int compare_vsi_off(const void *_a, const void *_b)
{
const struct btf_var_secinfo *a = _a;
@@ -700,6 +777,11 @@ int btf__fd(const struct btf *btf)
return btf->fd;
}
+void btf__set_fd(struct btf *btf, int fd)
+{
+ btf->fd = fd;
+}
+
const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
{
*size = btf->data_size;
@@ -1020,7 +1102,7 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
void btf_ext__free(struct btf_ext *btf_ext)
{
- if (!btf_ext)
+ if (IS_ERR_OR_NULL(btf_ext))
return;
free(btf_ext->data);
free(btf_ext);
@@ -2946,41 +3028,6 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
return 0;
}
-static struct btf *btf_load_raw(const char *path)
-{
- struct btf *btf;
- size_t read_cnt;
- struct stat st;
- void *data;
- FILE *f;
-
- if (stat(path, &st))
- return ERR_PTR(-errno);
-
- data = malloc(st.st_size);
- if (!data)
- return ERR_PTR(-ENOMEM);
-
- f = fopen(path, "rb");
- if (!f) {
- btf = ERR_PTR(-errno);
- goto cleanup;
- }
-
- read_cnt = fread(data, 1, st.st_size, f);
- fclose(f);
- if (read_cnt < st.st_size) {
- btf = ERR_PTR(-EBADF);
- goto cleanup;
- }
-
- btf = btf__new(data, read_cnt);
-
-cleanup:
- free(data);
- return btf;
-}
-
/*
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
@@ -3016,7 +3063,7 @@ struct btf *libbpf_find_kernel_btf(void)
continue;
if (locations[i].raw_btf)
- btf = btf_load_raw(path);
+ btf = btf__parse_raw(path);
else
btf = btf__parse_elf(path, NULL);
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 70c1b7ec2bd0..f4a1a1d2b9a3 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -63,9 +63,10 @@ struct btf_ext_header {
};
LIBBPF_API void btf__free(struct btf *btf);
-LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
-LIBBPF_API struct btf *btf__parse_elf(const char *path,
- struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_raw(const char *path);
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
@@ -79,6 +80,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
@@ -143,8 +145,10 @@ struct btf_dump_emit_type_decl_opts {
* necessary indentation already
*/
int indent_level;
+ /* strip all the const/volatile/restrict mods */
+ bool strip_mods;
};
-#define btf_dump_emit_type_decl_opts__last_field indent_level
+#define btf_dump_emit_type_decl_opts__last_field strip_mods
LIBBPF_API int
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
@@ -168,6 +172,11 @@ static inline bool btf_kflag(const struct btf_type *t)
return BTF_INFO_KFLAG(t->info);
}
+static inline bool btf_is_void(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_UNKN;
+}
+
static inline bool btf_is_int(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_INT;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bbb430317260..cf711168d34a 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -60,6 +60,7 @@ struct btf_dump {
const struct btf_ext *btf_ext;
btf_dump_printf_fn_t printf_fn;
struct btf_dump_opts opts;
+ bool strip_mods;
/* per-type auxiliary state */
struct btf_dump_type_aux_state *type_states;
@@ -182,7 +183,7 @@ void btf_dump__free(struct btf_dump *d)
{
int i, cnt;
- if (!d)
+ if (IS_ERR_OR_NULL(d))
return;
free(d->type_states);
@@ -1032,7 +1033,9 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
+ d->strip_mods = OPTS_GET(opts, strip_mods, false);
btf_dump_emit_type_decl(d, id, fname, lvl);
+ d->strip_mods = false;
return 0;
}
@@ -1045,6 +1048,10 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
stack_start = d->decl_stack_cnt;
for (;;) {
+ t = btf__type_by_id(d->btf, id);
+ if (d->strip_mods && btf_is_mod(t))
+ goto skip_mod;
+
err = btf_dump_push_decl_stack_id(d, id);
if (err < 0) {
/*
@@ -1056,12 +1063,11 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
d->decl_stack_cnt = stack_start;
return;
}
-
+skip_mod:
/* VOID */
if (id == 0)
break;
- t = btf__type_by_id(d->btf, id);
switch (btf_kind(t)) {
case BTF_KIND_PTR:
case BTF_KIND_VOLATILE:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 11e4725b8b1c..0a06124f7999 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -230,6 +230,7 @@ struct bpf_program {
struct bpf_insn *insns;
size_t insns_cnt, main_prog_cnt;
enum bpf_prog_type type;
+ bool load;
struct reloc_desc *reloc_desc;
int nr_reloc;
@@ -285,6 +286,7 @@ struct bpf_struct_ops {
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
enum libbpf_map_type {
@@ -310,6 +312,7 @@ struct bpf_map {
int map_ifindex;
int inner_map_fd;
struct bpf_map_def def;
+ __u32 numa_node;
__u32 btf_var_idx;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
@@ -329,24 +332,39 @@ struct bpf_map {
enum extern_type {
EXT_UNKNOWN,
- EXT_CHAR,
- EXT_BOOL,
- EXT_INT,
- EXT_TRISTATE,
- EXT_CHAR_ARR,
+ EXT_KCFG,
+ EXT_KSYM,
+};
+
+enum kcfg_type {
+ KCFG_UNKNOWN,
+ KCFG_CHAR,
+ KCFG_BOOL,
+ KCFG_INT,
+ KCFG_TRISTATE,
+ KCFG_CHAR_ARR,
};
struct extern_desc {
- const char *name;
+ enum extern_type type;
int sym_idx;
int btf_id;
- enum extern_type type;
- int sz;
- int align;
- int data_off;
- bool is_signed;
- bool is_weak;
+ int sec_btf_id;
+ const char *name;
bool is_set;
+ bool is_weak;
+ union {
+ struct {
+ enum kcfg_type type;
+ int sz;
+ int align;
+ int data_off;
+ bool is_signed;
+ } kcfg;
+ struct {
+ unsigned long long addr;
+ } ksym;
+ };
};
static LIST_HEAD(bpf_objects_list);
@@ -524,6 +542,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
prog->instances.fds = NULL;
prog->instances.nr = -1;
prog->type = BPF_PROG_TYPE_UNSPEC;
+ prog->load = true;
return 0;
errout:
@@ -1423,19 +1442,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
return NULL;
}
-static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
- char value)
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
+ char value)
{
- switch (ext->type) {
- case EXT_BOOL:
+ switch (ext->kcfg.type) {
+ case KCFG_BOOL:
if (value == 'm') {
- pr_warn("extern %s=%c should be tristate or char\n",
+ pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
ext->name, value);
return -EINVAL;
}
*(bool *)ext_val = value == 'y' ? true : false;
break;
- case EXT_TRISTATE:
+ case KCFG_TRISTATE:
if (value == 'y')
*(enum libbpf_tristate *)ext_val = TRI_YES;
else if (value == 'm')
@@ -1443,14 +1462,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
else /* value == 'n' */
*(enum libbpf_tristate *)ext_val = TRI_NO;
break;
- case EXT_CHAR:
+ case KCFG_CHAR:
*(char *)ext_val = value;
break;
- case EXT_UNKNOWN:
- case EXT_INT:
- case EXT_CHAR_ARR:
+ case KCFG_UNKNOWN:
+ case KCFG_INT:
+ case KCFG_CHAR_ARR:
default:
- pr_warn("extern %s=%c should be bool, tristate, or char\n",
+ pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
ext->name, value);
return -EINVAL;
}
@@ -1458,29 +1477,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
return 0;
}
-static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
- const char *value)
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
+ const char *value)
{
size_t len;
- if (ext->type != EXT_CHAR_ARR) {
- pr_warn("extern %s=%s should char array\n", ext->name, value);
+ if (ext->kcfg.type != KCFG_CHAR_ARR) {
+ pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
return -EINVAL;
}
len = strlen(value);
if (value[len - 1] != '"') {
- pr_warn("extern '%s': invalid string config '%s'\n",
+ pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
ext->name, value);
return -EINVAL;
}
/* strip quotes */
len -= 2;
- if (len >= ext->sz) {
- pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
- ext->name, value, len, ext->sz - 1);
- len = ext->sz - 1;
+ if (len >= ext->kcfg.sz) {
+ pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ ext->name, value, len, ext->kcfg.sz - 1);
+ len = ext->kcfg.sz - 1;
}
memcpy(ext_val, value + 1, len);
ext_val[len] = '\0';
@@ -1507,11 +1526,11 @@ static int parse_u64(const char *value, __u64 *res)
return 0;
}
-static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
- int bit_sz = ext->sz * 8;
+ int bit_sz = ext->kcfg.sz * 8;
- if (ext->sz == 8)
+ if (ext->kcfg.sz == 8)
return true;
/* Validate that value stored in u64 fits in integer of `ext->sz`
@@ -1526,26 +1545,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
* For unsigned target integer, check that all the (64 - Y) bits are
* zero.
*/
- if (ext->is_signed)
+ if (ext->kcfg.is_signed)
return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
else
return (v >> bit_sz) == 0;
}
-static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
- __u64 value)
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
+ __u64 value)
{
- if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
- pr_warn("extern %s=%llu should be integer\n",
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+ pr_warn("extern (kcfg) %s=%llu should be integer\n",
ext->name, (unsigned long long)value);
return -EINVAL;
}
- if (!is_ext_value_in_range(ext, value)) {
- pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
- ext->name, (unsigned long long)value, ext->sz);
+ if (!is_kcfg_value_in_range(ext, value)) {
+ pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+ ext->name, (unsigned long long)value, ext->kcfg.sz);
return -ERANGE;
}
- switch (ext->sz) {
+ switch (ext->kcfg.sz) {
case 1: *(__u8 *)ext_val = value; break;
case 2: *(__u16 *)ext_val = value; break;
case 4: *(__u32 *)ext_val = value; break;
@@ -1591,30 +1610,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
if (!ext || ext->is_set)
return 0;
- ext_val = data + ext->data_off;
+ ext_val = data + ext->kcfg.data_off;
value = sep + 1;
switch (*value) {
case 'y': case 'n': case 'm':
- err = set_ext_value_tri(ext, ext_val, *value);
+ err = set_kcfg_value_tri(ext, ext_val, *value);
break;
case '"':
- err = set_ext_value_str(ext, ext_val, value);
+ err = set_kcfg_value_str(ext, ext_val, value);
break;
default:
/* assume integer */
err = parse_u64(value, &num);
if (err) {
- pr_warn("extern %s=%s should be integer\n",
+ pr_warn("extern (kcfg) %s=%s should be integer\n",
ext->name, value);
return err;
}
- err = set_ext_value_num(ext, ext_val, num);
+ err = set_kcfg_value_num(ext, ext_val, num);
break;
}
if (err)
return err;
- pr_debug("extern %s=%s\n", ext->name, value);
+ pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
return 0;
}
@@ -1685,16 +1704,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
static int bpf_object__init_kconfig_map(struct bpf_object *obj)
{
- struct extern_desc *last_ext;
+ struct extern_desc *last_ext = NULL, *ext;
size_t map_sz;
- int err;
+ int i, err;
- if (obj->nr_extern == 0)
- return 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type == EXT_KCFG)
+ last_ext = ext;
+ }
- last_ext = &obj->externs[obj->nr_extern - 1];
- map_sz = last_ext->data_off + last_ext->sz;
+ if (!last_ext)
+ return 0;
+ map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
obj->efile.symbols_shndx,
NULL, map_sz);
@@ -1957,6 +1980,10 @@ static int parse_btf_map_def(struct bpf_object *obj,
return -EINVAL;
pr_debug("map '%s': found map_flags = %u.\n",
map->name, map->def.map_flags);
+ } else if (strcmp(name, "numa_node") == 0) {
+ if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ return -EINVAL;
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
@@ -2311,18 +2338,23 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
return false;
}
-static void bpf_object__sanitize_btf(struct bpf_object *obj)
+static bool btf_needs_sanitization(struct bpf_object *obj)
+{
+ bool has_func_global = obj->caps.btf_func_global;
+ bool has_datasec = obj->caps.btf_datasec;
+ bool has_func = obj->caps.btf_func;
+
+ return !has_func || !has_datasec || !has_func_global;
+}
+
+static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
bool has_func_global = obj->caps.btf_func_global;
bool has_datasec = obj->caps.btf_datasec;
bool has_func = obj->caps.btf_func;
- struct btf *btf = obj->btf;
struct btf_type *t;
int i, j, vlen;
- if (!obj->btf || (has_func && has_datasec && has_func_global))
- return;
-
for (i = 1; i <= btf__get_nr_types(btf); i++) {
t = (struct btf_type *)btf__type_by_id(btf, i);
@@ -2375,17 +2407,6 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj)
}
}
-static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
-{
- if (!obj->btf_ext)
- return;
-
- if (!obj->caps.btf_func) {
- btf_ext__free(obj->btf_ext);
- obj->btf_ext = NULL;
- }
-}
-
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
@@ -2446,19 +2467,11 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
return 0;
err = btf__finalize_data(obj, obj->btf);
- if (!err)
- return 0;
-
- pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
- btf__free(obj->btf);
- obj->btf = NULL;
- btf_ext__free(obj->btf_ext);
- obj->btf_ext = NULL;
-
- if (libbpf_needs_btf(obj)) {
- pr_warn("BTF is required, but is missing or corrupted.\n");
- return -ENOENT;
+ if (err) {
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+ return err;
}
+
return 0;
}
@@ -2479,51 +2492,77 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
{
+ bool need_vmlinux_btf = false;
struct bpf_program *prog;
int err;
+ /* CO-RE relocations need kernel BTF */
+ if (obj->btf_ext && obj->btf_ext->field_reloc_info.len)
+ need_vmlinux_btf = true;
+
bpf_object__for_each_program(prog, obj) {
+ if (!prog->load)
+ continue;
if (libbpf_prog_needs_vmlinux_btf(prog)) {
- obj->btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(obj->btf_vmlinux)) {
- err = PTR_ERR(obj->btf_vmlinux);
- pr_warn("Error loading vmlinux BTF: %d\n", err);
- obj->btf_vmlinux = NULL;
- return err;
- }
- return 0;
+ need_vmlinux_btf = true;
+ break;
}
}
+ if (!need_vmlinux_btf)
+ return 0;
+
+ obj->btf_vmlinux = libbpf_find_kernel_btf();
+ if (IS_ERR(obj->btf_vmlinux)) {
+ err = PTR_ERR(obj->btf_vmlinux);
+ pr_warn("Error loading vmlinux BTF: %d\n", err);
+ obj->btf_vmlinux = NULL;
+ return err;
+ }
return 0;
}
static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
+ struct btf *kern_btf = obj->btf;
+ bool btf_mandatory, sanitize;
int err = 0;
if (!obj->btf)
return 0;
- bpf_object__sanitize_btf(obj);
- bpf_object__sanitize_btf_ext(obj);
+ sanitize = btf_needs_sanitization(obj);
+ if (sanitize) {
+ const void *raw_data;
+ __u32 sz;
- err = btf__load(obj->btf);
- if (err) {
- pr_warn("Error loading %s into kernel: %d.\n",
- BTF_ELF_SEC, err);
- btf__free(obj->btf);
- obj->btf = NULL;
- /* btf_ext can't exist without btf, so free it as well */
- if (obj->btf_ext) {
- btf_ext__free(obj->btf_ext);
- obj->btf_ext = NULL;
- }
+ /* clone BTF to sanitize a copy and leave the original intact */
+ raw_data = btf__get_raw_data(obj->btf, &sz);
+ kern_btf = btf__new(raw_data, sz);
+ if (IS_ERR(kern_btf))
+ return PTR_ERR(kern_btf);
- if (kernel_needs_btf(obj))
- return err;
+ bpf_object__sanitize_btf(obj, kern_btf);
}
- return 0;
+
+ err = btf__load(kern_btf);
+ if (sanitize) {
+ if (!err) {
+ /* move fd to libbpf's BTF */
+ btf__set_fd(obj->btf, btf__fd(kern_btf));
+ btf__set_fd(kern_btf, -1);
+ }
+ btf__free(kern_btf);
+ }
+ if (err) {
+ btf_mandatory = kernel_needs_btf(obj);
+ pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
+ btf_mandatory ? "BTF is mandatory, can't proceed."
+ : "BTF is optional, ignoring.");
+ if (!btf_mandatory)
+ err = 0;
+ }
+ return err;
}
static int bpf_object__elf_collect(struct bpf_object *obj)
@@ -2709,8 +2748,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
return -ENOENT;
}
-static enum extern_type find_extern_type(const struct btf *btf, int id,
- bool *is_signed)
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *t;
+ int i, j, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vs = btf_var_secinfos(t);
+ for (j = 0; j < btf_vlen(t); j++, vs++) {
+ if (vs->type == ext_btf_id)
+ return i;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
+ bool *is_signed)
{
const struct btf_type *t;
const char *name;
@@ -2725,29 +2789,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id,
int enc = btf_int_encoding(t);
if (enc & BTF_INT_BOOL)
- return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
if (is_signed)
*is_signed = enc & BTF_INT_SIGNED;
if (t->size == 1)
- return EXT_CHAR;
+ return KCFG_CHAR;
if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
- return EXT_UNKNOWN;
- return EXT_INT;
+ return KCFG_UNKNOWN;
+ return KCFG_INT;
}
case BTF_KIND_ENUM:
if (t->size != 4)
- return EXT_UNKNOWN;
+ return KCFG_UNKNOWN;
if (strcmp(name, "libbpf_tristate"))
- return EXT_UNKNOWN;
- return EXT_TRISTATE;
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
case BTF_KIND_ARRAY:
if (btf_array(t)->nelems == 0)
- return EXT_UNKNOWN;
- if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
- return EXT_UNKNOWN;
- return EXT_CHAR_ARR;
+ return KCFG_UNKNOWN;
+ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+ return KCFG_UNKNOWN;
+ return KCFG_CHAR_ARR;
default:
- return EXT_UNKNOWN;
+ return KCFG_UNKNOWN;
}
}
@@ -2756,23 +2820,45 @@ static int cmp_externs(const void *_a, const void *_b)
const struct extern_desc *a = _a;
const struct extern_desc *b = _b;
- /* descending order by alignment requirements */
- if (a->align != b->align)
- return a->align > b->align ? -1 : 1;
- /* ascending order by size, within same alignment class */
- if (a->sz != b->sz)
- return a->sz < b->sz ? -1 : 1;
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ if (a->type == EXT_KCFG) {
+ /* descending order by alignment requirements */
+ if (a->kcfg.align != b->kcfg.align)
+ return a->kcfg.align > b->kcfg.align ? -1 : 1;
+ /* ascending order by size, within same alignment class */
+ if (a->kcfg.sz != b->kcfg.sz)
+ return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+ }
+
/* resolve ties by name */
return strcmp(a->name, b->name);
}
+static int find_int_btf_id(const struct btf *btf)
+{
+ const struct btf_type *t;
+ int i, n;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (btf_is_int(t) && btf_int_bits(t) == 32)
+ return i;
+ }
+
+ return 0;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
+ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off, btf_id;
- struct btf_type *sec;
- const char *ext_name;
+ int i, n, off;
+ const char *ext_name, *sec_name;
Elf_Scn *scn;
GElf_Shdr sh;
@@ -2818,22 +2904,50 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext->name = btf__name_by_offset(obj->btf, t->name_off);
ext->sym_idx = i;
ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
- ext->sz = btf__resolve_size(obj->btf, t->type);
- if (ext->sz <= 0) {
- pr_warn("failed to resolve size of extern '%s': %d\n",
- ext_name, ext->sz);
- return ext->sz;
- }
- ext->align = btf__align_of(obj->btf, t->type);
- if (ext->align <= 0) {
- pr_warn("failed to determine alignment of extern '%s': %d\n",
- ext_name, ext->align);
- return -EINVAL;
- }
- ext->type = find_extern_type(obj->btf, t->type,
- &ext->is_signed);
- if (ext->type == EXT_UNKNOWN) {
- pr_warn("extern '%s' type is unsupported\n", ext_name);
+
+ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+ if (ext->sec_btf_id <= 0) {
+ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+ ext_name, ext->btf_id, ext->sec_btf_id);
+ return ext->sec_btf_id;
+ }
+ sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
+ sec_name = btf__name_by_offset(obj->btf, sec->name_off);
+
+ if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ kcfg_sec = sec;
+ ext->type = EXT_KCFG;
+ ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
+ if (ext->kcfg.sz <= 0) {
+ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.sz);
+ return ext->kcfg.sz;
+ }
+ ext->kcfg.align = btf__align_of(obj->btf, t->type);
+ if (ext->kcfg.align <= 0) {
+ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.align);
+ return -EINVAL;
+ }
+ ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+ &ext->kcfg.is_signed);
+ if (ext->kcfg.type == KCFG_UNKNOWN) {
+ pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+ return -ENOTSUP;
+ }
+ } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+ const struct btf_type *vt;
+
+ ksym_sec = sec;
+ ext->type = EXT_KSYM;
+
+ vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ if (!btf_is_void(vt)) {
+ pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
+ return -ENOTSUP;
+ }
+ } else {
+ pr_warn("unrecognized extern section '%s'\n", sec_name);
return -ENOTSUP;
}
}
@@ -2842,42 +2956,80 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (!obj->nr_extern)
return 0;
- /* sort externs by (alignment, size, name) and calculate their offsets
- * within a map */
+ /* sort externs by type, for kcfg ones also by (align, size, name) */
qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
- off = 0;
- for (i = 0; i < obj->nr_extern; i++) {
- ext = &obj->externs[i];
- ext->data_off = roundup(off, ext->align);
- off = ext->data_off + ext->sz;
- pr_debug("extern #%d: symbol %d, off %u, name %s\n",
- i, ext->sym_idx, ext->data_off, ext->name);
- }
- btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
- if (btf_id <= 0) {
- pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
- return -ESRCH;
- }
+ /* for .ksyms section, we need to turn all externs into allocated
+ * variables in BTF to pass kernel verification; we do this by
+ * pretending that each extern is a 4-byte int variable
+ */
+ if (ksym_sec) {
+ /* find existing 4-byte integer type in BTF to use for fake
+ * extern variables in DATASEC
+ */
+ int int_btf_id = find_int_btf_id(obj->btf);
- sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
- sec->size = off;
- n = btf_vlen(sec);
- for (i = 0; i < n; i++) {
- struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
-
- t = btf__type_by_id(obj->btf, vs->type);
- ext_name = btf__name_by_offset(obj->btf, t->name_off);
- ext = find_extern_by_name(obj, ext_name);
- if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
- return -ESRCH;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM)
+ continue;
+ pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+ i, ext->sym_idx, ext->name);
}
- vs->offset = ext->data_off;
- btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+
+ sec = ksym_sec;
+ n = btf_vlen(sec);
+ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+ struct btf_type *vt;
+
+ vt = (void *)btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, vt->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ vs->offset = off;
+ vs->size = sizeof(int);
+ }
+ sec->size = off;
}
+ if (kcfg_sec) {
+ sec = kcfg_sec;
+ /* for kcfg externs calculate their offsets within a .kconfig map */
+ off = 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KCFG)
+ continue;
+
+ ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+ off = ext->kcfg.data_off + ext->kcfg.sz;
+ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
+ i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+ }
+ sec->size = off;
+ n = btf_vlen(sec);
+ for (i = 0; i < n; i++) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+ t = btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vs->offset = ext->kcfg.data_off;
+ }
+ }
return 0;
}
@@ -3007,11 +3159,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
sym_idx);
return -LIBBPF_ERRNO__RELOC;
}
- pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
- i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+ pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
+ i, ext->name, ext->sym_idx, insn_idx);
reloc_desc->type = RELO_EXTERN;
reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = ext->data_off;
+ reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
}
@@ -3222,20 +3374,27 @@ err_free_new_name:
return err;
}
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+__u32 bpf_map__max_entries(const struct bpf_map *map)
{
- if (!map || !max_entries)
- return -EINVAL;
+ return map->def.max_entries;
+}
- /* If map already created, its attributes can't be changed. */
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
+{
if (map->fd >= 0)
return -EBUSY;
-
map->def.max_entries = max_entries;
-
return 0;
}
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+ if (!map || !max_entries)
+ return -EINVAL;
+
+ return bpf_map__set_max_entries(map, max_entries);
+}
+
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
@@ -3603,6 +3762,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
create_attr.map_flags = def->map_flags;
create_attr.key_size = def->key_size;
create_attr.value_size = def->value_size;
+ create_attr.numa_node = map->numa_node;
if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
int nr_cpus;
@@ -3626,7 +3786,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
create_attr.btf_fd = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
- if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+ if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
create_attr.btf_fd = btf__fd(obj->btf);
create_attr.btf_key_type_id = map->btf_key_type_id;
create_attr.btf_value_type_id = map->btf_value_type_id;
@@ -4799,8 +4959,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
if (targ_btf_path)
targ_btf = btf__parse_elf(targ_btf_path, NULL);
else
- targ_btf = libbpf_find_kernel_btf();
- if (IS_ERR(targ_btf)) {
+ targ_btf = obj->btf_vmlinux;
+ if (IS_ERR_OR_NULL(targ_btf)) {
pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
return PTR_ERR(targ_btf);
}
@@ -4847,7 +5007,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
}
out:
- btf__free(targ_btf);
+ /* obj->btf_vmlinux is freed at the end of object load phase */
+ if (targ_btf != obj->btf_vmlinux)
+ btf__free(targ_btf);
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
bpf_core_free_cands(entry->value);
@@ -4934,6 +5096,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+ struct extern_desc *ext;
if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
pr_warn("relocation out of range: '%s'\n",
@@ -4952,9 +5115,15 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
insn[0].imm = obj->maps[relo->map_idx].fd;
break;
case RELO_EXTERN:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
- insn[1].imm = relo->sym_off;
+ ext = &obj->externs[relo->sym_off];
+ if (ext->type == EXT_KCFG) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = ext->kcfg.data_off;
+ } else /* EXT_KSYM */ {
+ insn[0].imm = (__u32)ext->ksym.addr;
+ insn[1].imm = ext->ksym.addr >> 32;
+ }
break;
case RELO_CALL:
err = bpf_program__reloc_text(prog, obj, relo);
@@ -5207,18 +5376,17 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.kern_version = kern_version;
load_attr.prog_ifindex = prog->prog_ifindex;
}
- /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
- if (prog->obj->btf_ext)
- btf_fd = bpf_object__btf_fd(prog->obj);
- else
- btf_fd = -1;
- load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
- load_attr.func_info = prog->func_info;
- load_attr.func_info_rec_size = prog->func_info_rec_size;
- load_attr.func_info_cnt = prog->func_info_cnt;
- load_attr.line_info = prog->line_info;
- load_attr.line_info_rec_size = prog->line_info_rec_size;
- load_attr.line_info_cnt = prog->line_info_cnt;
+ /* specify func_info/line_info only if kernel supports them */
+ btf_fd = bpf_object__btf_fd(prog->obj);
+ if (btf_fd >= 0 && prog->obj->caps.btf_func) {
+ load_attr.prog_btf_fd = btf_fd;
+ load_attr.func_info = prog->func_info;
+ load_attr.func_info_rec_size = prog->func_info_rec_size;
+ load_attr.func_info_cnt = prog->func_info_cnt;
+ load_attr.line_info = prog->line_info;
+ load_attr.line_info_rec_size = prog->line_info_rec_size;
+ load_attr.line_info_cnt = prog->line_info_cnt;
+ }
load_attr.log_level = prog->log_level;
load_attr.prog_flags = prog->prog_flags;
@@ -5287,6 +5455,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
int err = 0, fd, i, btf_id;
+ if (prog->obj->loaded) {
+ pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
+ prog->name, prog->section_name);
+ return -EINVAL;
+ }
+
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -5375,16 +5549,21 @@ static bool bpf_program__is_function_storage(const struct bpf_program *prog,
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
+ struct bpf_program *prog;
size_t i;
int err;
for (i = 0; i < obj->nr_programs; i++) {
- if (bpf_program__is_function_storage(&obj->programs[i], obj))
+ prog = &obj->programs[i];
+ if (bpf_program__is_function_storage(prog, obj))
continue;
- obj->programs[i].log_level |= log_level;
- err = bpf_program__load(&obj->programs[i],
- obj->license,
- obj->kern_version);
+ if (!prog->load) {
+ pr_debug("prog '%s'('%s'): skipped loading\n",
+ prog->name, prog->section_name);
+ continue;
+ }
+ prog->log_level |= log_level;
+ err = bpf_program__load(prog, obj->license, obj->kern_version);
if (err)
return err;
}
@@ -5573,56 +5752,114 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
return 0;
}
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+ char sym_type, sym_name[500];
+ unsigned long long sym_addr;
+ struct extern_desc *ext;
+ int ret, err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open /proc/kallsyms: %d\n", err);
+ return err;
+ }
+
+ while (true) {
+ ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
+ &sym_addr, &sym_type, sym_name);
+ if (ret == EOF && feof(f))
+ break;
+ if (ret != 3) {
+ pr_warn("failed to read kallsyms entry: %d\n", ret);
+ err = -EINVAL;
+ goto out;
+ }
+
+ ext = find_extern_by_name(obj, sym_name);
+ if (!ext || ext->type != EXT_KSYM)
+ continue;
+
+ if (ext->is_set && ext->ksym.addr != sym_addr) {
+ pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+ sym_name, ext->ksym.addr, sym_addr);
+ err = -EINVAL;
+ goto out;
+ }
+ if (!ext->is_set) {
+ ext->is_set = true;
+ ext->ksym.addr = sym_addr;
+ pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+ }
+ }
+
+out:
+ fclose(f);
+ return err;
+}
+
static int bpf_object__resolve_externs(struct bpf_object *obj,
const char *extra_kconfig)
{
- bool need_config = false;
+ bool need_config = false, need_kallsyms = false;
struct extern_desc *ext;
+ void *kcfg_data = NULL;
int err, i;
- void *data;
if (obj->nr_extern == 0)
return 0;
- data = obj->maps[obj->kconfig_map_idx].mmaped;
+ if (obj->kconfig_map_idx >= 0)
+ kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
- void *ext_val = data + ext->data_off;
+ if (ext->type == EXT_KCFG &&
+ strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ void *ext_val = kcfg_data + ext->kcfg.data_off;
__u32 kver = get_kernel_version();
if (!kver) {
pr_warn("failed to get kernel version\n");
return -EINVAL;
}
- err = set_ext_value_num(ext, ext_val, kver);
+ err = set_kcfg_value_num(ext, ext_val, kver);
if (err)
return err;
- pr_debug("extern %s=0x%x\n", ext->name, kver);
- } else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+ pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
+ } else if (ext->type == EXT_KCFG &&
+ strncmp(ext->name, "CONFIG_", 7) == 0) {
need_config = true;
+ } else if (ext->type == EXT_KSYM) {
+ need_kallsyms = true;
} else {
pr_warn("unrecognized extern '%s'\n", ext->name);
return -EINVAL;
}
}
if (need_config && extra_kconfig) {
- err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
if (err)
return -EINVAL;
need_config = false;
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (!ext->is_set) {
+ if (ext->type == EXT_KCFG && !ext->is_set) {
need_config = true;
break;
}
}
}
if (need_config) {
- err = bpf_object__read_kconfig_file(obj, data);
+ err = bpf_object__read_kconfig_file(obj, kcfg_data);
+ if (err)
+ return -EINVAL;
+ }
+ if (need_kallsyms) {
+ err = bpf_object__read_kallsyms_file(obj);
if (err)
return -EINVAL;
}
@@ -5653,12 +5890,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
return -EINVAL;
if (obj->loaded) {
- pr_warn("object should not be loaded twice\n");
+ pr_warn("object '%s': load can't be attempted twice\n", obj->name);
return -EINVAL;
}
- obj->loaded = true;
-
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__probe_caps(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -5673,6 +5908,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
btf__free(obj->btf_vmlinux);
obj->btf_vmlinux = NULL;
+ obj->loaded = true; /* doesn't matter if successfully or not */
+
if (err)
goto out;
@@ -6267,7 +6504,7 @@ void bpf_object__close(struct bpf_object *obj)
{
size_t i;
- if (!obj)
+ if (IS_ERR_OR_NULL(obj))
return;
if (obj->clear_priv)
@@ -6445,6 +6682,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
return title;
}
+bool bpf_program__autoload(const struct bpf_program *prog)
+{
+ return prog->load;
+}
+
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
+{
+ if (prog->obj->loaded)
+ return -EINVAL;
+
+ prog->load = autoload;
+ return 0;
+}
+
int bpf_program__fd(const struct bpf_program *prog)
{
return bpf_program__nth_fd(prog, 0);
@@ -6548,6 +6799,7 @@ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
bpf_program__get_expected_attach_type(struct bpf_program *prog)
@@ -6661,7 +6913,10 @@ static const struct bpf_sec_def section_defs[] = {
.attach_fn = attach_iter),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
- BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
+ BPF_XDP_CPUMAP),
+ BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP,
+ BPF_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
@@ -6672,6 +6927,10 @@ static const struct bpf_sec_def section_defs[] = {
BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
BPF_CGROUP_INET_EGRESS),
BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
+ BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET_SOCK_CREATE),
+ BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET_SOCK_RELEASE),
BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
BPF_CGROUP_INET_SOCK_CREATE),
BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
@@ -6724,6 +6983,8 @@ static const struct bpf_sec_def section_defs[] = {
BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
BPF_CGROUP_SETSOCKOPT),
BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
+ BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_SK_LOOKUP),
};
#undef BPF_PROG_SEC_IMPL
@@ -7094,6 +7355,71 @@ const char *bpf_map__name(const struct bpf_map *map)
return map ? map->name : NULL;
}
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
+{
+ return map->def.type;
+}
+
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.type = type;
+ return 0;
+}
+
+__u32 bpf_map__map_flags(const struct bpf_map *map)
+{
+ return map->def.map_flags;
+}
+
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.map_flags = flags;
+ return 0;
+}
+
+__u32 bpf_map__numa_node(const struct bpf_map *map)
+{
+ return map->numa_node;
+}
+
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->numa_node = numa_node;
+ return 0;
+}
+
+__u32 bpf_map__key_size(const struct bpf_map *map)
+{
+ return map->def.key_size;
+}
+
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.key_size = size;
+ return 0;
+}
+
+__u32 bpf_map__value_size(const struct bpf_map *map)
+{
+ return map->def.value_size;
+}
+
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.value_size = size;
+ return 0;
+}
+
__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
{
return map ? map->btf_key_type_id : 0;
@@ -7146,9 +7472,17 @@ bool bpf_map__is_internal(const struct bpf_map *map)
return map->libbpf_type != LIBBPF_MAP_UNSPEC;
}
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+__u32 bpf_map__ifindex(const struct bpf_map *map)
{
+ return map->map_ifindex;
+}
+
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
map->map_ifindex = ifindex;
+ return 0;
}
int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
@@ -7356,7 +7690,7 @@ int bpf_link__destroy(struct bpf_link *link)
{
int err = 0;
- if (!link)
+ if (IS_ERR_OR_NULL(link))
return 0;
if (!link->disconnected && link->detach)
@@ -7414,6 +7748,11 @@ struct bpf_link *bpf_link__open(const char *path)
return link;
}
+int bpf_link__detach(struct bpf_link *link)
+{
+ return bpf_link_detach(link->fd) ? -errno : 0;
+}
+
int bpf_link__pin(struct bpf_link *link, const char *path)
{
int err;
@@ -7500,6 +7839,9 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
pr_warn("program '%s': failed to attach to pfd %d: %s\n",
bpf_program__title(prog, false), pfd,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+ pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ bpf_program__title(prog, false), pfd);
return ERR_PTR(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
@@ -7945,17 +8287,28 @@ bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
return bpf_program__attach_fd(prog, netns_fd, "netns");
}
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+{
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
+ return bpf_program__attach_fd(prog, ifindex, "xdp");
+}
+
struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
const struct bpf_iter_attach_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int prog_fd, link_fd;
+ __u32 target_fd = 0;
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
return ERR_PTR(-EINVAL);
+ link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
+ link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
+
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("program '%s': can't attach before loaded\n",
@@ -7968,7 +8321,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
return ERR_PTR(-ENOMEM);
link->detach = &bpf_link__detach_fd;
- link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
+ link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
+ &link_create_opts);
if (link_fd < 0) {
link_fd = -errno;
free(link);
@@ -8151,7 +8505,7 @@ void perf_buffer__free(struct perf_buffer *pb)
{
int i;
- if (!pb)
+ if (IS_ERR_OR_NULL(pb))
return;
if (pb->cpu_bufs) {
for (i = 0; i < pb->cpu_cnt; i++) {
@@ -8264,7 +8618,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p)
{
const char *online_cpus_file = "/sys/devices/system/cpu/online";
- struct bpf_map_info map = {};
+ struct bpf_map_info map;
char msg[STRERR_BUFSIZE];
struct perf_buffer *pb;
bool *online = NULL;
@@ -8277,19 +8631,28 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
return ERR_PTR(-EINVAL);
}
+ /* best-effort sanity checks */
+ memset(&map, 0, sizeof(map));
map_info_len = sizeof(map);
err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
if (err) {
err = -errno;
- pr_warn("failed to get map info for map FD %d: %s\n",
- map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
- return ERR_PTR(err);
- }
-
- if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
- pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
- map.name);
- return ERR_PTR(-EINVAL);
+ /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
+ * -EBADFD, -EFAULT, or -E2BIG on real error
+ */
+ if (err != -EINVAL) {
+ pr_warn("failed to get map info for map FD %d: %s\n",
+ map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+ return ERR_PTR(err);
+ }
+ pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
+ map_fd);
+ } else {
+ if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+ map.name);
+ return ERR_PTR(-EINVAL);
+ }
}
pb = calloc(1, sizeof(*pb));
@@ -8321,7 +8684,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
err = pb->cpu_cnt;
goto error;
}
- if (map.max_entries < pb->cpu_cnt)
+ if (map.max_entries && map.max_entries < pb->cpu_cnt)
pb->cpu_cnt = map.max_entries;
}
@@ -8994,6 +9357,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
const struct bpf_sec_def *sec_def;
const char *sec_name = bpf_program__title(prog, false);
+ if (!prog->load)
+ continue;
+
sec_def = find_sec_def(sec_name);
if (!sec_def || !sec_def->attach_fn)
continue;
@@ -9016,8 +9382,7 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
for (i = 0; i < s->prog_cnt; i++) {
struct bpf_link **link = s->progs[i].link;
- if (!IS_ERR_OR_NULL(*link))
- bpf_link__destroy(*link);
+ bpf_link__destroy(*link);
*link = NULL;
}
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 334437af3014..5ecb4069a9f0 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -200,6 +200,8 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
bool needs_copy);
+LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
/* returns program size in bytes */
LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
@@ -227,6 +229,7 @@ LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
+LIBBPF_API int bpf_link__detach(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
@@ -255,6 +258,8 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
LIBBPF_API struct bpf_link *
bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
struct bpf_map;
@@ -262,8 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
struct bpf_iter_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
+ union bpf_iter_link_info *link_info;
+ __u32 link_info_len;
};
-#define bpf_iter_attach_opts__last_field sz
+#define bpf_iter_attach_opts__last_field link_info_len
LIBBPF_API struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
@@ -348,6 +355,7 @@ LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -375,6 +383,7 @@ LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
/*
* No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -418,11 +427,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
LIBBPF_API struct bpf_map *
bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
+/* get/set map FD */
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+/* get map definition */
LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+/* get/set map type */
+LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
+/* get/set map size (max_entries) */
+LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+/* get/set map flags */
+LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
+/* get/set map NUMA node */
+LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
+/* get/set map key size */
+LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
+/* get/set map value size */
+LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
+/* get map key/value BTF type IDs */
LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+/* get/set map if_index */
+LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
@@ -430,11 +466,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
-LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f732c77b7ed0..0c4722bfdd0a 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -270,3 +270,30 @@ LIBBPF_0.0.9 {
ring_buffer__new;
ring_buffer__poll;
} LIBBPF_0.0.8;
+
+LIBBPF_0.1.0 {
+ global:
+ bpf_link__detach;
+ bpf_link_detach;
+ bpf_map__ifindex;
+ bpf_map__key_size;
+ bpf_map__map_flags;
+ bpf_map__max_entries;
+ bpf_map__numa_node;
+ bpf_map__set_key_size;
+ bpf_map__set_map_flags;
+ bpf_map__set_max_entries;
+ bpf_map__set_numa_node;
+ bpf_map__set_type;
+ bpf_map__set_value_size;
+ bpf_map__type;
+ bpf_map__value_size;
+ bpf_program__attach_xdp;
+ bpf_program__autoload;
+ bpf_program__is_sk_lookup;
+ bpf_program__set_autoload;
+ bpf_program__set_sk_lookup;
+ btf__parse;
+ btf__parse_raw;
+ btf__set_fd;
+} LIBBPF_0.0.9;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 10cd8d1891f5..5a3d3f078408 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -78,6 +78,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
break;
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ xattr.expected_attach_type = BPF_SK_LOOKUP;
+ break;
case BPF_PROG_TYPE_KPROBE:
xattr.kern_version = get_kernel_version();
break;
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 6a875a0f01bb..2208444ecb44 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -305,9 +305,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
}
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent)
+ void *ptr, short revent, enum fdarray_flags flags)
{
- int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
+ int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);
if (pos >= 0) {
evlist->pollfd.priv[pos].ptr = ptr;
@@ -488,7 +488,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
revent = !overwrite ? POLLIN : 0;
if (!evsel->system_wide &&
- perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
+ perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
perf_mmap__put(map);
return -1;
}
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 74dc8c3f0b66..2d0fa02b036f 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -45,7 +45,7 @@ struct perf_evlist_mmap_ops {
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent);
+ void *ptr, short revent, enum fdarray_flags flags);
int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 69b44d2cc0f5..842028858d66 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -111,6 +111,14 @@ struct perf_record_cgroup {
char path[PATH_MAX];
};
+struct perf_record_text_poke_event {
+ struct perf_event_header header;
+ __u64 addr;
+ __u16 old_len;
+ __u16 new_len;
+ __u8 bytes[];
+};
+
struct perf_record_sample {
struct perf_event_header header;
__u64 array[];
@@ -367,6 +375,7 @@ union perf_event {
struct perf_record_sample sample;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
+ struct perf_record_text_poke_event text_poke;
struct perf_record_header_attr attr;
struct perf_record_event_update event_update;
struct perf_record_header_event_type event_type;
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
index 06ac7bd2144b..727396de6be5 100644
--- a/tools/lib/rbtree.c
+++ b/tools/lib/rbtree.c
@@ -13,7 +13,7 @@
#include <linux/export.h>
/*
- * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
+ * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
*
* 1) A node is either red or black
* 2) The root is black
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
index 596032ade31f..4d6394397d92 100644
--- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
+++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
@@ -3,7 +3,7 @@ libtraceevent(3)
NAME
----
-tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins.
+tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
SYNOPSIS
--------
@@ -13,6 +13,12 @@ SYNOPSIS
struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
+ void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
+ const char pass:[*]path,
+ const char pass:[*]name,
+ void pass:[*]data),
+ void pass:[*]_data_);
--
DESCRIPTION
@@ -22,11 +28,13 @@ directories. The _tep_ argument is trace event parser context.
The plugin directories are :
[verse]
--
+ - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
- System's plugin directory, defined at the library compile time. It
depends on the library installation prefix and usually is
_(install_preffix)/lib/traceevent/plugins_
- Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
- User's plugin directory, located at _~/.local/lib/traceevent/plugins_
+ - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
--
Loading of plugins can be controlled by the _tep_flags_, using the
_tep_set_flag()_ API:
@@ -44,6 +52,12 @@ _tep_load_plugins()_. The _tep_ argument is trace event parser context. The
_plugin_list_ is the list of loaded plugins, returned by
the _tep_load_plugins()_ function.
+The _tep_load_plugins_hook_ function walks through all directories with plugins
+and calls user specified _load_plugin()_ hook for each plugin file. Only files
+with given _suffix_ are considered to be plugins. The _data_ is a user specified
+context, passed to _load_plugin()_. Directories and the walk order are the same
+as in _tep_load_plugins()_ API.
+
RETURN VALUE
------------
The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
@@ -63,6 +77,15 @@ if (plugins == NULL) {
}
...
tep_unload_plugins(plugins, tep);
+...
+void print_plugin(struct tep_handle *tep, const char *path,
+ const char *name, void *data)
+{
+ printf("Found libtraceevent plugin %s/%s\n", path, name);
+}
+...
+tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
+...
--
FILES
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
index cee469803a34..d805a920af6f 100644
--- a/tools/lib/traceevent/event-parse-local.h
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -13,6 +13,7 @@ struct func_map;
struct func_list;
struct event_handler;
struct func_resolver;
+struct tep_plugins_dir;
struct tep_handle {
int ref_count;
@@ -47,7 +48,6 @@ struct tep_handle {
struct printk_list *printklist;
unsigned int printk_count;
-
struct tep_event **events;
int nr_events;
struct tep_event **sort_events;
@@ -81,10 +81,30 @@ struct tep_handle {
/* cache */
struct tep_event *last_event;
+
+ struct tep_plugins_dir *plugins_dir;
+};
+
+enum tep_print_parse_type {
+ PRINT_FMT_STRING,
+ PRINT_FMT_ARG_DIGIT,
+ PRINT_FMT_ARG_POINTER,
+ PRINT_FMT_ARG_STRING,
+};
+
+struct tep_print_parse {
+ struct tep_print_parse *next;
+
+ char *format;
+ int ls;
+ enum tep_print_parse_type type;
+ struct tep_print_arg *arg;
+ struct tep_print_arg *len_as_arg;
};
void tep_free_event(struct tep_event *event);
void tep_free_format_field(struct tep_format_field *field);
+void tep_free_plugin_paths(struct tep_handle *tep);
unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data);
unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data);
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ba4f33804af1..3ba566de821c 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4565,43 +4565,93 @@ get_bprint_format(void *data, int size __maybe_unused,
return format;
}
-static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg)
+static int print_mac_arg(struct trace_seq *s, const char *format,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
{
- unsigned char *buf;
const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+ bool reverse = false;
+ unsigned char *buf;
+ int ret = 0;
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
- return;
+ return 0;
}
if (arg->type != TEP_PRINT_FIELD) {
trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
arg->type);
- return;
+ return 0;
}
- if (mac == 'm')
+ if (format[0] == 'm') {
fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+ } else if (format[0] == 'M' && format[1] == 'F') {
+ fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
+ ret++;
+ }
+ if (format[1] == 'R') {
+ reverse = true;
+ ret++;
+ }
+
if (!arg->field.field) {
arg->field.field =
tep_find_any_field(event, arg->field.name);
if (!arg->field.field) {
do_warning_event(event, "%s: field %s not found",
__func__, arg->field.name);
- return;
+ return ret;
}
}
if (arg->field.field->size != 6) {
trace_seq_printf(s, "INVALIDMAC");
- return;
+ return ret;
}
+
buf = data + arg->field.field->offset;
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+ if (reverse)
+ trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
+ else
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+
+ return ret;
}
-static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
+static int parse_ip4_print_args(struct tep_handle *tep,
+ const char *ptr, bool *reverse)
+{
+ int ret = 0;
+
+ *reverse = false;
+
+ /* hnbl */
+ switch (*ptr) {
+ case 'h':
+ if (tep->file_bigendian)
+ *reverse = false;
+ else
+ *reverse = true;
+ ret++;
+ break;
+ case 'l':
+ *reverse = true;
+ ret++;
+ break;
+ case 'n':
+ case 'b':
+ ret++;
+ /* fall through */
+ default:
+ *reverse = false;
+ break;
+ }
+
+ return ret;
+}
+
+static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
{
const char *fmt;
@@ -4610,7 +4660,11 @@ static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
else
fmt = "%d.%d.%d.%d";
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+ if (reverse)
+ trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
+ else
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+
}
static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
@@ -4693,7 +4747,7 @@ static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
if (useIPv4) {
if (needcolon)
trace_seq_printf(s, ":");
- print_ip4_addr(s, 'I', &in6.s6_addr[12]);
+ print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
}
return;
@@ -4722,16 +4776,20 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
+ bool reverse = false;
unsigned char *buf;
+ int ret;
+
+ ret = parse_ip4_print_args(event->tep, ptr, &reverse);
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
- return 0;
+ return ret;
}
if (arg->type != TEP_PRINT_FIELD) {
trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return 0;
+ return ret;
}
if (!arg->field.field) {
@@ -4740,7 +4798,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
if (!arg->field.field) {
do_warning("%s: field %s not found",
__func__, arg->field.name);
- return 0;
+ return ret;
}
}
@@ -4748,11 +4806,12 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
if (arg->field.field->size != 4) {
trace_seq_printf(s, "INVALIDIPv4");
- return 0;
+ return ret;
}
- print_ip4_addr(s, i, buf);
- return 0;
+ print_ip4_addr(s, i, reverse, buf);
+ return ret;
+
}
static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
@@ -4812,7 +4871,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
char have_c = 0, have_p = 0;
unsigned char *buf;
struct sockaddr_storage *sa;
+ bool reverse = false;
int rc = 0;
+ int ret;
/* pISpc */
if (i == 'I') {
@@ -4827,6 +4888,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
rc++;
}
}
+ ret = parse_ip4_print_args(event->tep, ptr, &reverse);
+ ptr += ret;
+ rc += ret;
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
@@ -4858,7 +4922,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
return rc;
}
- print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr);
+ print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
if (have_p)
trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
@@ -4892,25 +4956,20 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
struct tep_print_arg *arg)
{
char i = *ptr; /* 'i' or 'I' */
- char ver;
- int rc = 0;
+ int rc = 1;
+ /* IP version */
ptr++;
- rc++;
- ver = *ptr;
- ptr++;
- rc++;
-
- switch (ver) {
+ switch (*ptr) {
case '4':
- rc += print_ipv4_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
break;
case '6':
- rc += print_ipv6_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
break;
case 'S':
- rc += print_ipsa_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
break;
default:
return 0;
@@ -4919,6 +4978,133 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
return rc;
}
+/* Byte order used for the 'l'/'L' variants and for the default/'b'/'B' variants */
+static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
+static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+/*
+ * Print a 16 byte field as xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.
+ *
+ * @ptr points at the 'U' of the format; the character after it may be one
+ * of [LlBb]: upper case selects upper case hex digits, 'L'/'l' select the
+ * guid_index byte order.
+ *
+ * Returns the number of modifier characters consumed (0 or 1).
+ */
+static int print_uuid_arg(struct trace_seq *s, const char *ptr,
+			  void *data, int size, struct tep_event *event,
+			  struct tep_print_arg *arg)
+{
+	const int *order = uuid_index;
+	char modifier = ptr[1];
+	char *hex = "%02x";
+	int consumed = 0;
+	char *bytes;
+	int pos;
+
+	if (modifier == 'L' || modifier == 'B')
+		hex = "%02X";
+	if (modifier == 'L' || modifier == 'l')
+		order = guid_index;
+	if (modifier == 'L' || modifier == 'l' ||
+	    modifier == 'B' || modifier == 'b')
+		consumed = 1;
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return consumed;
+	}
+
+	if (arg->type != TEP_PRINT_FIELD) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return consumed;
+	}
+
+	/* resolve the field lazily, the first time it is printed */
+	if (!arg->field.field)
+		arg->field.field = tep_find_any_field(event, arg->field.name);
+	if (!arg->field.field) {
+		do_warning("%s: field %s not found",
+			   __func__, arg->field.name);
+		return consumed;
+	}
+
+	if (arg->field.field->size != 16) {
+		trace_seq_printf(s, "INVALIDUUID");
+		return consumed;
+	}
+
+	bytes = data + arg->field.field->offset;
+
+	for (pos = 0; pos < 16; pos++) {
+		trace_seq_printf(s, hex, bytes[order[pos]] & 0xff);
+		/* group separators after bytes 4, 6, 8 and 10 */
+		if (pos == 3 || pos == 5 || pos == 7 || pos == 9)
+			trace_seq_printf(s, "-");
+	}
+
+	return consumed;
+}
+
+/*
+ * Print the bytes of a dynamic array as two-digit hex values.
+ *
+ * @ptr points at the 'h' of the format; the following character may pick
+ * the separator: 'C' -> ":", 'D' -> "-", 'N' -> none (default is a space).
+ * @print_len is the maximum number of bytes to print; it is capped by the
+ * array length read from the event data.
+ *
+ * Returns the number of modifier characters consumed (0 or 1).
+ */
+static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
+			      void *data, int size, struct tep_event *event,
+			      struct tep_print_arg *arg, int print_len)
+{
+	char modifier = ptr[1];
+	unsigned long offset;
+	char *delim = " ";
+	int consumed = 0;
+	int count = print_len;
+	int arr_len;
+	char *bytes;
+	int pos;
+
+	if (modifier == 'C') {
+		delim = ":";
+		consumed = 1;
+	} else if (modifier == 'D') {
+		delim = "-";
+		consumed = 1;
+	} else if (modifier == 'N') {
+		delim = "";
+		consumed = 1;
+	}
+
+	if (arg->type == TEP_PRINT_FUNC) {
+		process_defined_func(s, data, size, event, arg);
+		return consumed;
+	}
+
+	if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
+		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+		return consumed;
+	}
+
+	/* the field value packs the length above bit 16 and the offset below */
+	offset = tep_read_number(event->tep,
+				 data + arg->dynarray.field->offset,
+				 arg->dynarray.field->size);
+	arr_len = (unsigned long long)(offset >> 16);
+	bytes = data + (offset & 0xffff);
+
+	if (arr_len < count)
+		count = arr_len;
+
+	/* the separator goes in front of every byte except the first one */
+	for (pos = 0; pos < count; pos++) {
+		if (pos == 0)
+			trace_seq_printf(s, "%02x", bytes[pos] & 0xff);
+		else
+			trace_seq_printf(s, "%s%02x", delim, bytes[pos] & 0xff);
+	}
+
+	return consumed;
+}
+
static int is_printable_array(char *p, unsigned int len)
{
unsigned int i;
@@ -5007,264 +5193,567 @@ void tep_print_fields(struct trace_seq *s, void *data,
}
}
-static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+/*
+ * Print the function that contains the address held in @arg.
+ *
+ * The address comes from eval_num_arg() and is looked up with find_func().
+ * When a function is found its name is printed, followed by "+0x<offset>"
+ * if the character at @format is 'F' or 'S'.  When no function is found
+ * the raw address is printed in hex, sized according to tep->long_size.
+ *
+ * Always returns 0.
+ */
+static int print_function(struct trace_seq *s, const char *format,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
{
- struct tep_handle *tep = event->tep;
- struct tep_print_fmt *print_fmt = &event->print_fmt;
- struct tep_print_arg *arg = print_fmt->args;
- struct tep_print_arg *args = NULL;
- const char *ptr = print_fmt->format;
- unsigned long long val;
struct func_map *func;
- const char *saveptr;
- struct trace_seq p;
- char *bprint_fmt = NULL;
- char format[32];
- int show_func;
- int len_as_arg;
- int len_arg = 0;
- int len;
- int ls;
+ unsigned long long val;
- if (event->flags & TEP_EVENT_FL_FAILED) {
- trace_seq_printf(s, "[FAILED TO PARSE]");
- tep_print_fields(s, data, size, event);
- return;
+ val = eval_num_arg(data, size, event, arg);
+ func = find_func(event->tep, val);
+ if (func) {
+ trace_seq_puts(s, func->func);
+ if (*format == 'F' || *format == 'S')
+ trace_seq_printf(s, "+0x%llx", val - func->addr);
+ } else {
+ if (event->tep->long_size == 4)
+ trace_seq_printf(s, "0x%lx", (long)val);
+ else
+ trace_seq_printf(s, "0x%llx", (long long)val);
}
- if (event->flags & TEP_EVENT_FL_ISBPRINT) {
- bprint_fmt = get_bprint_format(data, size, event);
- args = make_bprint_args(bprint_fmt, data, size, event);
- arg = args;
- ptr = bprint_fmt;
+ return 0;
+}
+
+/*
+ * Print one "%p..." argument.
+ *
+ * @format is the saved conversion specification; everything up to and
+ * including the first 'p' is skipped and the character after it selects
+ * the handler (function, MAC, IP, UUID or raw buffer).  @plen is handed to
+ * print_raw_buff_arg() as the number of bytes to print.
+ *
+ * Returns 0 for a TEP_PRINT_BSTRING argument and for a plain pointer,
+ * otherwise 1 plus the value returned by the selected handler.
+ */
+static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
+ void *data, int size,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ unsigned long long val;
+ int ret = 1;
+
+ if (arg->type == TEP_PRINT_BSTRING) {
+ trace_seq_puts(s, arg->string.string);
+ return 0;
+ }
+ /* advance to the character that follows the 'p' conversion */
+ while (*format) {
+ if (*format == 'p') {
+ format++;
+ break;
+ }
+ format++;
}
- for (; *ptr; ptr++) {
- ls = 0;
- if (*ptr == '\\') {
- ptr++;
- switch (*ptr) {
+ switch (*format) {
+ case 'F':
+ case 'f':
+ case 'S':
+ case 's':
+ ret += print_function(s, format, data, size, event, arg);
+ break;
+ case 'M':
+ case 'm':
+ ret += print_mac_arg(s, format, data, size, event, arg);
+ break;
+ case 'I':
+ case 'i':
+ ret += print_ip_arg(s, format, data, size, event, arg);
+ break;
+ case 'U':
+ ret += print_uuid_arg(s, format, data, size, event, arg);
+ break;
+ case 'h':
+ ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
+ break;
+ default:
+ /* no known extension: print the value as a plain pointer */
+ ret = 0;
+ val = eval_num_arg(data, size, event, arg);
+ trace_seq_printf(s, "%p", (void *)val);
+ break;
+ }
+
+ return ret;
+
+}
+
+/*
+ * Print one integer argument with the saved conversion specification.
+ *
+ * @format: printf style format for this single argument
+ * @plen:   value for a '*' in @format, or negative when there is none
+ * @ls:     length modifier count: -2 char, -1 short, 0 int, 1 long,
+ *          2 long long; any other value marks the event as failed
+ *
+ * Always returns 0.
+ */
+static int print_arg_number(struct trace_seq *s, const char *format, int plen,
+			    void *data, int size, int ls,
+			    struct tep_event *event, struct tep_print_arg *arg)
+{
+	unsigned long long num = eval_num_arg(data, size, event, arg);
+	bool with_len = plen >= 0;
+
+	if (ls < -2 || ls > 2) {
+		do_warning_event(event, "bad count (%d)", ls);
+		event->flags |= TEP_EVENT_FL_FAILED;
+		return 0;
+	}
+
+	if (ls == -2) {
+		if (with_len)
+			trace_seq_printf(s, format, plen, (char)num);
+		else
+			trace_seq_printf(s, format, (char)num);
+	} else if (ls == -1) {
+		if (with_len)
+			trace_seq_printf(s, format, plen, (short)num);
+		else
+			trace_seq_printf(s, format, (short)num);
+	} else if (ls == 0) {
+		if (with_len)
+			trace_seq_printf(s, format, plen, (int)num);
+		else
+			trace_seq_printf(s, format, (int)num);
+	} else if (ls == 1) {
+		if (with_len)
+			trace_seq_printf(s, format, plen, (long)num);
+		else
+			trace_seq_printf(s, format, (long)num);
+	} else {
+		if (with_len)
+			trace_seq_printf(s, format, plen, (long long)num);
+		else
+			trace_seq_printf(s, format, (long long)num);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Print one string argument: render it with print_str_arg() into a
+ * temporary trace_seq and append the terminated result to @s.
+ */
+static void print_arg_string(struct trace_seq *s, const char *format, int plen,
+			     void *data, int size,
+			     struct tep_event *event, struct tep_print_arg *arg)
+{
+	struct trace_seq scratch;
+
+	trace_seq_init(&scratch);
+	print_str_arg(&scratch, data, size, event, format, plen, arg);
+	trace_seq_terminate(&scratch);
+
+	trace_seq_puts(s, scratch.buffer);
+	trace_seq_destroy(&scratch);
+}
+
+/*
+ * Count the extension characters that follow a 'p' conversion.
+ *
+ * @format points at the first character after the 'p'.  Recognized
+ * extensions are [FfSs], [Mm] with optional [RF], [Ii] followed by 4, 6 or
+ * S and their modifiers, U with optional [LlBb] and h with optional [CDN].
+ *
+ * Returns the number of characters that belong to the extension, 0 when
+ * the first character is not a known extension.
+ */
+static int parse_arg_format_pointer(const char *format)
+{
+ int ret = 0;
+ int index;
+ int loop;
+
+ switch (*format) {
+ case 'F':
+ case 'S':
+ case 'f':
+ case 's':
+ ret++;
+ break;
+ case 'M':
+ case 'm':
+ /* [mM]R , [mM]F */
+ switch (format[1]) {
+ case 'R':
+ case 'F':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'I':
+ case 'i':
+ /* index walks the modifiers that follow the [4S6] character */
+ index = 2;
+ loop = 1;
+ switch (format[1]) {
+ case 'S':
+ /*[S][pfs]*/
+ while (loop) {
+ switch (format[index]) {
+ case 'p':
+ case 'f':
+ case 's':
+ ret++;
+ index++;
+ break;
+ default:
+ loop = 0;
+ break;
+ }
+ }
+ /* fall through */
+ case '4':
+ /* [4S][hnbl] */
+ switch (format[index]) {
+ case 'h':
case 'n':
- trace_seq_putc(s, '\n');
- break;
- case 't':
- trace_seq_putc(s, '\t');
- break;
- case 'r':
- trace_seq_putc(s, '\r');
- break;
- case '\\':
- trace_seq_putc(s, '\\');
+ case 'l':
+ case 'b':
+ ret++;
+ index++;
break;
- default:
- trace_seq_putc(s, *ptr);
+ }
+ if (format[1] == '4') {
+ ret++;
break;
}
+ /* fall through */
+ case '6':
+ /* [6S]c */
+ if (format[index] == 'c')
+ ret++;
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'U':
+ switch (format[1]) {
+ case 'L':
+ case 'l':
+ case 'B':
+ case 'b':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'h':
+ switch (format[1]) {
+ case 'C':
+ case 'D':
+ case 'N':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ default:
+ break;
+ }
- } else if (*ptr == '%') {
- saveptr = ptr;
- show_func = 0;
- len_as_arg = 0;
- cont_process:
- ptr++;
- switch (*ptr) {
- case '%':
- trace_seq_putc(s, '%');
- break;
- case '#':
- /* FIXME: need to handle properly */
- goto cont_process;
- case 'h':
- ls--;
- goto cont_process;
- case 'l':
- ls++;
- goto cont_process;
- case 'L':
- ls = 2;
- goto cont_process;
- case '*':
- /* The argument is the length. */
- if (!arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
- len_arg = eval_num_arg(data, size, event, arg);
- len_as_arg = 1;
- arg = arg->next;
- goto cont_process;
- case '.':
- case 'z':
- case 'Z':
- case '0' ... '9':
- case '-':
- goto cont_process;
- case 'p':
- if (tep->long_size == 4)
- ls = 1;
- else
- ls = 2;
+ return ret;
+}
- if (isalnum(ptr[1]))
- ptr++;
+/*
+ * Free a list of parsed format elements: every node and the format
+ * string it owns.  A NULL @arg is accepted.
+ */
+static void free_parse_args(struct tep_print_parse *arg)
+{
+ struct tep_print_parse *del;
- if (arg->type == TEP_PRINT_BSTRING) {
- trace_seq_puts(s, arg->string.string);
- arg = arg->next;
- break;
- }
+ while (arg) {
+ /* remember the node, step past it, then release it */
+ del = arg;
+ arg = del->next;
+ free(del->format);
+ free(del);
+ }
+}
- if (*ptr == 'F' || *ptr == 'f' ||
- *ptr == 'S' || *ptr == 's') {
- show_func = *ptr;
- } else if (*ptr == 'M' || *ptr == 'm') {
- print_mac_arg(s, *ptr, data, size, event, arg);
- arg = arg->next;
- break;
- } else if (*ptr == 'I' || *ptr == 'i') {
- int n;
+/*
+ * Allocate one parsed format element and hand it back through *@parse.
+ *
+ * @format is duplicated; @type, @arg, @len_as_arg and @ls are stored as
+ * given.
+ *
+ * Returns 0 on success.  Returns -1 when an allocation fails, in which
+ * case *@parse is left untouched.
+ */
+static int parse_arg_add(struct tep_print_parse **parse, char *format,
+ enum tep_print_parse_type type,
+ struct tep_print_arg *arg,
+ struct tep_print_arg *len_as_arg,
+ int ls)
+{
+ struct tep_print_parse *parg = NULL;
- n = print_ip_arg(s, ptr, data, size, event, arg);
- if (n > 0) {
- ptr += n - 1;
- arg = arg->next;
- break;
- }
- }
+ parg = calloc(1, sizeof(*parg));
+ if (!parg)
+ goto error;
+ parg->format = strdup(format);
+ if (!parg->format)
+ goto error;
+ parg->type = type;
+ parg->arg = arg;
+ parg->len_as_arg = len_as_arg;
+ parg->ls = ls;
+ *parse = parg;
+ return 0;
+error:
+ if (parg) {
+ free(parg->format);
+ free(parg);
+ }
+ return -1;
+}
- /* fall through */
- case 'd':
- case 'u':
- case 'i':
- case 'x':
- case 'X':
- case 'o':
- if (!arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
+/*
+ * Parse one conversion specification and record it as a parsed element.
+ *
+ * @parse:  where the new element is stored (through parse_arg_add())
+ * @event:  event the format belongs to; flagged TEP_EVENT_FL_FAILED on
+ *          malformed input
+ * @format: points at the '%' that starts the specification
+ * @arg:    cursor into the event's argument list; advanced past every
+ *          argument the specification consumes (including a '*' length)
+ *
+ * Flags, width, precision and length modifiers are skipped (h/l/L adjust
+ * the size count stored with the element).  The specification text is
+ * copied into a 32 byte buffer, so it is truncated if it is longer.
+ *
+ * Returns the number of characters of @format that were consumed.
+ */
+static int parse_arg_format(struct tep_print_parse **parse,
+ struct tep_event *event,
+ const char *format, struct tep_print_arg **arg)
+{
+ struct tep_print_arg *len_arg = NULL;
+ char print_format[32];
+ const char *start = format;
+ int ret = 0;
+ int ls = 0;
+ int res;
+ int len;
- len = ((unsigned long)ptr + 1) -
- (unsigned long)saveptr;
+ format++;
+ ret++;
+ for (; *format; format++) {
+ switch (*format) {
+ case '#':
+ /* FIXME: need to handle properly */
+ break;
+ case 'h':
+ ls--;
+ break;
+ case 'l':
+ ls++;
+ break;
+ case 'L':
+ ls = 2;
+ break;
+ case '.':
+ case 'z':
+ case 'Z':
+ case '0' ... '9':
+ case '-':
+ break;
+ case '*':
+ /* The argument is the length. */
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ if (len_arg) {
+ do_warning_event(event, "argument already matched");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ len_arg = *arg;
+ *arg = (*arg)->next;
+ break;
+ case 'p':
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ res = parse_arg_format_pointer(format + 1);
+ if (res > 0) {
+ format += res;
+ ret += res;
+ }
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ case 'd':
+ case 'u':
+ case 'i':
+ case 'x':
+ case 'X':
+ case 'o':
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
- memcpy(format, saveptr, len);
- format[len] = 0;
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
- val = eval_num_arg(data, size, event, arg);
- arg = arg->next;
+ /*
+ * should never happen
+ *
+ * Keep one spare byte: the "%l" -> "%ll" rewrite below grows
+ * the string by one character, so at most 30 characters plus
+ * the terminator may be copied here.
+ */
+ if (len > 30) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 30;
+ }
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
- if (show_func) {
- func = find_func(tep, val);
- if (func) {
- trace_seq_puts(s, func->func);
- if (show_func == 'F')
- trace_seq_printf(s,
- "+0x%llx",
- val - func->addr);
- break;
- }
- }
- if (tep->long_size == 8 && ls == 1 &&
- sizeof(long) != 8) {
- char *p;
-
- /* make %l into %ll */
- if (ls == 1 && (p = strchr(format, 'l')))
- memmove(p+1, p, strlen(p)+1);
- else if (strcmp(format, "%p") == 0)
- strcpy(format, "0x%llx");
- ls = 2;
- }
- switch (ls) {
- case -2:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (char)val);
- else
- trace_seq_printf(s, format, (char)val);
- break;
- case -1:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (short)val);
- else
- trace_seq_printf(s, format, (short)val);
- break;
- case 0:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (int)val);
- else
- trace_seq_printf(s, format, (int)val);
- break;
- case 1:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (long)val);
- else
- trace_seq_printf(s, format, (long)val);
- break;
- case 2:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg,
- (long long)val);
- else
- trace_seq_printf(s, format, (long long)val);
- break;
- default:
- do_warning_event(event, "bad count (%d)", ls);
- event->flags |= TEP_EVENT_FL_FAILED;
- }
- break;
- case 's':
- if (!arg) {
- do_warning_event(event, "no matching argument");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
+ if (event->tep->long_size == 8 && ls == 1 &&
+ sizeof(long) != 8) {
+ char *p;
+
+ /* make %l into %ll */
+ if (ls == 1 && (p = strchr(print_format, 'l')))
+ memmove(p+1, p, strlen(p)+1);
+ ls = 2;
+ }
+ if (ls < -2 || ls > 2) {
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= TEP_EVENT_FL_FAILED;
+ }
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ case 's':
+ if (!*arg) {
+ do_warning_event(event, "no matching argument");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
- len = ((unsigned long)ptr + 1) -
- (unsigned long)saveptr;
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
+
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ default:
+ /* unknown conversion: emit it literally as ">c<" */
+ snprintf(print_format, 32, ">%c<", *format);
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_STRING, NULL, NULL, 0);
+ ret++;
+ return ret;
+ }
+ ret++;
+ }
+
+out_failed:
+ return ret;
- memcpy(format, saveptr, len);
- format[len] = 0;
- if (!len_as_arg)
- len_arg = -1;
- /* Use helper trace_seq */
- trace_seq_init(&p);
- print_str_arg(&p, data, size, event,
- format, len_arg, arg);
- trace_seq_terminate(&p);
- trace_seq_puts(s, p.buffer);
- trace_seq_destroy(&p);
- arg = arg->next;
+}
+
+/*
+ * Collect the literal text at the start of @format into one
+ * PRINT_FMT_STRING element.
+ *
+ * Backslash escapes (\n, \t, \r, \\) are translated, "%%" becomes a single
+ * '%', and scanning stops at the first '%' that starts a real conversion
+ * or at the end of the string.
+ *
+ * Returns the number of characters of @format that were consumed.
+ */
+static int parse_arg_string(struct tep_print_parse **parse, const char *format)
+{
+ struct trace_seq s;
+ int ret = 0;
+
+ trace_seq_init(&s);
+ for (; *format; format++) {
+ if (*format == '\\') {
+ format++;
+ ret++;
+ switch (*format) {
+ case 'n':
+ trace_seq_putc(&s, '\n');
+ break;
+ case 't':
+ trace_seq_putc(&s, '\t');
+ break;
+ case 'r':
+ trace_seq_putc(&s, '\r');
+ break;
+ case '\\':
+ trace_seq_putc(&s, '\\');
break;
default:
- trace_seq_printf(s, ">%c<", *ptr);
-
+ trace_seq_putc(&s, *format);
+ break;
}
+ } else if (*format == '%') {
+ if (*(format + 1) == '%') {
+ trace_seq_putc(&s, '%');
+ format++;
+ ret++;
+ } else
+ break;
} else
- trace_seq_putc(s, *ptr);
+ trace_seq_putc(&s, *format);
+
+ ret++;
+ }
+ trace_seq_terminate(&s);
+ parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
+ trace_seq_destroy(&s);
+
+ return ret;
+}
+
+/*
+ * Split a print format into a linked list of parsed elements.
+ *
+ * Literal text (including "%%") is handled by parse_arg_string(), every
+ * other '%' by parse_arg_format(), which also advances @arg.  An element
+ * that could not be allocated is simply left out of the list.
+ *
+ * Returns the head of the list, or NULL when nothing was parsed.
+ */
+static struct tep_print_parse *
+parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
+{
+	struct tep_print_parse *head = NULL;
+	struct tep_print_parse **tail = &head;
+	int remaining = strlen(format);
+	int used;
+
+	while (*format) {
+		if (format[0] == '%' && format[1] != '%')
+			used = parse_arg_format(tail, event, format, &arg);
+		else
+			used = parse_arg_string(tail, format);
+
+		/* only move the tail when an element was really added */
+		if (*tail)
+			tail = &(*tail)->next;
+
+		remaining -= used;
+		if (remaining <= 0)
+			break;
+		format += used;
+	}
+
+	return head;
+}
+
+/*
+ * Print an event by walking its list of parsed format elements.
+ *
+ * For an element with a length argument the length is evaluated from the
+ * event data first; without one, -1 is passed to the number and string
+ * printers and 1 to the pointer printer.
+ */
+static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
+ void *data, int size, struct tep_event *event)
+{
+ int len_arg;
+
+ while (parse) {
+ /* len_arg is only read below when len_as_arg is set */
+ if (parse->len_as_arg)
+ len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
+ switch (parse->type) {
+ case PRINT_FMT_ARG_DIGIT:
+ print_arg_number(s, parse->format,
+ parse->len_as_arg ? len_arg : -1, data,
+ size, parse->ls, event, parse->arg);
+ break;
+ case PRINT_FMT_ARG_POINTER:
+ print_arg_pointer(s, parse->format,
+ parse->len_as_arg ? len_arg : 1,
+ data, size, event, parse->arg);
+ break;
+ case PRINT_FMT_ARG_STRING:
+ print_arg_string(s, parse->format,
+ parse->len_as_arg ? len_arg : -1,
+ data, size, event, parse->arg);
+ break;
+ case PRINT_FMT_STRING:
+ default:
+ trace_seq_printf(s, "%s", parse->format);
+ break;
+ }
+ parse = parse->next;
}
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+{
+ struct tep_print_parse *parse = event->print_fmt.print_cache;
+ struct tep_print_arg *args = NULL;
+ char *bprint_fmt = NULL;
if (event->flags & TEP_EVENT_FL_FAILED) {
-out_failed:
trace_seq_printf(s, "[FAILED TO PARSE]");
+ tep_print_fields(s, data, size, event);
+ return;
}
- if (args) {
+ if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+ bprint_fmt = get_bprint_format(data, size, event);
+ args = make_bprint_args(bprint_fmt, data, size, event);
+ parse = parse_args(event, bprint_fmt, args);
+ }
+
+ print_event_cache(parse, s, data, size, event);
+
+ if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+ free_parse_args(parse);
free_args(args);
free(bprint_fmt);
}
@@ -6363,9 +6852,13 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp,
*list = arg;
list = &arg->next;
}
- return 0;
}
+ if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
+ event->print_fmt.print_cache = parse_args(event,
+ event->print_fmt.format,
+ event->print_fmt.args);
+
return 0;
event_parse_failed:
@@ -7032,7 +7525,7 @@ void tep_free_event(struct tep_event *event)
free(event->print_fmt.format);
free_args(event->print_fmt.args);
-
+ free_parse_args(event->print_fmt.print_cache);
free(event);
}
@@ -7120,6 +7613,7 @@ void tep_free(struct tep_handle *tep)
free(tep->events);
free(tep->sort_events);
free(tep->func_resolver);
+ tep_free_plugin_paths(tep);
free(tep);
}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index b77837f75a0d..c29b693e31ee 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -1,21 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#ifndef _PARSE_EVENTS_H
#define _PARSE_EVENTS_H
@@ -272,9 +258,12 @@ struct tep_print_arg {
};
};
+struct tep_print_parse;
+
struct tep_print_fmt {
char *format;
struct tep_print_arg *args;
+ /* parsed form of @format; opaque outside event-parse.c */
+ struct tep_print_parse *print_cache;
};
struct tep_event {
@@ -379,7 +368,7 @@ enum tep_errno {
* errno since SUS requires the errno has distinct positive values.
* See 'Issue 6' in the link below.
*
- * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
*/
__TEP_ERRNO__START = -100000,
@@ -393,14 +382,29 @@ struct tep_plugin_list;
#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1))
+/*
+ * When a directory added with tep_add_plugin_path() is searched,
+ * relative to the built-in plugin locations.
+ */
+enum tep_plugin_load_priority {
+ TEP_PLUGIN_FIRST, /* searched before the built-in locations */
+ TEP_PLUGIN_LAST, /* searched after the built-in locations */
+};
+
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+ enum tep_plugin_load_priority prio);
struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
void tep_unload_plugins(struct tep_plugin_list *plugin_list,
struct tep_handle *tep);
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+ void (*load_plugin)(struct tep_handle *tep,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data);
char **tep_plugin_list_options(void);
void tep_plugin_free_options_list(char **list);
int tep_plugin_add_options(const char *name,
struct tep_plugin_option *options);
+int tep_plugin_add_option(const char *name, const char *val);
void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_plugin_print_options(struct trace_seq *s);
void tep_print_plugins(struct trace_seq *s,
const char *prefix, const char *suffix,
const struct tep_plugin_list *list);
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index e1f7ddd5a6cf..e7c2acb8680f 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -13,6 +13,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
+#include <errno.h>
#include "event-parse.h"
#include "event-parse-local.h"
#include "event-utils.h"
@@ -38,6 +39,12 @@ struct tep_plugin_list {
void *handle;
};
+/* One user supplied plugin directory, kept in a singly linked list */
+struct tep_plugins_dir {
+ struct tep_plugins_dir *next; /* next directory, NULL at the end */
+ char *path; /* directory path, owned by this entry */
+ enum tep_plugin_load_priority prio; /* when the directory is searched */
+};
+
static void lower_case(char *str)
{
if (!str)
@@ -247,6 +254,170 @@ void tep_plugin_remove_options(struct tep_plugin_option *options)
}
}
+/*
+ * Split "<plugin>:<option>" in place.
+ *
+ * When *@option contains a ':', the buffer is cut there and handed over
+ * as *@plugin, and *@option is replaced by a newly allocated copy of the
+ * part after the ':'.  Without a ':' *@plugin is NULL and *@option is
+ * left alone.
+ *
+ * Returns 0 on success, -1 when the copy cannot be allocated (*@option is
+ * NULL then and *@plugin still holds the original buffer).
+ */
+static int parse_option_name(char **option, char **plugin)
+{
+	char *colon = strchr(*option, ':');
+
+	*plugin = NULL;
+	if (!colon)
+		return 0;
+
+	*plugin = *option;
+	*colon = '\0';
+	*option = strdup(colon + 1);
+
+	return *option ? 0 : -1;
+}
+
+/*
+ * Look up a registered plugin option by name.
+ *
+ * @plugin: plugin the option must belong to (compared with the option's
+ *          alias or, without an alias, its file), or NULL to accept the
+ *          first option called @option from any plugin
+ * @option: option name
+ *
+ * Returns the matching option or NULL.
+ */
+static struct tep_plugin_option *
+find_registered_option(const char *plugin, const char *option)
+{
+	struct registered_plugin_options *group;
+	struct tep_plugin_option *cur;
+
+	for (group = registered_options; group; group = group->next) {
+		for (cur = group->options; cur->name; cur++) {
+			const char *owner = cur->plugin_alias ?
+					    cur->plugin_alias : cur->file;
+
+			if ((!plugin || strcmp(plugin, owner) == 0) &&
+			    strcmp(option, cur->name) == 0)
+				return cur;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Apply @val to the registered option @plugin:@option.
+ *
+ * Returns 0 when no such option is registered, otherwise the result of
+ * update_option_value().
+ */
+static int process_option(const char *plugin, const char *option, const char *val)
+{
+	struct tep_plugin_option *match = find_registered_option(plugin, option);
+
+	return match ? update_option_value(match, val) : 0;
+}
+
+/**
+ * tep_plugin_add_option - add an option/val pair to set plugin options
+ * @name: The name of the option (format: <plugin>:<option> or just <option>)
+ * @val: (optional) the value for the option
+ *
+ * Modify a plugin option. If @val is given then the value of the option
+ * is set (note, some options just take a boolean, so @val must be either
+ * "1" or "0" or "true" or "false").
+ *
+ * The pair is remembered in the list of requested options and then applied
+ * to a matching registered option, if there is one.
+ *
+ * Returns -ENOMEM if an allocation fails. Otherwise returns 0 when no
+ * registered option matches, or the result of updating the matching option.
+ */
+int tep_plugin_add_option(const char *name, const char *val)
+{
+	struct trace_plugin_options *op;
+	char *option_str;
+	char *plugin;
+
+	option_str = strdup(name);
+	if (!option_str)
+		return -ENOMEM;
+
+	/*
+	 * On failure the original buffer is still held in @plugin, so it
+	 * has to go through the common cleanup instead of being leaked.
+	 */
+	if (parse_option_name(&option_str, &plugin) < 0)
+		goto out_free;
+
+	/* If the option exists, update the val */
+	for (op = trace_plugin_options; op; op = op->next) {
+		/* Both must be NULL or not NULL */
+		if ((!plugin || !op->plugin) && plugin != op->plugin)
+			continue;
+		if (plugin && strcmp(plugin, op->plugin) != 0)
+			continue;
+		if (strcmp(op->option, option_str) != 0)
+			continue;
+
+		/* update option */
+		free(op->value);
+		if (val) {
+			op->value = strdup(val);
+			if (!op->value)
+				goto out_free;
+		} else
+			op->value = NULL;
+
+		/* plugin and option_str don't get freed at the end */
+		free(plugin);
+		free(option_str);
+
+		/* continue with the strings owned by the existing entry */
+		plugin = op->plugin;
+		option_str = op->option;
+		break;
+	}
+
+	/* If not found, create */
+	if (!op) {
+		op = malloc(sizeof(*op));
+		if (!op)
+			goto out_free;
+		memset(op, 0, sizeof(*op));
+		/* the new entry takes ownership of both strings */
+		op->plugin = plugin;
+		op->option = option_str;
+		if (val) {
+			op->value = strdup(val);
+			if (!op->value) {
+				free(op);
+				goto out_free;
+			}
+		}
+		op->next = trace_plugin_options;
+		trace_plugin_options = op;
+	}
+
+	return process_option(plugin, option_str, val);
+
+out_free:
+	free(plugin);
+	free(option_str);
+	return -ENOMEM;
+}
+
+/* Print one "<name>:\t<value>" line; nothing is printed for a NULL @op */
+static void print_op_data(struct trace_seq *s, const char *name,
+			  const char *op)
+{
+	if (!op)
+		return;
+
+	trace_seq_printf(s, "%8s:\t%s\n", name, op);
+}
+
+/**
+ * tep_plugin_print_options - print out the registered plugin options
+ * @s: The trace_seq descriptor to write the plugin options into
+ *
+ * Writes every registered option into trace_seq @s. Options of the same
+ * registration are separated by a line of dashes, registrations by a line
+ * of equal signs.
+ */
+void tep_plugin_print_options(struct trace_seq *s)
+{
+	struct registered_plugin_options *group = registered_options;
+	struct tep_plugin_option *opt;
+
+	while (group) {
+		if (group != registered_options)
+			trace_seq_printf(s, "============\n");
+
+		opt = group->options;
+		while (opt->name) {
+			if (opt != group->options)
+				trace_seq_printf(s, "------------\n");
+			print_op_data(s, "file", opt->file);
+			print_op_data(s, "plugin", opt->plugin_alias);
+			print_op_data(s, "option", opt->name);
+			print_op_data(s, "desc", opt->description);
+			print_op_data(s, "value", opt->value);
+			trace_seq_printf(s, "%8s:\t%d\n", "set", opt->set);
+			opt++;
+		}
+
+		group = group->next;
+	}
+}
+
/**
* tep_print_plugins - print out the list of plugins loaded
* @s: the trace_seq descripter to write to
@@ -273,6 +444,7 @@ load_plugin(struct tep_handle *tep, const char *path,
const char *file, void *data)
{
struct tep_plugin_list **plugin_list = data;
+ struct tep_plugin_option *options;
tep_plugin_load_func func;
struct tep_plugin_list *list;
const char *alias;
@@ -297,6 +469,16 @@ load_plugin(struct tep_handle *tep, const char *path,
if (!alias)
alias = file;
+ options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
+ if (options) {
+ while (options->name) {
+ ret = update_option(alias, options);
+ if (ret < 0)
+ goto out_free;
+ options++;
+ }
+ }
+
func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
if (!func) {
warning("could not find func '%s' in plugin '%s'\n%s\n",
@@ -365,28 +547,53 @@ load_plugins_dir(struct tep_handle *tep, const char *suffix,
closedir(dir);
}
-static void
-load_plugins(struct tep_handle *tep, const char *suffix,
- void (*load_plugin)(struct tep_handle *tep,
- const char *path,
- const char *name,
- void *data),
- void *data)
+/**
+ * tep_load_plugins_hook - call a user specified callback to load a plugin
+ * @tep: handler to traceevent context
+ * @suffix: filter only plugin files with given suffix
+ * @load_plugin: user specified callback, called for each plugin file
+ * @data: custom context, passed to @load_plugin
+ *
+ * Searches for traceevent plugin files and calls @load_plugin for each
+ * The order of plugins search is:
+ * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
+ * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
+ * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
+ * - In user's home: ~/.local/lib/traceevent/plugins/
+ * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
+ *
+ */
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+ void (*load_plugin)(struct tep_handle *tep,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
{
+ struct tep_plugins_dir *dir = NULL;
char *home;
char *path;
char *envdir;
int ret;
- if (tep->flags & TEP_DISABLE_PLUGINS)
+ if (tep && tep->flags & TEP_DISABLE_PLUGINS)
return;
+ if (tep)
+ dir = tep->plugins_dir;
+ while (dir) {
+ if (dir->prio == TEP_PLUGIN_FIRST)
+ load_plugins_dir(tep, suffix, dir->path,
+ load_plugin, data);
+ dir = dir->next;
+ }
+
/*
* If a system plugin directory was defined,
* check that first.
*/
#ifdef PLUGIN_DIR
- if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS))
+ if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
load_plugins_dir(tep, suffix, PLUGIN_DIR,
load_plugin, data);
#endif
@@ -415,6 +622,15 @@ load_plugins(struct tep_handle *tep, const char *suffix,
load_plugins_dir(tep, suffix, path, load_plugin, data);
+ if (tep)
+ dir = tep->plugins_dir;
+ while (dir) {
+ if (dir->prio == TEP_PLUGIN_LAST)
+ load_plugins_dir(tep, suffix, dir->path,
+ load_plugin, data);
+ dir = dir->next;
+ }
+
free(path);
}
@@ -423,10 +639,59 @@ tep_load_plugins(struct tep_handle *tep)
{
struct tep_plugin_list *list = NULL;
- load_plugins(tep, ".so", load_plugin, &list);
+ tep_load_plugins_hook(tep, ".so", load_plugin, &list);
return list;
}
+/**
+ * tep_add_plugin_path - Add a new plugin directory.
+ * @tep: Trace event handler.
+ * @path: Path to a directory. All plugin files in that
+ * directory will be loaded. The string is duplicated, so the
+ * caller keeps ownership of @path.
+ * @prio: Load priority of the plugins in that directory.
+ *
+ * The new entry is inserted at the head of the @tep->plugins_dir list.
+ *
+ * Returns -1 in case of an error (NULL @tep or @path, or a failed
+ * allocation), 0 otherwise.
+ */
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+ enum tep_plugin_load_priority prio)
+{
+ struct tep_plugins_dir *dir;
+
+ if (!tep || !path)
+ return -1;
+
+ dir = calloc(1, sizeof(*dir));
+ if (!dir)
+ return -1;
+
+ /* Keep a private copy of the path; it is released with the entry. */
+ dir->path = strdup(path);
+ if (!dir->path) {
+ free(dir);
+ return -1;
+ }
+ dir->prio = prio;
+ /* Push the entry onto the front of the singly linked list. */
+ dir->next = tep->plugins_dir;
+ tep->plugins_dir = dir;
+
+ return 0;
+}
+
+/**
+ * tep_free_plugin_paths - Free all plugin directory entries of a handle.
+ * @tep: Trace event handler. May be NULL, in which case nothing is done.
+ *
+ * Releases every entry of the @tep->plugins_dir list together with its
+ * duplicated path string. The list head is NULL when the function returns.
+ */
+void tep_free_plugin_paths(struct tep_handle *tep)
+{
+ struct tep_plugins_dir *dir;
+
+ if (!tep)
+ return;
+
+ dir = tep->plugins_dir;
+ while (dir) {
+ /* Unlink the head first so the list stays valid while freeing. */
+ tep->plugins_dir = tep->plugins_dir->next;
+ free(dir->path);
+ free(dir);
+ dir = tep->plugins_dir;
+ }
+}
+
void
tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
{
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
index 5fa8292e341b..a2b522093cfd 100644
--- a/tools/lib/traceevent/kbuffer.h
+++ b/tools/lib/traceevent/kbuffer.h
@@ -1,22 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#ifndef _KBUFFER_H
#define _KBUFFER_H
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
index 210d26910613..dd4da823c38f 100644
--- a/tools/lib/traceevent/plugins/Build
+++ b/tools/lib/traceevent/plugins/Build
@@ -5,6 +5,8 @@ plugin_kvm-y += plugin_kvm.o
plugin_mac80211-y += plugin_mac80211.o
plugin_sched_switch-y += plugin_sched_switch.o
plugin_function-y += plugin_function.o
+plugin_futex-y += plugin_futex.o
plugin_xen-y += plugin_xen.o
plugin_scsi-y += plugin_scsi.o
plugin_cfg80211-y += plugin_cfg80211.o
+plugin_tlb-y += plugin_tlb.o
\ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
index 680d883efe05..47e802553250 100644
--- a/tools/lib/traceevent/plugins/Makefile
+++ b/tools/lib/traceevent/plugins/Makefile
@@ -134,9 +134,11 @@ PLUGINS += plugin_kvm.so
PLUGINS += plugin_mac80211.so
PLUGINS += plugin_sched_switch.so
PLUGINS += plugin_function.so
+PLUGINS += plugin_futex.so
PLUGINS += plugin_xen.so
PLUGINS += plugin_scsi.so
PLUGINS += plugin_cfg80211.so
+PLUGINS += plugin_tlb.so
PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS))
PLUGINS_IN := $(PLUGINS:.so=-in.o)
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
index 7770fcb78e0f..807b16e1bf0f 100644
--- a/tools/lib/traceevent/plugins/plugin_function.c
+++ b/tools/lib/traceevent/plugins/plugin_function.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
@@ -50,12 +35,20 @@ struct tep_plugin_option plugin_options[] =
.set = 1,
},
{
+ .name = "offset",
+ .plugin_alias = "ftrace",
+ .description =
+ "Show function names as well as their offsets",
+ .set = 0,
+ },
+ {
.name = NULL,
}
};
static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
static void add_child(struct func_stack *stack, const char *child, int pos)
{
@@ -123,6 +116,18 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
return 0;
}
+/*
+ * show_function - print a function name, optionally followed by an offset
+ * @s: output sequence the text is appended to
+ * @tep: handle passed to tep_find_function_address()
+ * @func: function name to print
+ * @function: address that was recorded for this function
+ *
+ * The "+0x<offset> " suffix is only emitted when the "offset" plugin
+ * option (ftrace_offset) is set.
+ */
+static void show_function(struct trace_seq *s, struct tep_handle *tep,
+ const char *func, unsigned long long function)
+{
+ unsigned long long offset;
+
+ trace_seq_printf(s, "%s", func);
+ if (ftrace_offset->set) {
+ /*
+ * Despite its name, 'offset' first holds the value returned by
+ * tep_find_function_address() (presumably the start address of
+ * the function - confirm); the real offset is the difference.
+ */
+ offset = tep_find_function_address(tep, function);
+ trace_seq_printf(s, "+0x%x ", (int)(function - offset));
+ }
+}
+
static int function_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event *event, void *context)
{
@@ -149,14 +154,14 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
trace_seq_printf(s, "%*s", index*3, "");
if (func)
- trace_seq_printf(s, "%s", func);
+ show_function(s, tep, func, function);
else
trace_seq_printf(s, "0x%llx", function);
if (ftrace_parent->set) {
trace_seq_printf(s, " <-- ");
if (parent)
- trace_seq_printf(s, "%s", parent);
+ show_function(s, tep, parent, pfunction);
else
trace_seq_printf(s, "0x%llx", pfunction);
}
@@ -164,11 +169,93 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
return 0;
}
+/*
+ * trace_stack_handler - print handler for the ftrace "kernel_stack" event
+ * @s: output sequence the text is appended to
+ * @record: raw record that holds the saved addresses
+ * @event: event description, used to locate the "caller" field
+ * @context: unused
+ *
+ * Walks the record from the offset of the "caller" field to its end, one
+ * recorded long at a time, and prints every address, together with the
+ * function name when tep_find_function() resolves it. The walk stops
+ * early at an all-ones word.
+ *
+ * Always returns 0.
+ */
+static int
+trace_stack_handler(struct trace_seq *s, struct tep_record *record,
+		    struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long addr;
+	const char *func;
+	int long_size;
+	void *data = record->data;
+	void *end = record->data + record->size;
+
+	field = tep_find_any_field(event, "caller");
+	if (!field) {
+		trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
+		return 0;
+	}
+
+	trace_seq_puts(s, "<stack trace >\n");
+
+	long_size = tep_get_long_size(event->tep);
+	/* A non-positive stride would never advance the loop below. */
+	if (long_size <= 0)
+		return 0;
+
+	/* Only read entries that lie completely inside the record. */
+	for (data += field->offset; end - data >= long_size;
+	     data += long_size) {
+		addr = tep_read_number(event->tep, data, long_size);
+
+		/*
+		 * An all-ones word ends the trace. Compare at the width of
+		 * the recorded long: truncating a 64-bit address to int
+		 * would also match any address whose low 32 bits are all
+		 * ones and cut the dump short.
+		 */
+		if (long_size == 8 ? addr == (unsigned long long)-1 :
+				     (int)addr == -1)
+			break;
+
+		func = tep_find_function(event->tep, addr);
+		if (func)
+			trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
+		else
+			trace_seq_printf(s, "=> %llx\n", addr);
+	}
+
+	return 0;
+}
+
+/*
+ * trace_raw_data_handler - print handler for the ftrace "raw_data" event
+ * @s: output sequence the text is appended to
+ * @record: raw record that holds the data
+ * @event: event description, used to locate the "id" and "buf" fields
+ * @context: unused
+ *
+ * Prints "# <id>" followed by a hex dump of every byte from the offset of
+ * the "buf" field to the end of the record.
+ *
+ * Returns the result of trace_seq_putc() when the "id" field cannot be
+ * read, 0 otherwise.
+ */
+static int
+trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
+		       struct tep_event *event, void *context)
+{
+	struct tep_format_field *field;
+	unsigned long long id;
+	int long_size;
+	unsigned char *data = record->data;
+	unsigned char *end = data + record->size;
+
+	if (tep_get_field_val(s, event, "id", record, &id, 1))
+		return trace_seq_putc(s, '!');
+
+	trace_seq_printf(s, "# %llx", id);
+
+	field = tep_find_any_field(event, "buf");
+	if (!field) {
+		trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
+		return 0;
+	}
+
+	long_size = tep_get_long_size(event->tep);
+	/* A non-positive stride would never advance the loop below. */
+	if (long_size <= 0)
+		long_size = sizeof(long);
+
+	for (data += field->offset; data < end; data += long_size) {
+		/*
+		 * Dump exactly as many bytes as the loop advances by.
+		 * Using the host's sizeof(long) here would repeat or skip
+		 * bytes whenever it differs from the recorded long size.
+		 */
+		int size = long_size;
+		int left = end - data;
+		int i;
+
+		if (size > left)
+			size = left;
+
+		for (i = 0; i < size; i++)
+			trace_seq_printf(s, " %02x", data[i]);
+	}
+
+	return 0;
+}
+
int TEP_PLUGIN_LOADER(struct tep_handle *tep)
{
tep_register_event_handler(tep, -1, "ftrace", "function",
function_handler, NULL);
+ tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
+ trace_stack_handler, NULL);
+
+ tep_register_event_handler(tep, -1, "ftrace", "raw_data",
+ trace_raw_data_handler, NULL);
+
tep_plugin_add_options("ftrace", plugin_options);
return 0;
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
new file mode 100644
index 000000000000..eb7c9f8a850a
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_futex.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2017 National Instruments Corp.
+ *
+ * Author: Julia Cartwright <julia@ni.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/futex.h>
+
+#include "event-parse.h"
+
+#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
+
+/*
+ * Values of the sys_enter_futex event fields, each one read by
+ * futex_handler() from the event field of the same name.
+ */
+struct futex_args {
+ unsigned long long uaddr;
+ unsigned long long op;
+ unsigned long long val;
+ unsigned long long utime; /* or val2 */
+ unsigned long long uaddr2;
+ unsigned long long val3;
+};
+
+/*
+ * Describes how one futex operation is printed: its name plus one format
+ * string per optional argument. A NULL format makes futex_print() skip
+ * that argument.
+ */
+struct futex_op {
+ const char *name;
+ const char *fmt_val;
+ const char *fmt_utime;
+ const char *fmt_uaddr2;
+ const char *fmt_val3;
+};
+
+/*
+ * Table indexed by (op & FUTEX_CMD_MASK), see futex_handler().
+ * NOTE(review): the entry order presumably follows the numeric FUTEX_*
+ * command values from <linux/futex.h> - confirm when adding entries.
+ */
+static const struct futex_op futex_op_tbl[] = {
+ { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL },
+ { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL },
+ { "FUTEX_FD", " val=%llu", NULL, NULL, NULL },
+ { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL },
+ { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL },
+ { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL },
+ { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL },
+ { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" },
+ { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" },
+ { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+};
+
+
+/*
+ * futex_print - print the arguments of one futex call
+ * @s: output sequence the text is appended to
+ * @args: argument values read from the event
+ * @fop: table entry that selects which arguments are shown and how
+ *
+ * uaddr is always printed; every other argument is printed only when
+ * @fop provides a format string for it. The formats come from the
+ * constant futex_op_tbl and each one consumes a single unsigned long long.
+ */
+static void futex_print(struct trace_seq *s, const struct futex_args *args,
+ const struct futex_op *fop)
+{
+ trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
+
+ if (fop->fmt_val)
+ trace_seq_printf(s, fop->fmt_val, args->val);
+
+ if (fop->fmt_utime)
+ trace_seq_printf(s,fop->fmt_utime, args->utime);
+
+ if (fop->fmt_uaddr2)
+ trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
+
+ if (fop->fmt_val3)
+ trace_seq_printf(s, fop->fmt_val3, args->val3);
+}
+
+/*
+ * futex_handler - print handler for the syscalls:sys_enter_futex event
+ * @s: output sequence the text is appended to
+ * @record: raw record the field values are read from
+ * @event: event description
+ * @context: unused
+ *
+ * Prints "op=<name>" with the private/realtime flag names appended, then
+ * the arguments that are meaningful for that operation.
+ *
+ * Returns 0 after printing, 1 when a field cannot be read or when the
+ * command is not covered by futex_op_tbl.
+ * NOTE(review): a non-zero return presumably makes the caller fall back to
+ * its default output - confirm against tep_register_event_handler().
+ */
+static int futex_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ const struct futex_op *fop;
+ struct futex_args args;
+ unsigned long long cmd;
+
+ if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "op", record, &args.op, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "val", record, &args.val, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
+ return 1;
+
+ /* Strip the flag bits; what is left indexes the operation table. */
+ cmd = args.op & FUTEX_CMD_MASK;
+ if (cmd >= ARRAY_SIZE(futex_op_tbl))
+ return 1;
+
+ fop = &futex_op_tbl[cmd];
+
+ trace_seq_printf(s, "op=%s", fop->name);
+
+ if (args.op & FUTEX_PRIVATE_FLAG)
+ trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
+
+ if (args.op & FUTEX_CLOCK_REALTIME)
+ trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
+
+ futex_print(s, &args, fop);
+ return 0;
+}
+
+/*
+ * Plugin entry point: installs futex_handler() for the
+ * syscalls:sys_enter_futex event. The result of the registration is not
+ * checked; the function always returns 0.
+ */
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+ tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+ futex_handler, NULL);
+ return 0;
+}
+
+/*
+ * Plugin exit point: removes the handler installed by TEP_PLUGIN_LOADER(),
+ * using the same system/event/handler arguments.
+ */
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+ tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+ futex_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
index bb434e0ed03a..d98466788f14 100644
--- a/tools/lib/traceevent/plugins/plugin_hrtimer.c
+++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
index 04fc125f38cb..69111a68d3cf 100644
--- a/tools/lib/traceevent/plugins/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugins/plugin_jbd2.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
index edaec5d962c3..4b4f7f9616e3 100644
--- a/tools/lib/traceevent/plugins/plugin_kmem.c
+++ b/tools/lib/traceevent/plugins/plugin_kmem.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index c8e623065a7e..51ceeb9147eb 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
@@ -155,7 +140,23 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(EXIT_WRITE_DR5, 0x035) \
_ER(EXIT_WRITE_DR6, 0x036) \
_ER(EXIT_WRITE_DR7, 0x037) \
- _ER(EXIT_EXCP_BASE, 0x040) \
+ _ER(EXIT_EXCP_DE, 0x040) \
+ _ER(EXIT_EXCP_DB, 0x041) \
+ _ER(EXIT_EXCP_BP, 0x043) \
+ _ER(EXIT_EXCP_OF, 0x044) \
+ _ER(EXIT_EXCP_BR, 0x045) \
+ _ER(EXIT_EXCP_UD, 0x046) \
+ _ER(EXIT_EXCP_NM, 0x047) \
+ _ER(EXIT_EXCP_DF, 0x048) \
+ _ER(EXIT_EXCP_TS, 0x04a) \
+ _ER(EXIT_EXCP_NP, 0x04b) \
+ _ER(EXIT_EXCP_SS, 0x04c) \
+ _ER(EXIT_EXCP_GP, 0x04d) \
+ _ER(EXIT_EXCP_PF, 0x04e) \
+ _ER(EXIT_EXCP_MF, 0x050) \
+ _ER(EXIT_EXCP_AC, 0x051) \
+ _ER(EXIT_EXCP_MC, 0x052) \
+ _ER(EXIT_EXCP_XF, 0x053) \
_ER(EXIT_INTR, 0x060) \
_ER(EXIT_NMI, 0x061) \
_ER(EXIT_SMI, 0x062) \
@@ -201,7 +202,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(EXIT_MONITOR, 0x08a) \
_ER(EXIT_MWAIT, 0x08b) \
_ER(EXIT_MWAIT_COND, 0x08c) \
- _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_XSETBV, 0x08d) \
+ _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \
+ _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \
_ER(EXIT_ERR, -1)
#define _ER(reason, val) { #reason, val },
@@ -241,7 +245,7 @@ static const char *find_exit_reason(unsigned isa, int val)
}
if (!strings)
return "UNKNOWN-ISA";
- for (i = 0; strings[i].val >= 0; i++)
+ for (i = 0; strings[i].str; i++)
if (strings[i].val == val)
break;
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
index 884303c26b5c..f48071e3cfb8 100644
--- a/tools/lib/traceevent/plugins/plugin_mac80211.c
+++ b/tools/lib/traceevent/plugins/plugin_mac80211.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
index 957389a0ff7a..e12fa103820a 100644
--- a/tools/lib/traceevent/plugins/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
new file mode 100644
index 000000000000..43657fb60504
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_tlb.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+/*
+ * Values of the "reason" field that tlb_flush_handler() translates to text.
+ * NOTE(review): presumably a copy of the kernel's reason codes - verify
+ * that the order still matches the kernel definition.
+ */
+enum tlb_flush_reason {
+ TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_REMOTE_SHOOTDOWN,
+ TLB_LOCAL_SHOOTDOWN,
+ TLB_LOCAL_MM_SHOOTDOWN,
+ NR_TLB_FLUSH_REASONS,
+};
+
+/*
+ * tlb_flush_handler - print handler for the tlb:tlb_flush event
+ * @s: output sequence the text is appended to
+ * @record: raw record the field values are read from
+ * @event: event description
+ * @context: unused
+ *
+ * Prints "pages=<n> reason=<text> (<value>)". For a reason value without
+ * a case below only the numeric value in parentheses is shown.
+ *
+ * Returns 0 on success, -1 when the "reason" field cannot be read; in
+ * that case "pages=" has already been written to @s.
+ */
+static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ unsigned long long val;
+
+ trace_seq_printf(s, "pages=");
+
+ tep_print_num_field(s, "%ld", event, "pages", record, 1);
+
+ if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_puts(s, " reason=");
+
+ switch (val) {
+ case TLB_FLUSH_ON_TASK_SWITCH:
+ trace_seq_puts(s, "flush on task switch");
+ break;
+ case TLB_REMOTE_SHOOTDOWN:
+ trace_seq_puts(s, "remote shootdown");
+ break;
+ case TLB_LOCAL_SHOOTDOWN:
+ trace_seq_puts(s, "local shootdown");
+ break;
+ case TLB_LOCAL_MM_SHOOTDOWN:
+ trace_seq_puts(s, "local mm shootdown");
+ break;
+ }
+
+ /* NOTE(review): val is unsigned but is printed with %lld. */
+ trace_seq_printf(s, " (%lld)", val);
+
+ return 0;
+}
+
+/*
+ * Plugin entry point: installs tlb_flush_handler() for the tlb:tlb_flush
+ * event. The result of the registration is not checked; the function
+ * always returns 0.
+ */
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+ tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
+ tlb_flush_handler, NULL);
+
+ return 0;
+}
+
+/*
+ * Plugin exit point: removes the handler installed by TEP_PLUGIN_LOADER(),
+ * using the same system/event/handler arguments.
+ */
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+ tep_unregister_event_handler(tep, -1,
+ "tlb", "tlb_flush",
+ tlb_flush_handler, NULL);
+}
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index e817179c5027..d3740c8f399b 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -18,6 +18,7 @@
l synthesize last branch entries (use with i or x)
L synthesize last branch entries on existing event records
s skip initial number of events
+ q quicker (less detailed) decoding
The default is all events i.e. the same as --itrace=ibxwpe,
except for perf script where it is --itrace=ce
@@ -47,3 +48,16 @@
--itrace=i0nss1000000
skips the first million instructions.
+
+ The 'e' option may be followed by flags which affect what errors will or
+ will not be reported. Each flag must be preceded by either '+' or '-'.
+ The flags are:
+ o overflow
+ l trace data lost
+
+ If supported, the 'd' option may be followed by flags which affect what
+ debug messages will or will not be logged. Each flag must be preceded
+ by either '+' or '-'. The flags are:
+ a all perf events
+
+ If supported, the 'q' option may be repeated to increase the effect.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bad16512c48d..a0529c7fa5ef 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -49,6 +49,9 @@ SUBSYSTEM
'sched'::
Scheduler and IPC mechanisms.
+'syscall'::
+ System call performance (throughput).
+
'mem'::
Memory access performance.
@@ -137,6 +140,14 @@ Example of *pipe*
59004 ops/sec
---------------------
+SUITES FOR 'syscall'
+~~~~~~~~~~~~~~~~~~~~
+*basic*::
+Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics).
+This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not
+cached by glibc.
+
+
SUITES FOR 'mem'
~~~~~~~~~~~~~~~~
*memcpy*::
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index c87180764829..726b9bc9e1a7 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.
+--tod::
+ Convert time to wall clock time.
+
-i::
Specify input perf data file path.
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index f4cd49a7fcdb..d5a266d7f15b 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -825,6 +825,7 @@ The letters are:
l synthesize last branch entries (use with i or x)
L synthesize last branch entries on existing event records
s skip initial number of events
+ q quicker (less detailed) decoding
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -871,11 +872,24 @@ Developer Manuals.
Error events show where the decoder lost the trace. Error events
are quite important. Users must know if what they are seeing is a complete
-picture or not.
+picture or not. The "e" option may be followed by flags which affect what errors
+will or will not be reported. Each flag must be preceded by either '+' or '-'.
+The flags supported by Intel PT are:
+ -o Suppress overflow errors
+ -l Suppress trace data lost errors
+For example, for errors but not overflow or data lost errors:
+
+ --itrace=e-o-l
The "d" option will cause the creation of a file "intel_pt.log" containing all
decoded packets and instructions. Note that this option slows down the decoder
-and that the resulting file may be very large.
+and that the resulting file may be very large. The "d" option may be followed
+by flags which affect what debug messages will or will not be logged. Each flag
+must be preceded by either '+' or '-'. The flags supported by Intel PT are:
+ -a Suppress logging of perf events
+ +a Log all perf events
+By default, logged perf events are filtered by any specified time ranges, but
+flag +a overrides that.
In addition, the period of the "instructions" event can be specified. e.g.
@@ -956,6 +970,51 @@ at the beginning. This is useful to ignore initialization code.
skips the first million instructions.
+The q option changes the way the trace is decoded. The decoding is much faster
+but much less detailed. Specifically, with the q option, the decoder does not
+decode TNT packets, and does not walk object code, but gets the ip from FUP and
+TIP packets. The q option can be used with the b and i options but the period
+is not used. The q option decodes more quickly, but is useful only if the
+control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or
+TIP.PGD packets (refer below). However the q option could be used to find time
+ranges that could then be decoded fully using the --time option.
+
+What will *not* be decoded with the (single) q option:
+
+ - direct calls and jmps
+ - conditional branches
+ - non-branch instructions
+
+What *will* be decoded with the (single) q option:
+
+ - asynchronous branches such as interrupts
+ - indirect branches
+ - function return target address *if* the noretcomp config term (refer
+ config terms section) was used
+ - start of (control-flow) tracing
+ - end of (control-flow) tracing, if it is not out of context
+ - power events, ptwrite, transaction start and abort
+ - instruction pointer associated with PSB packets
+
+Note the q option does not specify what events will be synthesized e.g. the p
+option must be used also to show power events.
+
+Repeating the q option (double-q i.e. qq) results in even faster decoding and even
+less detail. The decoder decodes only extended PSB (PSB+) packets, getting the
+instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and
+PSBEND). Note PSB packets occur regularly in the trace based on the psb_period
+config term (refer config terms section). There will be a FUP packet if the
+PSB+ occurs while control flow is being traced.
+
+What will *not* be decoded with the qq option:
+
+ - everything except instruction pointer associated with PSB packets
+
+What *will* be decoded with the qq option:
+
+ - instruction pointer associated with PSB packets
+
+
dump option
~~~~~~~~~~~
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 376a50b3452d..10ed539a8859 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -119,6 +119,7 @@ It's also possible to use pmu syntax:
perf record -e r1a8 -a sleep 1
perf record -e cpu/r1a8/ ...
+ perf record -e cpu/r0x1a8/ ...
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fa8a5fcd27ab..3f72d8e261f3 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -407,8 +407,9 @@ if combined with -a or -C options.
-D::
--delay=::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program, which
+is often very different.
-I::
--intr-regs::
@@ -626,6 +627,45 @@ option. The -e option and this one can be mixed and matched. Events
can be grouped using the {} notation.
endif::HAVE_LIBPFM[]
+--control fd:ctl-fd[,ack-fd]::
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using
+--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
+to synchronize with the controlling process. Example of bash shell script to enable and
+disable events during measurements:
+
+#!/bin/bash
+
+ctl_dir=/tmp/
+
+ctl_fifo=${ctl_dir}perf_ctl.fifo
+test -p ${ctl_fifo} && unlink ${ctl_fifo}
+mkfifo ${ctl_fifo}
+exec {ctl_fd}<>${ctl_fifo}
+
+ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+mkfifo ${ctl_ack_fifo}
+exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+perf record -D -1 -e cpu-cycles -a \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+perf_pid=$!
+
+sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+exec {ctl_fd_ack}>&-
+unlink ${ctl_ack_fifo}
+
+exec {ctl_fd}>&-
+unlink ${ctl_fifo}
+
+wait -n ${perf_pid}
+exit $?
+
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 372dfd110e6d..4f712fb8f175 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -322,6 +322,10 @@ OPTIONS
--show-cgroup-events
Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+--show-text-poke-events
+ Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and
+ PERF_RECORD_KSYMBOL.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b029ee728a0b..c9bfefc051fb 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -176,6 +176,45 @@ with it. --append may be used here. Examples:
3>results perf stat --log-fd 3 -- $cmd
3>>results perf stat --log-fd 3 --append -- $cmd
+--control fd:ctl-fd[,ack-fd]::
+Listen on the ctl-fd descriptor for commands that control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using the
+--delay=-1 option. Optionally send control command completion ('ack\n') to the ack-fd descriptor
+to synchronize with the controlling process. Example of a bash shell script that enables and
+disables events during measurements:
+
+#!/bin/bash
+
+ctl_dir=/tmp/
+
+ctl_fifo=${ctl_dir}perf_ctl.fifo
+test -p ${ctl_fifo} && unlink ${ctl_fifo}
+mkfifo ${ctl_fifo}
+exec {ctl_fd}<>${ctl_fifo}
+
+ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+mkfifo ${ctl_ack_fifo}
+exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+perf stat -D -1 -e cpu-cycles -a -I 1000 \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+perf_pid=$!
+
+sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+exec {ctl_fd_ack}>&-
+unlink ${ctl_ack_fifo}
+
+exec {ctl_fd}>&-
+unlink ${ctl_fifo}
+
+wait -n ${perf_pid}
+exit $?
+
+
--pre::
--post::
Pre and post measurement hooks, e.g.:
@@ -238,8 +277,9 @@ mode, use --per-node in addition to -a. (system-wide).
-D msecs::
--delay msecs::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program,
+which is often very different.
-T::
--transaction::
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b6472e463284..9ee96640744e 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -389,6 +389,19 @@ struct {
Example:
cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
+ HEADER_CLOCK_DATA = 29,
+
+ Contains the clock id and its reference time together with the wall clock
+ time taken at the 'same time'; both values are in nanoseconds.
+ The format of the data is as below.
+
+struct {
+ u32 version; /* version = 1 */
+ u32 clockid;
+ u64 wall_clock_ns;
+ u64 clockid_time_ns;
+};
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 28a5d0c18b1d..b187bddbd01a 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -57,17 +57,15 @@ struct auxtrace_record
struct evsel *evsel;
bool found_etm = false;
struct perf_pmu *found_spe = NULL;
- static struct perf_pmu **arm_spe_pmus = NULL;
- static int nr_spes = 0;
+ struct perf_pmu **arm_spe_pmus = NULL;
+ int nr_spes = 0;
int i = 0;
if (!evlist)
return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
-
- if (!arm_spe_pmus)
- arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+ arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
evlist__for_each_entry(evlist, evsel) {
if (cs_etm_pmu &&
@@ -84,6 +82,7 @@ struct auxtrace_record
}
}
}
+ free(arm_spe_pmus);
if (found_etm && found_spe) {
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index cea5e33d61d2..cad7bf783413 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -243,10 +243,10 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
}
/*
- * No sink was provided on the command line - for _now_ treat
- * this as an error.
+ * No sink was provided on the command line - allow the CoreSight
+ * system to look for a default
*/
- return ret;
+ return 0;
}
static int cs_etm_recording_options(struct auxtrace_record *itr,
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bfc1b1a..b190f2eb2611 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -427,8 +427,8 @@
336 common recv sys_recv compat_sys_recv
337 common recvfrom sys_recvfrom compat_sys_recvfrom
338 common shutdown sys_shutdown
-339 common setsockopt sys_setsockopt compat_sys_setsockopt
-340 common getsockopt sys_getsockopt compat_sys_getsockopt
+339 common setsockopt sys_setsockopt sys_setsockopt
+340 common getsockopt sys_getsockopt sys_getsockopt
341 common sendmsg sys_sendmsg compat_sys_sendmsg
342 common recvmsg sys_recvmsg compat_sys_recvmsg
343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index e18a3556f5e3..63f3ac91049f 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -64,7 +64,13 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_DAR] = "dar",
[PERF_REG_POWERPC_DSISR] = "dsisr",
[PERF_REG_POWERPC_SIER] = "sier",
- [PERF_REG_POWERPC_MMCRA] = "mmcra"
+ [PERF_REG_POWERPC_MMCRA] = "mmcra",
+ [PERF_REG_POWERPC_MMCR0] = "mmcr0",
+ [PERF_REG_POWERPC_MMCR1] = "mmcr1",
+ [PERF_REG_POWERPC_MMCR2] = "mmcr2",
+ [PERF_REG_POWERPC_MMCR3] = "mmcr3",
+ [PERF_REG_POWERPC_SIER2] = "sier2",
+ [PERF_REG_POWERPC_SIER3] = "sier3",
};
static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
index 54cfa0530e86..488f4339b83c 100644
--- a/tools/perf/arch/powerpc/util/book3s_hcalls.h
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -84,7 +84,7 @@
{0x1a4, "H_CREATE_RPT"}, \
{0x1a8, "H_REMOVE_RPT"}, \
{0x1ac, "H_REGISTER_RPAGES"}, \
- {0x1b0, "H_DISABLE_AND_GETC"}, \
+ {0x1b0, "H_DISABLE_AND_GET"}, \
{0x1b4, "H_ERROR_DATA"}, \
{0x1b8, "H_GET_HCA_INFO"}, \
{0x1bc, "H_GET_PERF_COUNT"}, \
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index d4870074f14c..1a950171a66f 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,17 +7,10 @@
#include <string.h>
#include <linux/stringify.h>
#include "header.h"
+#include "utils_header.h"
#include "metricgroup.h"
#include <api/fs/fs.h>
-#define mfspr(rn) ({unsigned long rval; \
- asm volatile("mfspr %0," __stringify(rn) \
- : "=r" (rval)); rval; })
-
-#define SPRN_PVR 0x11F /* Processor Version Register */
-#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
-#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */
-
int
get_cpuid(char *buffer, size_t sz)
{
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index 0a5242900248..2b6d4704e3aa 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -6,9 +6,16 @@
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/header.h"
+#include "../../../perf-sys.h"
+#include "utils_header.h"
#include <linux/kernel.h>
+#define PVR_POWER9 0x004E
+#define PVR_POWER10 0x0080
+
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(r0, PERF_REG_POWERPC_R0),
SMPL_REG(r1, PERF_REG_POWERPC_R1),
@@ -55,6 +62,12 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
SMPL_REG(sier, PERF_REG_POWERPC_SIER),
SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
+ SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0),
+ SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1),
+ SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2),
+ SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
+ SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
+ SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
SMPL_REG_END
};
@@ -163,3 +176,45 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+
+uint64_t arch__intr_reg_mask(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_REGS_INTR,
+ .precise_ip = 1,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ };
+ int fd;
+ u32 version;
+ u64 extended_mask = 0, mask = PERF_REGS_MASK;
+
+ /*
+ * Get the PVR value to set the extended
+ * mask specific to platform.
+ */
+ version = (((mfspr(SPRN_PVR)) >> 16) & 0xFFFF);
+ if (version == PVR_POWER9)
+ extended_mask = PERF_REG_PMU_MASK_300;
+ else if (version == PVR_POWER10)
+ extended_mask = PERF_REG_PMU_MASK_31;
+ else
+ return mask;
+
+ attr.sample_regs_intr = extended_mask;
+ attr.sample_period = 1;
+ event_attr_init(&attr);
+
+ /*
+ * check if the pmu supports perf extended regs, before
+ * returning the register mask to sample.
+ */
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ mask |= extended_mask;
+ }
+ return mask;
+}
diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h
new file mode 100644
index 000000000000..5788eb1f1fe3
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/utils_header.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_HEADER_H
+#define __PERF_UTIL_HEADER_H
+
+#include <linux/stringify.h>
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," __stringify(rn) \
+ : "=r" (rval)); rval; })
+
+#define SPRN_PVR 0x11F /* Processor Version Register */
+#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */
+
+#endif /* __PERF_UTIL_HEADER_H */
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d48464368..56ae24b6e4be 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -372,8 +372,8 @@
362 common connect sys_connect compat_sys_connect
363 common listen sys_listen sys_listen
364 common accept4 sys_accept4 compat_sys_accept4
-365 common getsockopt sys_getsockopt compat_sys_getsockopt
-366 common setsockopt sys_setsockopt compat_sys_setsockopt
+365 common getsockopt sys_getsockopt sys_getsockopt
+366 common setsockopt sys_setsockopt sys_setsockopt
367 common getsockname sys_getsockname compat_sys_getsockname
368 common getpeername sys_getpeername compat_sys_getpeername
369 common sendto sys_sendto compat_sys_sendto
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..9d82078c949a 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,7 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+436 common close_range sys_close_range
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
@@ -396,8 +397,8 @@
538 x32 sendmmsg compat_sys_sendmmsg
539 x32 process_vm_readv compat_sys_process_vm_readv
540 x32 process_vm_writev compat_sys_process_vm_writev
-541 x32 setsockopt compat_sys_setsockopt
-542 x32 getsockopt compat_sys_getsockopt
+541 x32 setsockopt sys_setsockopt
+542 x32 getsockopt sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat compat_sys_execveat
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6ce451293634..082e5f2a415a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
+ if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
+ perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
+ opts->text_poke = true;
+
if (intel_pt_evsel) {
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 768e408757a0..dd68a40a790c 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,5 +1,6 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
+perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o
perf-y += futex-wake.o
@@ -10,6 +11,7 @@ perf-y += epoll-wait.o
perf-y += epoll-ctl.o
perf-y += synthesize.o
perf-y += kallsyms-parse.o
+perf-y += find-bit-bench.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 61cae4966cae..2804812d4154 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,8 +33,10 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
+int bench_syscall_basic(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
int bench_futex_wake_parallel(int argc, const char **argv);
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
new file mode 100644
index 000000000000..fa90f3e9d368
--- /dev/null
+++ b/tools/perf/bench/find-bit-bench.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark find_next_bit and related bit operations.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int outer_iterations = 5;
+static unsigned int inner_iterations = 100000;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
+ "Number of outerer iterations used"),
+ OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
+ "Number of outerer iterations used"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench mem find_bit <options>",
+ NULL
+};
+
+static unsigned int accumulator;
+static unsigned int use_of_val;
+
+static noinline void workload(int val)
+{
+ use_of_val += val;
+ accumulator++;
+}
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+static bool asm_test_bit(long nr, const unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("bt %2,%1"
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+#else
+#define asm_test_bit test_bit
+#endif
+
+static int do_for_each_set_bit(unsigned int num_bits)
+{
+ unsigned long *to_test = bitmap_alloc(num_bits);
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ struct stats fb_time_stats, tb_time_stats;
+ double time_average, time_stddev;
+ unsigned int bit, i, j;
+ unsigned int set_bits, skip;
+ unsigned int old;
+
+ init_stats(&fb_time_stats);
+ init_stats(&tb_time_stats);
+
+ for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
+ bitmap_zero(to_test, num_bits);
+ skip = num_bits / set_bits;
+ for (i = 0; i < num_bits; i += skip)
+ set_bit(i, to_test);
+
+ for (i = 0; i < outer_iterations; i++) {
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for_each_set_bit(bit, to_test, num_bits)
+ workload(bit);
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&fb_time_stats, runtime_us);
+
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for (bit = 0; bit < num_bits; bit++) {
+ if (asm_test_bit(bit, to_test))
+ workload(bit);
+ }
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&tb_time_stats, runtime_us);
+ }
+
+ printf("%d operations %d bits set of %d bits\n",
+ inner_iterations, set_bits, num_bits);
+ time_average = avg_stats(&fb_time_stats);
+ time_stddev = stddev_stats(&fb_time_stats);
+ printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+ time_average = avg_stats(&tb_time_stats);
+ time_stddev = stddev_stats(&tb_time_stats);
+ printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
+ printf("\n");
+ }
+ bitmap_free(to_test);
+ return 0;
+}
+
+int bench_mem_find_bit(int argc, const char **argv)
+{
+ int err = 0, i;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 1; i <= 2048; i <<= 1)
+ do_for_each_set_bit(i);
+
+ return err;
+}
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
new file mode 100644
index 000000000000..5fe621cff8e9
--- /dev/null
+++ b/tools/perf/bench/syscall.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * syscall.c
+ *
+ * syscall: Benchmark for system call performance
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 10000000
+static int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_END()
+};
+
+static const char * const bench_syscall_usage[] = {
+ "perf bench syscall <options>",
+ NULL
+};
+
+int bench_syscall_basic(int argc, const char **argv)
+{
+ struct timeval start, stop, diff;
+ unsigned long long result_usec = 0;
+ int i;
+
+ argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < loops; i++)
+ getppid();
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Executed %'d getppid() calls\n", loops);
+
+ result_usec = diff.tv_sec * 1000000;
+ result_usec += diff.tv_usec;
+
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec/1000));
+
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / (double)loops);
+ printf(" %'14d ops/sec\n",
+ (int)((double)loops /
+ ((double)result_usec / (double)1000000)));
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / 1000));
+ break;
+
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index cad31b1d3438..4f176039fc8f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -11,6 +11,7 @@
* Available benchmark collection list:
*
* sched ... scheduler and IPC performance
+ * syscall ... System call performance
* mem ... memory access performance
* numa ... NUMA scheduling and MM performance
* futex ... Futex performance
@@ -49,9 +50,16 @@ static struct bench sched_benchmarks[] = {
{ NULL, NULL, NULL }
};
+static struct bench syscall_benchmarks[] = {
+ { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic },
+ { "all", "Run all syscall benchmarks", NULL },
+ { NULL, NULL, NULL },
+};
+
static struct bench mem_benchmarks[] = {
{ "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
{ "memset", "Benchmark for memset() functions", bench_mem_memset },
+ { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit },
{ "all", "Run all memory access benchmarks", NULL },
{ NULL, NULL, NULL }
};
@@ -90,6 +98,7 @@ struct collection {
static struct collection collections[] = {
{ "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "syscall", "System call benchmarks", syscall_benchmarks },
{ "mem", "Memory access benchmarks", mem_benchmarks },
#ifdef HAVE_LIBNUMA_SUPPORT
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index d617d5682c68..5938b100eaf4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2582,7 +2582,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
static int setup_callchain(struct evlist *evlist)
{
- u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ u64 sample_type = evlist__combined_sample_type(evlist);
enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index ca2fb44874e4..8d23b8d6ee8e 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -65,6 +65,7 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_STRING('i', "input", &input_name, "file", "input file name"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+ OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
#endif
OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"),
OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"),
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 4a6de4b03ac0..6d2f410d773a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -292,7 +292,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap.filename, sample->pid, &n);
+ event->mmap.filename, event->mmap.pid, &n);
if (ret < 0)
return ret;
if (ret) {
@@ -330,7 +330,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap2.filename, sample->pid, &n);
+ event->mmap2.filename, event->mmap2.pid, &n);
if (ret < 0)
return ret;
if (ret) {
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 38a5ab683ebc..a50dae2c4ae9 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1933,7 +1933,8 @@ int cmd_kmem(int argc, const char **argv)
return ret;
argc = parse_options_subcommand(argc, argv, kmem_options,
- kmem_subcommands, kmem_usage, 0);
+ kmem_subcommands, kmem_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 95a77058023e..460945ded6dd 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1319,7 +1319,7 @@ static struct evlist *kvm_live_event_list(void)
*name = '\0';
name++;
- if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+ if (evlist__add_newtp(evlist, sys, name, NULL)) {
pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
free(tp);
goto out;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a37e7910e9e9..f91352f847c0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -46,6 +46,7 @@
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
+#include "util/clockid.h"
#include "asm/bug.h"
#include "perf.h"
@@ -70,6 +71,7 @@
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
+#include <sys/time.h>
struct switch_output {
bool enabled;
@@ -765,6 +767,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
#endif
+static int record__config_text_poke(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ int err;
+
+ /* Nothing to do if text poke is already configured */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.text_poke)
+ return 0;
+ }
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ evsel = evlist__last(evlist);
+
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = 1;
+ evsel->core.attr.text_poke = 1;
+ evsel->core.attr.ksymbol = 1;
+
+ evsel->core.system_wide = true;
+ evsel->no_aux_samples = true;
+ evsel->immediate = true;
+
+ /* Text poke must be collected on all CPUs */
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.own_cpus = perf_cpu_map__new(NULL);
+ perf_cpu_map__put(evsel->core.cpus);
+ evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+ evsel__set_sample_bit(evsel, TIME);
+
+ return 0;
+}
+
static bool record__kcore_readable(struct machine *machine)
{
char kcore[PATH_MAX];
@@ -855,7 +894,7 @@ static int record__open(struct record *rec)
pos = perf_evlist__get_tracking_event(evlist);
if (!evsel__is_dummy_event(pos)) {
/* Set up dummy event. */
- if (perf_evlist__add_dummy(evlist))
+ if (evlist__add_dummy(evlist))
return -ENOMEM;
pos = evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, pos);
@@ -1166,6 +1205,9 @@ static void record__init_features(struct record *rec)
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+ if (!rec->opts.use_clockid)
+ perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
+
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
if (!record__comp_enabled(rec))
perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
@@ -1489,7 +1531,7 @@ static int record__setup_sb_evlist(struct record *rec)
evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
rec->thread_id = pthread_self();
}
-
+#ifdef HAVE_LIBBPF_SUPPORT
if (!opts->no_bpf_event) {
if (rec->sb_evlist == NULL) {
rec->sb_evlist = evlist__new();
@@ -1505,7 +1547,7 @@ static int record__setup_sb_evlist(struct record *rec)
return -1;
}
}
-
+#endif
if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
opts->no_bpf_event = true;
@@ -1514,6 +1556,43 @@ static int record__setup_sb_evlist(struct record *rec)
return 0;
}
+static int record__init_clock(struct record *rec)
+{
+ struct perf_session *session = rec->session;
+ struct timespec ref_clockid;
+ struct timeval ref_tod;
+ u64 ref;
+
+ if (!rec->opts.use_clockid)
+ return 0;
+
+ if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+ session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
+
+ session->header.env.clock.clockid = rec->opts.clockid;
+
+ if (gettimeofday(&ref_tod, NULL) != 0) {
+ pr_err("gettimeofday failed, cannot set reference time.\n");
+ return -1;
+ }
+
+ if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
+ pr_err("clock_gettime failed, cannot set reference time.\n");
+ return -1;
+ }
+
+ ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
+ (u64) ref_tod.tv_usec * NSEC_PER_USEC;
+
+ session->header.env.clock.tod_ns = ref;
+
+ ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
+ (u64) ref_clockid.tv_nsec;
+
+ session->header.env.clock.clockid_ns = ref;
+ return 0;
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1527,6 +1606,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
bool disabled = false, draining = false;
int fd;
float ratio = 0;
+ enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
@@ -1593,10 +1673,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
return -1;
}
- record__init_features(rec);
+ if (record__init_clock(rec))
+ return -1;
- if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
- session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+ record__init_features(rec);
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
@@ -1646,7 +1726,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* Normally perf_session__new would do this, but it doesn't have the
* evlist.
*/
- if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+ if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
rec->tool.ordered_events = false;
}
@@ -1748,9 +1828,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
perf_evlist__start_workload(rec->evlist);
}
+ if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
+ goto out_child;
+
if (opts->initial_delay) {
- usleep(opts->initial_delay * USEC_PER_MSEC);
- evlist__enable(rec->evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ if (opts->initial_delay > 0) {
+ usleep(opts->initial_delay * USEC_PER_MSEC);
+ evlist__enable(rec->evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ }
}
trigger_ready(&auxtrace_snapshot_trigger);
@@ -1842,6 +1929,21 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
draining = true;
}
+ if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
+ switch (cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ pr_info(EVLIST_ENABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ pr_info(EVLIST_DISABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ break;
+ }
+ }
+
/*
* When perf is starting the traced process, at the end events
* die with the process and we wait for that. Thus no need to
@@ -1875,6 +1977,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ evlist__finalize_ctlfd(rec->evlist);
record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
@@ -2041,103 +2144,6 @@ static int perf_record_config(const char *var, const char *value, void *cb)
return 0;
}
-struct clockid_map {
- const char *name;
- int clockid;
-};
-
-#define CLOCKID_MAP(n, c) \
- { .name = n, .clockid = (c), }
-
-#define CLOCKID_END { .name = NULL, }
-
-
-/*
- * Add the missing ones, we need to build on many distros...
- */
-#ifndef CLOCK_MONOTONIC_RAW
-#define CLOCK_MONOTONIC_RAW 4
-#endif
-#ifndef CLOCK_BOOTTIME
-#define CLOCK_BOOTTIME 7
-#endif
-#ifndef CLOCK_TAI
-#define CLOCK_TAI 11
-#endif
-
-static const struct clockid_map clockids[] = {
- /* available for all events, NMI safe */
- CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
- CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
-
- /* available for some events */
- CLOCKID_MAP("realtime", CLOCK_REALTIME),
- CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
- CLOCKID_MAP("tai", CLOCK_TAI),
-
- /* available for the lazy */
- CLOCKID_MAP("mono", CLOCK_MONOTONIC),
- CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
- CLOCKID_MAP("real", CLOCK_REALTIME),
- CLOCKID_MAP("boot", CLOCK_BOOTTIME),
-
- CLOCKID_END,
-};
-
-static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
-{
- struct timespec res;
-
- *res_ns = 0;
- if (!clock_getres(clk_id, &res))
- *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
- else
- pr_warning("WARNING: Failed to determine specified clock resolution.\n");
-
- return 0;
-}
-
-static int parse_clockid(const struct option *opt, const char *str, int unset)
-{
- struct record_opts *opts = (struct record_opts *)opt->value;
- const struct clockid_map *cm;
- const char *ostr = str;
-
- if (unset) {
- opts->use_clockid = 0;
- return 0;
- }
-
- /* no arg passed */
- if (!str)
- return 0;
-
- /* no setting it twice */
- if (opts->use_clockid)
- return -1;
-
- opts->use_clockid = true;
-
- /* if its a number, we're done */
- if (sscanf(str, "%d", &opts->clockid) == 1)
- return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
-
- /* allow a "CLOCK_" prefix to the name */
- if (!strncasecmp(str, "CLOCK_", 6))
- str += 6;
-
- for (cm = clockids; cm->name; cm++) {
- if (!strcasecmp(str, cm->name)) {
- opts->clockid = cm->clockid;
- return get_clockid_res(opts->clockid,
- &opts->clockid_res_ns);
- }
- }
-
- opts->use_clockid = false;
- ui__warning("unknown clockid %s, check man page\n", ostr);
- return -1;
-}
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
@@ -2224,6 +2230,33 @@ out_free:
return ret;
}
+/*
+ * parse_control_option - callback for --control "fd:ctl-fd[,ack-fd]".
+ *
+ * @opt:   option descriptor; opt->value points at the struct record_opts
+ *         whose ctl_fd / ctl_fd_ack members receive the descriptors.
+ * @str:   option argument; treated as "no argument" when NULL.
+ * @unset: non-zero when the option was negated.
+ *
+ * Returns 0 on success and -EINVAL on a malformed argument.  Nothing is
+ * stored unless the whole string parses, so a rejected argument leaves the
+ * previous settings untouched.  ctl_fd_ack is only written when an ack-fd
+ * is actually given.
+ */
+static int parse_control_option(const struct option *opt,
+				const char *str,
+				int unset)
+{
+	struct record_opts *config = (struct record_opts *)opt->value;
+	int ctl_fd, ack_fd = config->ctl_fd_ack;
+	char *endptr = NULL;
+	unsigned long val;
+
+	/*
+	 * Negated form or missing argument: go back to the -1 defaults
+	 * instead of handing a NULL string to strncmp().
+	 */
+	if (unset || !str) {
+		config->ctl_fd = -1;
+		config->ctl_fd_ack = -1;
+		return 0;
+	}
+
+	if (strncmp(str, "fd:", 3))
+		return -EINVAL;
+
+	val = strtoul(&str[3], &endptr, 0);
+	ctl_fd = (int)val;
+	/* No digits at all, or a value that is not a non-negative int. */
+	if (endptr == &str[3] || ctl_fd < 0 || (unsigned long)ctl_fd != val)
+		return -EINVAL;
+
+	if (*endptr == ',') {
+		const char *ack = endptr + 1;
+
+		val = strtoul(ack, &endptr, 0);
+		ack_fd = (int)val;
+		if (endptr == ack || ack_fd < 0 || (unsigned long)ack_fd != val)
+			return -EINVAL;
+	}
+
+	/* Reject trailing junk such as "fd:10abc" or "fd:10,11x". */
+	if (*endptr != '\0')
+		return -EINVAL;
+
+	config->ctl_fd = ctl_fd;
+	config->ctl_fd_ack = ack_fd;
+	return 0;
+}
+
static void switch_output_size_warn(struct record *rec)
{
u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
@@ -2360,6 +2393,8 @@ static struct record record = {
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
.nr_threads_synthesize = 1,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1,
},
.tool = {
.sample = process_sample_event,
@@ -2462,8 +2497,8 @@ static struct option __record_options[] = {
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
"monitor event in cgroup name only",
parse_cgroups),
- OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
- "ms to wait before starting measurement after program start"),
+ OPT_INTEGER('D', "delay", &record.opts.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
"user to profile"),
@@ -2561,6 +2596,10 @@ static struct option __record_options[] = {
"libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option),
#endif
+ OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+ parse_control_option),
OPT_END()
};
@@ -2722,7 +2761,7 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0 &&
- __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
+ __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out;
}
@@ -2766,6 +2805,14 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.full_auxtrace)
rec->buildid_all = true;
+ if (rec->opts.text_poke) {
+ err = record__config_text_poke(rec->evlist);
+ if (err) {
+ pr_err("record__config_text_poke failed, error %d\n", err);
+ goto out;
+ }
+ }
+
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
goto out;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5f1d2a878fad..ece1cddfcd7c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -338,7 +338,7 @@ static int process_read_event(struct perf_tool *tool,
static int report__setup_sample_type(struct report *rep)
{
struct perf_session *session = rep->session;
- u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data__is_pipe(session->data);
if (session->itrace_synth_opts->callchain ||
@@ -410,8 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
/* ??? handle more cases than just ANY? */
- if (!(perf_evlist__combined_branch_type(session->evlist) &
- PERF_SAMPLE_BRANCH_ANY))
+ if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT)
@@ -1093,7 +1092,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* Check if we need to enable callchains based
* on events sample_type.
*/
- sample_type = perf_evlist__combined_sample_type(*pevlist);
+ sample_type = evlist__combined_sample_type(*pevlist);
callchain_param_setup(sample_type);
return 0;
}
@@ -1389,7 +1388,7 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
- if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
+ if (evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
has_br_stack = false;
setup_forced_leader(&report, session->evlist);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 447457786362..484ce6067d23 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -82,38 +82,64 @@ static bool native_arch;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
enum perf_output_field {
- PERF_OUTPUT_COMM = 1U << 0,
- PERF_OUTPUT_TID = 1U << 1,
- PERF_OUTPUT_PID = 1U << 2,
- PERF_OUTPUT_TIME = 1U << 3,
- PERF_OUTPUT_CPU = 1U << 4,
- PERF_OUTPUT_EVNAME = 1U << 5,
- PERF_OUTPUT_TRACE = 1U << 6,
- PERF_OUTPUT_IP = 1U << 7,
- PERF_OUTPUT_SYM = 1U << 8,
- PERF_OUTPUT_DSO = 1U << 9,
- PERF_OUTPUT_ADDR = 1U << 10,
- PERF_OUTPUT_SYMOFFSET = 1U << 11,
- PERF_OUTPUT_SRCLINE = 1U << 12,
- PERF_OUTPUT_PERIOD = 1U << 13,
- PERF_OUTPUT_IREGS = 1U << 14,
- PERF_OUTPUT_BRSTACK = 1U << 15,
- PERF_OUTPUT_BRSTACKSYM = 1U << 16,
- PERF_OUTPUT_DATA_SRC = 1U << 17,
- PERF_OUTPUT_WEIGHT = 1U << 18,
- PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
- PERF_OUTPUT_CALLINDENT = 1U << 20,
- PERF_OUTPUT_INSN = 1U << 21,
- PERF_OUTPUT_INSNLEN = 1U << 22,
- PERF_OUTPUT_BRSTACKINSN = 1U << 23,
- PERF_OUTPUT_BRSTACKOFF = 1U << 24,
- PERF_OUTPUT_SYNTH = 1U << 25,
- PERF_OUTPUT_PHYS_ADDR = 1U << 26,
- PERF_OUTPUT_UREGS = 1U << 27,
- PERF_OUTPUT_METRIC = 1U << 28,
- PERF_OUTPUT_MISC = 1U << 29,
- PERF_OUTPUT_SRCCODE = 1U << 30,
- PERF_OUTPUT_IPC = 1U << 31,
+ PERF_OUTPUT_COMM = 1ULL << 0,
+ PERF_OUTPUT_TID = 1ULL << 1,
+ PERF_OUTPUT_PID = 1ULL << 2,
+ PERF_OUTPUT_TIME = 1ULL << 3,
+ PERF_OUTPUT_CPU = 1ULL << 4,
+ PERF_OUTPUT_EVNAME = 1ULL << 5,
+ PERF_OUTPUT_TRACE = 1ULL << 6,
+ PERF_OUTPUT_IP = 1ULL << 7,
+ PERF_OUTPUT_SYM = 1ULL << 8,
+ PERF_OUTPUT_DSO = 1ULL << 9,
+ PERF_OUTPUT_ADDR = 1ULL << 10,
+ PERF_OUTPUT_SYMOFFSET = 1ULL << 11,
+ PERF_OUTPUT_SRCLINE = 1ULL << 12,
+ PERF_OUTPUT_PERIOD = 1ULL << 13,
+ PERF_OUTPUT_IREGS = 1ULL << 14,
+ PERF_OUTPUT_BRSTACK = 1ULL << 15,
+ PERF_OUTPUT_BRSTACKSYM = 1ULL << 16,
+ PERF_OUTPUT_DATA_SRC = 1ULL << 17,
+ PERF_OUTPUT_WEIGHT = 1ULL << 18,
+ PERF_OUTPUT_BPF_OUTPUT = 1ULL << 19,
+ PERF_OUTPUT_CALLINDENT = 1ULL << 20,
+ PERF_OUTPUT_INSN = 1ULL << 21,
+ PERF_OUTPUT_INSNLEN = 1ULL << 22,
+ PERF_OUTPUT_BRSTACKINSN = 1ULL << 23,
+ PERF_OUTPUT_BRSTACKOFF = 1ULL << 24,
+ PERF_OUTPUT_SYNTH = 1ULL << 25,
+ PERF_OUTPUT_PHYS_ADDR = 1ULL << 26,
+ PERF_OUTPUT_UREGS = 1ULL << 27,
+ PERF_OUTPUT_METRIC = 1ULL << 28,
+ PERF_OUTPUT_MISC = 1ULL << 29,
+ PERF_OUTPUT_SRCCODE = 1ULL << 30,
+ PERF_OUTPUT_IPC = 1ULL << 31,
+ PERF_OUTPUT_TOD = 1ULL << 32,
+};
+
+/*
+ * Per-invocation state of 'perf script'.
+ *
+ * The definition sits ahead of the sample-printing helpers because some of
+ * them (e.g. tod_scnprintf()) take a struct perf_script * and dereference
+ * it to reach the session.
+ */
+struct perf_script {
+ struct perf_tool tool;
+ struct perf_session *session; /* tod_scnprintf() reads header.env through this */
+ bool show_task_events;
+ bool show_mmap_events;
+ bool show_switch_events;
+ bool show_namespace_events;
+ bool show_lost_events;
+ bool show_round_events;
+ bool show_bpf_events; /* --show-bpf-events */
+ bool show_cgroup_events;
+ bool show_text_poke_events; /* --show-text-poke-events */
+ bool allocated;
+ bool per_event_dump; /* --per-event-dump */
+ bool stitch_lbr;
+ struct evswitch evswitch;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+ int name_width; /* 0 until process_event() caches evlist__max_name_len() */
+ const char *time_str;
+ struct perf_time_interval *ptime_range;
+ int range_size;
+ int range_num;
};
struct output_option {
@@ -152,6 +178,7 @@ struct output_option {
{.str = "misc", .field = PERF_OUTPUT_MISC},
{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
{.str = "ipc", .field = PERF_OUTPUT_IPC},
+ {.str = "tod", .field = PERF_OUTPUT_TOD},
};
enum {
@@ -388,7 +415,7 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *
return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false);
}
-static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session)
+static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
@@ -443,8 +470,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *sess
return -EINVAL;
}
if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set &&
- !(perf_evlist__combined_branch_type(session->evlist) &
- PERF_SAMPLE_BRANCH_ANY)) {
+ !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
"Hint: run 'perf record -b ...'\n");
return -EINVAL;
@@ -503,6 +529,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
*/
static int perf_session__check_output_opt(struct perf_session *session)
{
+ bool tod = false;
unsigned int j;
struct evsel *evsel;
@@ -522,13 +549,14 @@ static int perf_session__check_output_opt(struct perf_session *session)
}
if (evsel && output[j].fields &&
- perf_evsel__check_attr(evsel, session))
+ evsel__check_attr(evsel, session))
return -1;
if (evsel == NULL)
continue;
set_print_ip_opts(&evsel->core.attr);
+ tod |= output[j].fields & PERF_OUTPUT_TOD;
}
if (!no_callchain) {
@@ -569,13 +597,17 @@ static int perf_session__check_output_opt(struct perf_session *session)
}
}
+ if (tod && !session->header.env.clock.enabled) {
+ pr_err("Can't provide 'tod' time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
out:
return 0;
}
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
- FILE *fp
-)
+ FILE *fp)
{
unsigned i = 0, r;
int printed = 0;
@@ -593,6 +625,56 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
return printed;
}
+#define DEFAULT_TOD_FMT "%F %H:%M:%S"
+
+/*
+ * Render @timestamp as wall-clock text in @buf and return @buf.
+ *
+ * @buf is left empty when @script is NULL or @buflen is below 64.  It holds
+ * "disabled" when the session header has no clock data and "failed" when
+ * localtime_r() fails.  Otherwise the result is DEFAULT_TOD_FMT followed by
+ * a fractional part: nanoseconds if symbol_conf.nanosecs is set,
+ * microseconds if not.
+ */
+static char*
+tod_scnprintf(struct perf_script *script, char *buf, int buflen,
+	      u64 timestamp)
+{
+	struct perf_env *env;
+	u64 ref_clockid_ns, wall_ns;
+	unsigned long nsec;
+	struct tm ltime;
+	char date[64];
+	time_t sec;
+
+	buf[0] = '\0';
+	if (!script || buflen < 64)
+		return buf;
+
+	env = &script->session->header.env;
+	if (!env->clock.enabled) {
+		scnprintf(buf, buflen, "disabled");
+		return buf;
+	}
+
+	/*
+	 * Offset the recorded time-of-day reference by the distance between
+	 * @timestamp and the recorded clockid reference, in either direction.
+	 */
+	ref_clockid_ns = env->clock.clockid_ns;
+	wall_ns = env->clock.tod_ns;
+	if (timestamp > ref_clockid_ns)
+		wall_ns += timestamp - ref_clockid_ns;
+	else
+		wall_ns -= ref_clockid_ns - timestamp;
+
+	sec = (time_t) (wall_ns / NSEC_PER_SEC);
+	nsec = wall_ns - sec * NSEC_PER_SEC;
+
+	if (localtime_r(&sec, &ltime) == NULL) {
+		scnprintf(buf, buflen, "failed");
+		return buf;
+	}
+
+	strftime(date, sizeof(date), DEFAULT_TOD_FMT, &ltime);
+
+	if (symbol_conf.nanosecs)
+		snprintf(buf, buflen, "%s.%09lu", date, nsec);
+	else
+		snprintf(buf, buflen, "%s.%06lu", date, nsec / NSEC_PER_USEC);
+
+	return buf;
+}
+
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
@@ -607,7 +689,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
attr->sample_regs_user, fp);
}
-static int perf_sample__fprintf_start(struct perf_sample *sample,
+static int perf_sample__fprintf_start(struct perf_script *script,
+ struct perf_sample *sample,
struct thread *thread,
struct evsel *evsel,
u32 type, FILE *fp)
@@ -616,6 +699,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
unsigned long secs;
unsigned long long nsecs;
int printed = 0;
+ char tstr[128];
if (PRINT_FIELD(COMM)) {
if (latency_format)
@@ -684,6 +768,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
printed += ret;
}
+ if (PRINT_FIELD(TOD)) {
+ tod_scnprintf(script, tstr, sizeof(tstr), sample->time);
+ printed += fprintf(fp, "%s ", tstr);
+ }
+
if (PRINT_FIELD(TIME)) {
u64 t = sample->time;
if (reltime) {
@@ -1668,31 +1757,7 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return 0;
}
-struct perf_script {
- struct perf_tool tool;
- struct perf_session *session;
- bool show_task_events;
- bool show_mmap_events;
- bool show_switch_events;
- bool show_namespace_events;
- bool show_lost_events;
- bool show_round_events;
- bool show_bpf_events;
- bool show_cgroup_events;
- bool allocated;
- bool per_event_dump;
- bool stitch_lbr;
- struct evswitch evswitch;
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
- int name_width;
- const char *time_str;
- struct perf_time_interval *ptime_range;
- int range_size;
- int range_num;
-};
-
-static int perf_evlist__max_name_len(struct evlist *evlist)
+static int evlist__max_name_len(struct evlist *evlist)
{
struct evsel *evsel;
int max = 0;
@@ -1739,7 +1804,7 @@ static void script_print_metric(struct perf_stat_config *config __maybe_unused,
if (!fmt)
return;
- perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
PERF_RECORD_SAMPLE, mctx->fp);
fputs("\tmetric: ", mctx->fp);
if (color)
@@ -1754,7 +1819,7 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
{
struct metric_ctx *mctx = ctx;
- perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
PERF_RECORD_SAMPLE, mctx->fp);
fputs("\tmetric: ", mctx->fp);
}
@@ -1865,7 +1930,7 @@ static void process_event(struct perf_script *script,
++es->samples;
- perf_sample__fprintf_start(sample, thread, evsel,
+ perf_sample__fprintf_start(script, sample, thread, evsel,
PERF_RECORD_SAMPLE, fp);
if (PRINT_FIELD(PERIOD))
@@ -1875,7 +1940,7 @@ static void process_event(struct perf_script *script,
const char *evname = evsel__name(evsel);
if (!script->name_width)
- script->name_width = perf_evlist__max_name_len(script->session->evlist);
+ script->name_width = evlist__max_name_len(script->session->evlist);
fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
}
@@ -2120,7 +2185,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
}
if (evsel->core.attr.sample_type) {
- err = perf_evsel__check_attr(evsel, scr->session);
+ err = evsel__check_attr(evsel, scr->session);
if (err)
return err;
}
@@ -2129,7 +2194,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* Check if we need to enable callchains based
* on events sample_type.
*/
- sample_type = perf_evlist__combined_sample_type(evlist);
+ sample_type = evlist__combined_sample_type(evlist);
callchain_param_setup(sample_type);
/* Enable fields for callchain entries */
@@ -2174,11 +2239,11 @@ static int print_event_with_time(struct perf_tool *tool,
thread = machine__findnew_thread(machine, pid, tid);
if (thread && evsel) {
- perf_sample__fprintf_start(sample, thread, evsel,
+ perf_sample__fprintf_start(script, sample, thread, evsel,
event->header.type, stdout);
}
- perf_event__fprintf(event, stdout);
+ perf_event__fprintf(event, machine, stdout);
thread__put(thread);
@@ -2313,7 +2378,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
struct ordered_events *oe __maybe_unused)
{
- perf_event__fprintf(event, stdout);
+ perf_event__fprintf(event, NULL, stdout);
return 0;
}
@@ -2330,6 +2395,18 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
sample->tid);
}
+/*
+ * Text poke event handler: run perf_event__process_text_poke() first and,
+ * unless it reports a negative result, print the event via print_event()
+ * using the sample's pid/tid.  Returns -1 on processing failure, otherwise
+ * whatever print_event() returns.
+ */
+static int process_text_poke_events(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	int err = perf_event__process_text_poke(tool, event, sample, machine);
+
+	if (err < 0)
+		return -1;
+
+	return print_event(tool, event, sample, machine,
+			   sample->pid, sample->tid);
+}
+
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
@@ -2438,6 +2515,10 @@ static int __cmd_script(struct perf_script *script)
script->tool.ksymbol = process_bpf_events;
script->tool.bpf = process_bpf_events;
}
+ if (script->show_text_poke_events) {
+ script->tool.ksymbol = process_bpf_events;
+ script->tool.text_poke = process_text_poke_events;
+ }
if (perf_script__setup_per_event_dump(script)) {
pr_err("Couldn't create the per event dump files\n");
@@ -3171,7 +3252,7 @@ static int have_cmd(int argc, const char **argv)
static void script__setup_sample_type(struct perf_script *script)
{
struct perf_session *session = script->session;
- u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -3423,7 +3504,7 @@ int cmd_script(int argc, const char **argv)
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
- "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
+ "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@@ -3474,6 +3555,8 @@ int cmd_script(int argc, const char **argv)
"Show round events (if recorded)"),
OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events,
"Show bpf related events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-text-poke-events", &script.show_text_poke_events,
+ "Show text poke related events (if recorded)"),
OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
"Dump trace output to files named by the monitored events"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9be020e0098a..483a28ef4ec4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -188,6 +188,8 @@ static struct perf_stat_config stat_config = {
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
.big_num = true,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1
};
static bool cpus_map_matched(struct evsel *a, struct evsel *b)
@@ -475,18 +477,38 @@ static void process_interval(void)
print_counters(&rs, 0, NULL);
}
+/*
+ * Interval tick: when @interval is non-zero, print the interval counters.
+ * Returns true only when an interval count is configured and this tick
+ * brought *@times down to zero, i.e. the caller should stop.  *@times is
+ * not touched when interval_count is zero.
+ */
+static bool handle_interval(unsigned int interval, int *times)
+{
+	if (!interval)
+		return false;
+
+	process_interval();
+
+	if (!interval_count)
+		return false;
+
+	return --(*times) == 0;
+}
+
static void enable_counters(void)
{
- if (stat_config.initial_delay)
+ if (stat_config.initial_delay < 0) {
+ pr_info(EVLIST_DISABLED_MSG);
+ return;
+ }
+
+ if (stat_config.initial_delay > 0) {
+ pr_info(EVLIST_DISABLED_MSG);
usleep(stat_config.initial_delay * USEC_PER_MSEC);
+ }
/*
* We need to enable counters only if:
* - we don't have tracee (attaching to task or cpu)
* - we have initial delay configured
*/
- if (!target__none(&target) || stat_config.initial_delay)
+ if (!target__none(&target) || stat_config.initial_delay) {
evlist__enable(evsel_list);
+ if (stat_config.initial_delay > 0)
+ pr_info(EVLIST_ENABLED_MSG);
+ }
}
static void disable_counters(void)
@@ -540,6 +562,86 @@ static bool is_target_alive(struct target *_target,
return false;
}
+/*
+ * Handle one command read from the evlist control descriptor.
+ *
+ * On enable the message is printed before the interval counters, on
+ * disable after them.  Any other command, or a non-positive result from
+ * evlist__ctlfd_process(), does nothing here.
+ */
+static void process_evlist(struct evlist *evlist, unsigned int interval)
+{
+	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+
+	if (evlist__ctlfd_process(evlist, &cmd) <= 0)
+		return;
+
+	if (cmd == EVLIST_CTL_CMD_ENABLE) {
+		pr_info(EVLIST_ENABLED_MSG);
+		if (interval)
+			process_interval();
+	} else if (cmd == EVLIST_CTL_CMD_DISABLE) {
+		if (interval)
+			process_interval();
+		pr_info(EVLIST_DISABLED_MSG);
+	}
+}
+
+/*
+ * Reduce *@time_to_sleep (milliseconds) by the time that passed between
+ * @time_start and @time_stop, never letting it drop below zero.
+ */
+static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
+			int *time_to_sleep)
+{
+	int remaining = *time_to_sleep;
+	struct timespec elapsed;
+
+	diff_timespec(&elapsed, time_stop, time_start);
+
+	/* Convert the elapsed timespec to milliseconds and subtract it. */
+	remaining -= elapsed.tv_sec * MSEC_PER_SEC +
+		     elapsed.tv_nsec / NSEC_PER_MSEC;
+
+	*time_to_sleep = remaining < 0 ? 0 : remaining;
+}
+
+/*
+ * Wait loop of 'perf stat': poll the evlist until the workload/target is
+ * gone, 'done' is set, the timeout expires or the interval count runs out.
+ *
+ * @forks:    true when perf started the workload itself; the child is then
+ *            checked with a non-blocking waitpid(), otherwise the monitored
+ *            target is checked with is_target_alive().
+ * @timeout:  non-zero to stop after the first poll timeout.
+ * @interval: non-zero to print interval counters on every poll timeout.
+ * @times:    remaining interval count, decremented by handle_interval().
+ *
+ * Returns the wait status filled in by waitpid(), or 0 if waitpid() never
+ * stored one.
+ */
+static int dispatch_events(bool forks, int timeout, int interval, int *times)
+{
+ int child_exited = 0, status = 0;
+ int time_to_sleep, sleep_time;
+ struct timespec time_start, time_stop;
+
+ /* Poll period in ms: the interval, else the timeout, else one second. */
+ if (interval)
+ sleep_time = interval;
+ else if (timeout)
+ sleep_time = timeout;
+ else
+ sleep_time = 1000;
+
+ time_to_sleep = sleep_time;
+
+ while (!done) {
+ if (forks)
+ child_exited = waitpid(child_pid, &status, WNOHANG);
+ else
+ child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
+
+ /* Any non-zero value ends the loop, including a waitpid() error (-1). */
+ if (child_exited)
+ break;
+
+ clock_gettime(CLOCK_MONOTONIC, &time_start);
+ if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
+ if (timeout || handle_interval(interval, times))
+ break;
+ /* A full period has been consumed: start the next one. */
+ time_to_sleep = sleep_time;
+ } else { /* fd revent */
+ process_evlist(evsel_list, interval);
+ clock_gettime(CLOCK_MONOTONIC, &time_stop);
+ /* Only sleep for what is left of the current period. */
+ compute_tts(&time_start, &time_stop, &time_to_sleep);
+ }
+ }
+
+ return status;
+}
+
enum counter_recovery {
COUNTER_SKIP,
COUNTER_RETRY,
@@ -603,7 +705,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
char msg[BUFSIZ];
unsigned long long t0, t1;
struct evsel *counter;
- struct timespec ts;
size_t l;
int status = 0;
const bool forks = (argc > 0);
@@ -612,17 +713,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int i, cpu;
bool second_pass = false;
- if (interval) {
- ts.tv_sec = interval / USEC_PER_MSEC;
- ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
- } else if (timeout) {
- ts.tv_sec = timeout / USEC_PER_MSEC;
- ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
- } else {
- ts.tv_sec = 1;
- ts.tv_nsec = 0;
- }
-
if (forks) {
if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
workload_exec_failed_signal) < 0) {
@@ -779,16 +869,8 @@ try_again_reset:
perf_evlist__start_workload(evsel_list);
enable_counters();
- if (interval || timeout) {
- while (!waitpid(child_pid, &status, WNOHANG)) {
- nanosleep(&ts, NULL);
- if (timeout)
- break;
- process_interval();
- if (interval_count && !(--times))
- break;
- }
- }
+ if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
+ status = dispatch_events(forks, timeout, interval, &times);
if (child_pid != -1) {
if (timeout)
kill(child_pid, SIGTERM);
@@ -805,18 +887,7 @@ try_again_reset:
psignal(WTERMSIG(status), argv[0]);
} else {
enable_counters();
- while (!done) {
- nanosleep(&ts, NULL);
- if (!is_target_alive(&target, evsel_list->core.threads))
- break;
- if (timeout)
- break;
- if (interval) {
- process_interval();
- if (interval_count && !(--times))
- break;
- }
- }
+ status = dispatch_events(forks, timeout, interval, &times);
}
disable_counters();
@@ -970,6 +1041,33 @@ static int parse_metric_groups(const struct option *opt,
&stat_config.metric_events);
}
+/*
+ * parse_control_option - callback for --control "fd:ctl-fd[,ack-fd]".
+ *
+ * @opt:   option descriptor; opt->value points at the struct
+ *         perf_stat_config whose ctl_fd / ctl_fd_ack members receive the
+ *         descriptors.
+ * @str:   option argument; treated as "no argument" when NULL.
+ * @unset: non-zero when the option was negated.
+ *
+ * Returns 0 on success and -EINVAL on a malformed argument.  Nothing is
+ * stored unless the whole string parses, so a rejected argument leaves the
+ * previous settings untouched.  ctl_fd_ack is only written when an ack-fd
+ * is actually given.
+ */
+static int parse_control_option(const struct option *opt,
+				const char *str,
+				int unset)
+{
+	struct perf_stat_config *config = (struct perf_stat_config *)opt->value;
+	int ctl_fd, ack_fd = config->ctl_fd_ack;
+	char *endptr = NULL;
+	unsigned long val;
+
+	/*
+	 * Negated form or missing argument: go back to the -1 defaults
+	 * instead of handing a NULL string to strncmp().
+	 */
+	if (unset || !str) {
+		config->ctl_fd = -1;
+		config->ctl_fd_ack = -1;
+		return 0;
+	}
+
+	if (strncmp(str, "fd:", 3))
+		return -EINVAL;
+
+	val = strtoul(&str[3], &endptr, 0);
+	ctl_fd = (int)val;
+	/* No digits at all, or a value that is not a non-negative int. */
+	if (endptr == &str[3] || ctl_fd < 0 || (unsigned long)ctl_fd != val)
+		return -EINVAL;
+
+	if (*endptr == ',') {
+		const char *ack = endptr + 1;
+
+		val = strtoul(ack, &endptr, 0);
+		ack_fd = (int)val;
+		if (endptr == ack || ack_fd < 0 || (unsigned long)ack_fd != val)
+			return -EINVAL;
+	}
+
+	/* Reject trailing junk such as "fd:10abc" or "fd:10,11x". */
+	if (*endptr != '\0')
+		return -EINVAL;
+
+	config->ctl_fd = ctl_fd;
+	config->ctl_fd_ack = ack_fd;
+	return 0;
+}
+
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1041,8 +1139,8 @@ static struct option stat_options[] = {
"aggregate counts per thread", AGGR_THREAD),
OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
- OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
- "ms to wait before starting measurement after program start"),
+ OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
"Only print computed metrics. No raw values", enable_metric_only),
OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
@@ -1071,6 +1169,10 @@ static struct option stat_options[] = {
"libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option),
#endif
+ OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+ parse_control_option),
OPT_END()
};
@@ -1679,19 +1781,17 @@ static int add_default_attributes(void)
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
- if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+ if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
return -1;
if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
- if (perf_evlist__add_default_attrs(evsel_list,
- frontend_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
return -1;
}
if (pmu_have_event("cpu", "stalled-cycles-backend")) {
- if (perf_evlist__add_default_attrs(evsel_list,
- backend_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
return -1;
}
- if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+ if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
return -1;
}
@@ -1701,21 +1801,21 @@ static int add_default_attributes(void)
return 0;
/* Append detailed run extra attributes: */
- if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
return -1;
if (detailed_run < 2)
return 0;
/* Append very detailed run extra attributes: */
- if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
return -1;
if (detailed_run < 3)
return 0;
/* Append very, very detailed run extra attributes: */
- return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+ return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
static const char * const stat_record_usage[] = {
@@ -2242,6 +2342,9 @@ int cmd_stat(int argc, const char **argv)
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
+ if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
+ goto out;
+
status = 0;
for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
if (stat_config.run_count != 1 && verbose > 0)
@@ -2261,6 +2364,8 @@ int cmd_stat(int argc, const char **argv)
if (!forever && status != -1 && (!interval || stat_config.summary))
print_counters(NULL, argc, argv);
+ evlist__finalize_ctlfd(evsel_list);
+
if (STAT_RECORD) {
/*
* We synthesize the kernel mmap record just so that older tools
@@ -2307,6 +2412,7 @@ out:
evlist__delete(evsel_list);
+ metricgroup__rblist_exit(&stat_config.metric_events);
runtime_stat_delete(&stat_config);
return status;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 13889d73f8dd..994c230027bb 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1627,7 +1627,7 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
if (!top.evlist->core.nr_entries &&
- perf_evlist__add_default(top.evlist) < 0) {
+ evlist__add_default(top.evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out_delete_evlist;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4cbb64edc998..bea461b6f937 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3917,8 +3917,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
if (trace->sched &&
- perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
- trace__sched_stat_runtime))
+ evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
/*
* If a global cgroup was set, apply it to all the events without an
@@ -4150,11 +4149,11 @@ out_error_raw_syscalls:
goto out_error;
out_error_mmap:
- perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
goto out_error;
out_error_open:
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
out_error:
fprintf(trace->output, "%s\n", errbuf);
@@ -4813,7 +4812,7 @@ int cmd_trace(int argc, const char **argv)
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
trace__parse_cgroups),
- OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+ OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
"ms to wait before starting measurement after program "
"start"),
OPTS_EVSWITCH(&trace.evswitch),
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
index c121e526442a..8383a37647ad 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -15,5 +15,40 @@
"MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
"MetricName": "PowerBUS_Frequency",
"ScaleUnit": "2.5e-7GHz"
+ },
+ {
+ "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@",
+ "MetricName" : "mcs01-read",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@",
+ "MetricName" : "mcs23-read",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@",
+ "MetricName" : "mcs01-write",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@",
+ "MetricName" : "mcs23-write",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_powerbus0_imc@PM_PB_CYC@",
+ "MetricName" : "powerbus_freq",
+ "ScaleUnit": "1e-9GHz"
+ },
+ {
+ "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)",
+ "MetricName" : "Memory-bandwidth-MCS",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
}
]
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index cd00498a5dce..84352fc49a20 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -59,6 +59,7 @@ perf-y += genelf.o
perf-y += api-io.o
perf-y += demangle-java-test.o
perf-y += pfm.o
+perf-y += parse-metric.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index 430024f618f1..6cd408108595 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -53,6 +53,7 @@ Following tests are defined (with perf commands):
perf record -i kill (test-record-no-inherit)
perf record -n kill (test-record-no-samples)
perf record -c 100 -P kill (test-record-period)
+ perf record -c 10000 --pfm-events=cycles:period=77777 kill (test-record-pfm-period)
perf record -R kill (test-record-raw)
perf stat -e cycles kill (test-stat-basic)
perf stat kill (test-stat-default)
diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/attr/test-record-pfm-period
new file mode 100644
index 000000000000..368f5b814094
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-pfm-period
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = --no-bpf-event -c 10000 --pfm-events=cycles:period=77777 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=77777
+sample_type=7
+freq=0
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index da5b6cc23f25..d328caaba45d 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -338,6 +338,10 @@ static struct test generic_tests[] = {
.func = test__demangle_java,
},
{
+ .desc = "Parse and process metrics",
+ .func = test__parse_metric,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 6fe221d31f07..035c9123549a 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -678,7 +678,7 @@ static int do_test_code_reading(bool try_kcore)
if (verbose > 0) {
char errbuf[512];
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 1cb02ca2b15f..4d01051951cd 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -18,14 +18,15 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
{
+ struct expr_id_data *val_ptr;
const char *p;
- double val, *val_ptr;
+ double val;
int ret;
struct expr_parse_ctx ctx;
expr__ctx_init(&ctx);
- expr__add_id(&ctx, strdup("FOO"), 1);
- expr__add_id(&ctx, strdup("BAR"), 2);
+ expr__add_id_val(&ctx, strdup("FOO"), 1);
+ expr__add_id_val(&ctx, strdup("BAR"), 2);
ret = test(&ctx, "1+1", 2);
ret |= test(&ctx, "FOO+BAR", 3);
@@ -39,6 +40,14 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
ret |= test(&ctx, "1.1 + 2.1", 3.2);
ret |= test(&ctx, ".1 + 2.", 2.1);
+ ret |= test(&ctx, "d_ratio(1, 2)", 0.5);
+ ret |= test(&ctx, "d_ratio(2.5, 0)", 0);
+ ret |= test(&ctx, "1.1 < 2.2", 1);
+ ret |= test(&ctx, "2.2 > 1.1", 1);
+ ret |= test(&ctx, "1.1 < 1.1", 0);
+ ret |= test(&ctx, "2.2 > 2.2", 0);
+ ret |= test(&ctx, "2.2 < 1.1", 0);
+ ret |= test(&ctx, "1.1 > 2.2", 0);
if (ret)
return ret;
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index c7c81c4a5b2b..d9eca8e86a6b 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -12,6 +12,7 @@ static void fdarray__init_revents(struct fdarray *fda, short revents)
for (fd = 0; fd < fda->nr; ++fd) {
fda->entries[fd].fd = fda->nr - fd;
+ fda->entries[fd].events = revents;
fda->entries[fd].revents = revents;
}
}
@@ -29,7 +30,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
{
- int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
+ int nr_fds, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
if (fda == NULL) {
@@ -55,7 +56,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
fdarray__init_revents(fda, POLLHUP);
fda->entries[2].revents = POLLIN;
- expected_fd[0] = fda->entries[2].fd;
pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
@@ -66,17 +66,9 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
goto out_delete;
}
- if (fda->entries[0].fd != expected_fd[0]) {
- pr_debug("\nfda->entries[0].fd=%d != %d\n",
- fda->entries[0].fd, expected_fd[0]);
- goto out_delete;
- }
-
fdarray__init_revents(fda, POLLHUP);
fda->entries[0].revents = POLLIN;
- expected_fd[0] = fda->entries[0].fd;
fda->entries[3].revents = POLLIN;
- expected_fd[1] = fda->entries[3].fd;
pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
@@ -88,14 +80,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
goto out_delete;
}
- for (fd = 0; fd < 2; ++fd) {
- if (fda->entries[fd].fd != expected_fd[fd]) {
- pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
- fda->entries[fd].fd, expected_fd[fd]);
- goto out_delete;
- }
- }
-
pr_debug("\n");
err = 0;
@@ -128,7 +112,7 @@ int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unu
}
#define FDA_ADD(_idx, _fd, _revents, _nr) \
- if (fdarray__add(fda, _fd, _revents) < 0) { \
+ if (fdarray__add(fda, _fd, _revents, fdarray_flag__default) < 0) { \
pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
__LINE__,_fd, _revents); \
goto out_delete; \
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 895188b63f96..7f9f87a470c3 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -631,6 +631,34 @@ static int test__checkterms_simple(struct list_head *terms)
TEST_ASSERT_VAL("wrong val", term->val.num == 1);
TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
+ /*
+ * read
+ *
+ * The perf_pmu__test_parse_init injects 'read' term into
+ * perf_pmu_events_list, so 'read' is evaluated as read term
+ * and not as raw event with 'ead' hex value.
+ */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read"));
+
+ /*
+ * r0xead
+ *
+ * To be still able to pass 'ead' value with 'r' syntax,
+ * we added support to parse 'r0xHEX' event.
+ */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 0xead);
+ TEST_ASSERT_VAL("wrong config", !term->config);
return 0;
}
@@ -1766,6 +1794,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_raw_pmu,
.id = 4,
},
+ {
+ .name = "software/r0x1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 4,
+ },
};
struct terms_test {
@@ -1776,7 +1809,7 @@ struct terms_test {
static struct terms_test test__terms[] = {
[0] = {
- .str = "config=10,config1,config2=3,umask=1",
+ .str = "config=10,config1,config2=3,umask=1,read,r0xead",
.check = test__checkterms_simple,
},
};
@@ -1836,6 +1869,13 @@ static int test_term(struct terms_test *t)
INIT_LIST_HEAD(&terms);
+ /*
+ * The perf_pmu__test_parse_init prepares perf_pmu_events_list
+ * which gets freed in parse_events_terms.
+ */
+ if (perf_pmu__test_parse_init())
+ return -1;
+
ret = parse_events_terms(&terms, t->str);
if (ret) {
pr_debug("failed to parse terms '%s', err %d\n",
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
new file mode 100644
index 000000000000..fc0838a7abc2
--- /dev/null
+++ b/tools/perf/tests/parse-metric.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <string.h>
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+#include "metricgroup.h"
+#include "tests.h"
+#include "pmu-events/pmu-events.h"
+#include "evlist.h"
+#include "rblist.h"
+#include "debug.h"
+#include "expr.h"
+#include "stat.h"
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+
+/*
+ * Synthetic metric table used by the tests in this file.
+ *
+ * Besides plain event expressions it contains metrics that reference other
+ * metrics by (lower-case) name, plus M1/M2 (mutually recursive) and M3
+ * (self-referencing) which test_recursion_fail() expects to be rejected.
+ *
+ * The table must end with an all-NULL entry: code that walks a
+ * pmu_events_map table stops only when ->name, ->metric_group and
+ * ->metric_name are all NULL, so without the terminator it reads past the
+ * end of the array.
+ */
+static struct pmu_event pme_test[] = {
+{
+	.metric_expr	= "inst_retired.any / cpu_clk_unhalted.thread",
+	.metric_name	= "IPC",
+	.metric_group	= "group1",
+},
+{
+	.metric_expr	= "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
+			  "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
+	.metric_name	= "Frontend_Bound_SMT",
+},
+{
+	.metric_expr	= "l1d\\-loads\\-misses / inst_retired.any",
+	.metric_name	= "dcache_miss_cpi",
+},
+{
+	.metric_expr	= "l1i\\-loads\\-misses / inst_retired.any",
+	.metric_name	= "icache_miss_cycles",
+},
+{
+	.metric_expr	= "(dcache_miss_cpi + icache_miss_cycles)",
+	.metric_name	= "cache_miss_cycles",
+	.metric_group	= "group1",
+},
+{
+	.metric_expr	= "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
+	.metric_name	= "DCache_L2_All_Hits",
+},
+{
+	.metric_expr	= "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
+			  "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
+	.metric_name	= "DCache_L2_All_Miss",
+},
+{
+	.metric_expr	= "dcache_l2_all_hits + dcache_l2_all_miss",
+	.metric_name	= "DCache_L2_All",
+},
+{
+	.metric_expr	= "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
+	.metric_name	= "DCache_L2_Hits",
+},
+{
+	.metric_expr	= "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
+	.metric_name	= "DCache_L2_Misses",
+},
+{
+	.metric_expr	= "ipc + m2",
+	.metric_name	= "M1",
+},
+{
+	.metric_expr	= "ipc + m1",
+	.metric_name	= "M2",
+},
+{
+	.metric_expr	= "1/m3",
+	.metric_name	= "M3",
+},
+{
+	/* Terminator: name, metric_group and metric_name are all NULL. */
+	.name		= NULL,
+}
+};
+
+/*
+ * Fake events map that exposes pme_test as its table; this is the map
+ * passed to metricgroup__parse_groups_test() by __compute_metric().
+ */
+static struct pmu_events_map map = {
+ .cpuid = "test",
+ .version = "1",
+ .type = "core",
+ .table = pme_test,
+};
+
+/*
+ * One (event name, count) pair fed to a metric computation. Arrays of
+ * these are terminated by an entry whose ->event is NULL.
+ */
+struct value {
+ const char *event;
+ u64 val;
+};
+
+/*
+ * Return the count assigned to event @name in @values, an array terminated
+ * by an entry with a NULL ->event.  Events not present in the array yield 0.
+ */
+static u64 find_value(const char *name, struct value *values)
+{
+	struct value *entry;
+
+	for (entry = values; entry->event; entry++) {
+		if (strcmp(name, entry->event) == 0)
+			return entry->val;
+	}
+	return 0;
+}
+
+/*
+ * Feed the shadow stats in @st with the made-up count of every event in
+ * @evlist, as looked up by event name in @vals (0 for unknown events).
+ * The literal 0 passed through is presumably the CPU index -- confirm
+ * against perf_stat__update_shadow_stats().
+ */
+static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
+			      struct value *vals)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos)
+		perf_stat__update_shadow_stats(pos, find_value(pos->name, vals), 0, st);
+}
+
+/*
+ * Evaluate the metric called @name: walk @evlist, look up the metric
+ * expressions attached to each event in @metric_events and run the first
+ * one whose metric_name matches through test_generic_metric().
+ * Returns 0.0 when no expression with that name is found.
+ */
+static double compute_single(struct rblist *metric_events, struct evlist *evlist,
+			     struct runtime_stat *st, const char *name)
+{
+	struct metric_event *found;
+	struct metric_expr *mexpr;
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		found = metricgroup__lookup(metric_events, pos, false);
+		if (!found)
+			continue;
+
+		list_for_each_entry(mexpr, &found->head, nd) {
+			if (!strcmp(mexpr->metric_name, name))
+				return test_generic_metric(mexpr, 0, st);
+		}
+	}
+	return 0.;
+}
+
+/*
+ * Parse metric (or metric group) @name from the test table, load the
+ * runtime stats with the counts in @vals and evaluate up to two metrics.
+ *
+ * @name1/@ratio1 and @name2/@ratio2 select the metrics to evaluate; a pair
+ * is skipped when either of its pointers is NULL.
+ *
+ * Returns 0 on success, -ENOMEM if the evlist or cpu map cannot be
+ * allocated, the metricgroup__parse_groups_test() error if parsing fails,
+ * or -1 if the evlist stats cannot be allocated.  Everything acquired here
+ * is released on every exit path.
+ */
+static int __compute_metric(const char *name, struct value *vals,
+			    const char *name1, double *ratio1,
+			    const char *name2, double *ratio2)
+{
+	struct rblist metric_events = {
+		.nr_entries = 0,
+	};
+	struct perf_cpu_map *cpus;
+	struct runtime_stat st;
+	struct evlist *evlist;
+	int err;
+
+	/*
+	 * We need to prepare evlist for stat mode running on CPU 0
+	 * because that's where all the stats are going to be created.
+	 */
+	evlist = evlist__new();
+	if (!evlist)
+		return -ENOMEM;
+
+	cpus = perf_cpu_map__new("0");
+	if (!cpus) {
+		evlist__delete(evlist);
+		return -ENOMEM;
+	}
+
+	perf_evlist__set_maps(&evlist->core, cpus, NULL);
+
+	/* Initialized up front so the common exit path can always tear it down. */
+	runtime_stat__init(&st);
+
+	/* Parse the metric into metric_events list. */
+	err = metricgroup__parse_groups_test(evlist, &map, name,
+					     false, false,
+					     &metric_events);
+	if (err)
+		goto out;
+
+	if (perf_evlist__alloc_stats(evlist, false)) {
+		err = -1;
+		goto out;
+	}
+
+	/* Load the runtime stats with given numbers for events. */
+	load_runtime_stat(&st, evlist, vals);
+
+	/* And execute the metric */
+	if (name1 && ratio1)
+		*ratio1 = compute_single(&metric_events, evlist, &st, name1);
+	if (name2 && ratio2)
+		*ratio2 = compute_single(&metric_events, evlist, &st, name2);
+
+	/* Stats were allocated only on this path. */
+	perf_evlist__free_stats(evlist);
+out:
+	/* ... cleanup. */
+	metricgroup__rblist_exit(&metric_events);
+	runtime_stat__exit(&st);
+	perf_cpu_map__put(cpus);
+	evlist__delete(evlist);
+	return err;
+}
+
+/*
+ * Parse and evaluate the single metric @name with counts @vals, storing
+ * the result in @ratio. Returns whatever __compute_metric() returns.
+ */
+static int compute_metric(const char *name, struct value *vals, double *ratio)
+{
+ return __compute_metric(name, vals, name, ratio, NULL, NULL);
+}
+
+/*
+ * Parse metric group @name with counts @vals and evaluate two of its
+ * metrics, @name1 into @ratio1 and @name2 into @ratio2.
+ * Returns whatever __compute_metric() returns.
+ */
+static int compute_metric_group(const char *name, struct value *vals,
+ const char *name1, double *ratio1,
+ const char *name2, double *ratio2)
+{
+ return __compute_metric(name, vals, name1, ratio1, name2, ratio2);
+}
+
+/* IPC = inst_retired.any / cpu_clk_unhalted.thread = 300 / 200 = 1.5 */
+static int test_ipc(void)
+{
+	struct value counts[] = {
+		{ "inst_retired.any",		300 },
+		{ "cpu_clk_unhalted.thread",	200 },
+		{ NULL,				0 },
+	};
+	double ipc;
+	int err;
+
+	err = compute_metric("IPC", counts, &ipc);
+	TEST_ASSERT_VAL("failed to compute metric", err == 0);
+	TEST_ASSERT_VAL("IPC failed, wrong ratio", ipc == 1.5);
+	return 0;
+}
+
+/*
+ * Frontend_Bound_SMT = 300 / (4 * ((200 / 2) * (1 + 400 / 600))) = 0.45
+ */
+static int test_frontend(void)
+{
+	struct value counts[] = {
+		{ "idq_uops_not_delivered.core",		300 },
+		{ "cpu_clk_unhalted.thread",			200 },
+		{ "cpu_clk_unhalted.one_thread_active",		400 },
+		{ "cpu_clk_unhalted.ref_xclk",			600 },
+		{ NULL,						0 },
+	};
+	double bound;
+	int err;
+
+	err = compute_metric("Frontend_Bound_SMT", counts, &bound);
+	TEST_ASSERT_VAL("failed to compute metric", err == 0);
+	TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio", bound == 0.45);
+	return 0;
+}
+
+/*
+ * cache_miss_cycles = dcache_miss_cpi + icache_miss_cycles
+ *                   = 300 / 400 + 200 / 400 = 1.25
+ */
+static int test_cache_miss_cycles(void)
+{
+	struct value counts[] = {
+		{ "l1d-loads-misses",	300 },
+		{ "l1i-loads-misses",	200 },
+		{ "inst_retired.any",	400 },
+		{ NULL,			0 },
+	};
+	double cycles;
+	int err;
+
+	err = compute_metric("cache_miss_cycles", counts, &cycles);
+	TEST_ASSERT_VAL("failed to compute metric", err == 0);
+	TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio", cycles == 1.25);
+	return 0;
+}
+
+
+/*
+ * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit
+ * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) +
+ * l2_rqsts.pf_miss + l2_rqsts.rfo_miss
+ * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss
+ * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all)
+ * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all)
+ *
+ * l2_rqsts.demand_data_rd_hit = 100
+ * l2_rqsts.pf_hit = 200
+ * l2_rqsts.rfo_hit = 300
+ * l2_rqsts.all_demand_data_rd = 400
+ * l2_rqsts.pf_miss = 500
+ * l2_rqsts.rfo_miss = 600
+ *
+ * DCache_L2_All_Hits = 600
+ * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400
+ * DCache_L2_All = 600 + 1400 = 2000
+ * DCache_L2_Hits = 600 / 2000 = 0.3
+ * DCache_L2_Misses = 1400 / 2000 = 0.7
+ */
+static int test_dcache_l2(void)
+{
+	struct value counts[] = {
+		{ "l2_rqsts.demand_data_rd_hit",	100 },
+		{ "l2_rqsts.pf_hit",			200 },
+		{ "l2_rqsts.rfo_hit",			300 },
+		{ "l2_rqsts.all_demand_data_rd",	400 },
+		{ "l2_rqsts.pf_miss",			500 },
+		{ "l2_rqsts.rfo_miss",			600 },
+		{ NULL,					0 },
+	};
+	double share;
+	int err;
+
+	/* Hit share of all L2 data-cache requests. */
+	err = compute_metric("DCache_L2_Hits", counts, &share);
+	TEST_ASSERT_VAL("failed to compute metric", err == 0);
+	TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio", share == 0.3);
+
+	/* Miss share of all L2 data-cache requests. */
+	err = compute_metric("DCache_L2_Misses", counts, &share);
+	TEST_ASSERT_VAL("failed to compute metric", err == 0);
+	TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio", share == 0.7);
+	return 0;
+}
+
+/*
+ * M1 and M2 reference each other and M3 references itself; computing
+ * either M1 or M3 is expected to fail with -1.
+ */
+static int test_recursion_fail(void)
+{
+	struct value counts[] = {
+		{ "inst_retired.any",		300 },
+		{ "cpu_clk_unhalted.thread",	200 },
+		{ NULL,				0 },
+	};
+	double unused;
+	int err;
+
+	err = compute_metric("M1", counts, &unused);
+	TEST_ASSERT_VAL("failed to find recursion", err == -1);
+
+	err = compute_metric("M3", counts, &unused);
+	TEST_ASSERT_VAL("failed to find recursion", err == -1);
+	return 0;
+}
+
+/*
+ * Parse the whole "group1" metric group and check both of its members:
+ *
+ *   IPC               = 400 / 200             = 2.0
+ *   cache_miss_cycles = 300 / 400 + 200 / 400 = 1.25
+ */
+static int test_metric_group(void)
+{
+	double ratio1, ratio2;
+	struct value vals[] = {
+		{ .event = "cpu_clk_unhalted.thread", .val = 200 },
+		{ .event = "l1d-loads-misses",        .val = 300 },
+		{ .event = "l1i-loads-misses",        .val = 200 },
+		{ .event = "inst_retired.any",        .val = 400 },
+		{ .event = NULL, },
+	};
+
+	/* This call must succeed; the old text was copied from the recursion test. */
+	TEST_ASSERT_VAL("failed to compute metric",
+			compute_metric_group("group1", vals,
+					     "IPC", &ratio1,
+					     "cache_miss_cycles", &ratio2) == 0);
+
+	TEST_ASSERT_VAL("group IPC failed, wrong ratio",
+			ratio1 == 2.0);
+
+	TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio",
+			ratio2 == 1.25);
+	return 0;
+}
+
+/*
+ * Entry point of the "Parse and process metrics" test: runs each metric
+ * sub-check in turn. TEST_ASSERT_VAL bails out on the first one that does
+ * not return 0; returns 0 when all of them pass.
+ */
+int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
+ TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);
+ TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
+ TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);
+ TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);
+ TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
+ return 0;
+}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 83adfd846ccd..67d3f5aad016 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -185,14 +185,14 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
err = perf_evlist__parse_sample(evlist, event, &sample);
if (err < 0) {
if (verbose > 0)
- perf_event__fprintf(event, stderr);
+ perf_event__fprintf(event, NULL, stderr);
pr_debug("Couldn't parse sample\n");
goto out_delete_evlist;
}
if (verbose > 0) {
pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
- perf_event__fprintf(event, stderr);
+ perf_event__fprintf(event, NULL, stderr);
}
if (prev_time > sample.time) {
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index ab64b4a4e284..eb19f9a0bc15 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -390,9 +390,9 @@ static bool is_number(const char *str)
return errno == 0 && end_ptr != str;
}
-static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
+static int check_parse_id(const char *id, struct parse_events_error *error,
+ struct perf_pmu *fake_pmu)
{
- struct parse_events_error error;
struct evlist *evlist;
int ret;
@@ -401,8 +401,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
return 0;
evlist = evlist__new();
- memset(&error, 0, sizeof(error));
- ret = parse_events(evlist, id, &error);
+ if (!evlist)
+ return -ENOMEM;
+ ret = __parse_events(evlist, id, error, fake_pmu);
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe)
+{
+ struct parse_events_error error = { .idx = 0, };
+
+ int ret = check_parse_id(id, &error, NULL);
if (ret && same_cpu) {
pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
pe->metric_name, id, pe->metric_expr);
@@ -413,7 +423,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
id, pe->metric_name, pe->metric_expr);
ret = 0;
}
- evlist__delete(evlist);
+ free(error.str);
+ free(error.help);
+ free(error.first_str);
+ free(error.first_help);
+ return ret;
+}
+
+static int check_parse_fake(const char *id)
+{
+ struct parse_events_error error = { .idx = 0, };
+ int ret = check_parse_id(id, &error, &perf_pmu__fake);
+
free(error.str);
free(error.help);
free(error.first_str);
@@ -471,10 +492,10 @@ static int test_parsing(void)
*/
k = 1;
hashmap__for_each_entry((&ctx.ids), cur, bkt)
- expr__add_id(&ctx, strdup(cur->key), k++);
+ expr__add_id_val(&ctx, strdup(cur->key), k++);
hashmap__for_each_entry((&ctx.ids), cur, bkt) {
- if (check_parse_id(cur->key, map == cpus_map,
+ if (check_parse_cpu(cur->key, map == cpus_map,
pe))
ret++;
}
@@ -490,6 +511,100 @@ static int test_parsing(void)
return ret == 0 ? TEST_OK : TEST_SKIP;
}
+/* A hand-written metric expression to run through metric_parse_fake(). */
+struct test_metric {
+ const char *str;
+};
+
+/*
+ * Fixed expressions that test_parsing_fake() parses before it walks the
+ * pmu_events_map tables. They use the pmu@event@ form, with escaped
+ * '-', ',' and '=' characters inside event names.
+ */
+static struct test_metric metrics[] = {
+ { "(unc_p_power_state_occupancy.cores_c0 / unc_p_clockticks) * 100." },
+ { "imx8_ddr0@read\\-cycles@ * 4 * 4", },
+ { "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@ * 4", },
+ { "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", },
+ { "(imx8_ddr0@read\\-cycles@ + imx8_ddr0@write\\-cycles@)", },
+};
+
+/*
+ * Check that metric expression @str is fully parseable without real PMUs:
+ *  1. collect the ids it references with expr__find_other(),
+ *  2. give every id a made-up value,
+ *  3. check every id with check_parse_fake(),
+ *  4. evaluate the expression with expr__parse().
+ *
+ * Returns 0 on success and -1 on any failure.  The parse context is
+ * cleared on every path, including when expr__find_other() fails after
+ * having already collected some ids.
+ */
+static int metric_parse_fake(const char *str)
+{
+	struct expr_parse_ctx ctx;
+	struct hashmap_entry *cur;
+	double result;
+	int ret = -1;
+	size_t bkt;
+	int i;
+
+	pr_debug("parsing '%s'\n", str);
+
+	expr__ctx_init(&ctx);
+	if (expr__find_other(str, NULL, &ctx, 0) < 0) {
+		pr_err("expr__find_other failed\n");
+		goto out;
+	}
+
+	/*
+	 * Add all ids with a made up value. The value may
+	 * trigger divide by zero when subtracted and so try to
+	 * make them unique.
+	 */
+	i = 1;
+	hashmap__for_each_entry((&ctx.ids), cur, bkt)
+		expr__add_id_val(&ctx, strdup(cur->key), i++);
+
+	hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+		if (check_parse_fake(cur->key)) {
+			pr_err("check_parse_fake failed\n");
+			goto out;
+		}
+	}
+
+	if (expr__parse(&result, &ctx, str, 1))
+		pr_err("expr__parse failed\n");
+	else
+		ret = 0;
+
+out:
+	expr__ctx_clear(&ctx);
+	return ret;
+}
+
+/*
+ * Parse all the metrics for current architecture,
+ * or all defined cpus via the 'fake_pmu'
+ * in parse_events.
+ */
+static int test_parsing_fake(void)
+{
+	struct pmu_events_map *map;
+	struct pmu_event *pe;
+	unsigned int i;
+	int err;
+
+	/* First the hand-written expressions ... */
+	for (i = 0; i < ARRAY_SIZE(metrics); i++) {
+		err = metric_parse_fake(metrics[i].str);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * ... then every metric expression of every map.  The map list ends
+	 * at the first map without a table; a table ends at the first entry
+	 * with no name, metric_group and metric_name.
+	 */
+	for (map = &pmu_events_map[0]; map->table; map++) {
+		for (pe = &map->table[0];
+		     pe->name || pe->metric_group || pe->metric_name;
+		     pe++) {
+			if (!pe->metric_expr)
+				continue;
+
+			err = metric_parse_fake(pe->metric_expr);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
static const struct {
int (*func)(void);
const char *desc;
@@ -506,6 +621,10 @@ static const struct {
.func = test_parsing,
.desc = "Parsing of PMU event table metrics",
},
+ {
+ .func = test_parsing_fake,
+ .desc = "Parsing of PMU event table metrics with fake PMUs",
+ },
};
const char *test__pmu_events_subtest_get_desc(int subtest)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 76a4e352eaaf..4447a516c689 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -121,6 +121,7 @@ int test__demangle_java(struct test *test, int subtest);
int test__pfm(struct test *test, int subtest);
const char *test__pfm_subtest_get_desc(int subtest);
int test__pfm_subtest_get_nr(void);
+int test__parse_metric(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 9023267e5643..bd77825fd5a1 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -209,7 +209,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
ui_browser__mark_fused(browser,
pcnt_width + 3 + notes->widths.addr + width,
from - 1,
- to > from ? true : false);
+ to > from);
}
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8d18380ecd10..494626e303f5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -128,6 +128,7 @@ perf-y += expr-bison.o
perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
+perf-y += clockid.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
@@ -191,36 +192,60 @@ CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
# avoid compiler warnings in 32-bit mode
CFLAGS_genelf_debug.o += -Wno-packed
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \
+ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
-$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \
+ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/expr-bison.c: util/expr.y
+$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/expr-bison.c -p expr_
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \
+ --header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
+$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
-
-CFLAGS_parse-events-flex.o += -w
-CFLAGS_pmu-flex.o += -w
-CFLAGS_expr-flex.o += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
-CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
-CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
+ifeq ($(FLEX_GE_26),1)
+ flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
+ CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
+ ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
+ flex_flags += -Wno-misleading-indentation
+ endif
+else
+ flex_flags := -w
+endif
+CFLAGS_parse-events-flex.o += $(flex_flags)
+CFLAGS_pmu-flex.o += $(flex_flags)
+CFLAGS_expr-flex.o += $(flex_flags)
+
+bison_flags := -DYYENABLE_NLS=0
+BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
+ifeq ($(BISON_GE_35),1)
+ bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
+else
+ bison_flags += -w
+endif
+CFLAGS_parse-events-bison.o += $(bison_flags)
+CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 76bfb4a9d94e..0a1fcf787538 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1621,6 +1621,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
char *build_id_filename;
char *build_id_path = NULL;
char *pos;
+ int len;
if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
!dso__is_kcore(dso))
@@ -1649,10 +1650,16 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
dirname(build_id_path);
- if (dso__is_kcore(dso) ||
- readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
- strstr(linkname, DSO__NAME_KALLSYMS) ||
- access(filename, R_OK)) {
+ if (dso__is_kcore(dso))
+ goto fallback;
+
+ len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+ if (len < 0)
+ goto fallback;
+
+ linkname[len] = '\0';
+ if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
fallback:
/*
* If we don't have build-ids or the build-id file isn't in the
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 25c639ac4ad4..42a85c86421d 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1349,6 +1349,47 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->initial_skip = 0;
}
+/*
+ * Consume one flag letter from *ptr, skipping any spaces in front of it.
+ *
+ * A lower-case letter 'a'..'z' sets bit (letter - 'a') in *flags, advances
+ * *ptr past the letter and returns 0.  Anything else returns -1 and leaves
+ * *ptr on the offending character (after the skipped spaces).
+ */
+static int get_flag(const char **ptr, unsigned int *flags)
+{
+	const char *s = *ptr;
+
+	while (*s == ' ')
+		s++;
+	*ptr = s;
+
+	if (*s < 'a' || *s > 'z')
+		return -1;
+
+	*flags |= 1 << (*s - 'a');
+	*ptr = s + 1;
+	return 0;
+}
+
+/*
+ * Parse a run of "+x" / "-x" flag settings starting at *ptr.
+ *
+ * Letters following '+' are accumulated into *plus_flags, letters following
+ * '-' into *minus_flags; spaces are skipped.  Parsing stops, returning 0,
+ * at the first character that is none of '+', '-' or ' ', with *ptr left
+ * on it.  Returns -1 if a '+' or '-' is not followed by a valid flag letter.
+ */
+static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
+{
+	for (;;) {
+		unsigned int *target;
+		char c = **ptr;
+
+		if (c == ' ') {
+			++*ptr;
+			continue;
+		}
+
+		if (c == '+')
+			target = plus_flags;
+		else if (c == '-')
+			target = minus_flags;
+		else
+			return 0;
+
+		++*ptr;
+		if (get_flag(ptr, target))
+			return -1;
+	}
+}
+
/*
* Please check tools/perf/Documentation/perf-script.txt for information
* about the options parsed here, which is introduced after this cset,
@@ -1436,9 +1477,15 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
break;
case 'e':
synth_opts->errors = true;
+ if (get_flags(&p, &synth_opts->error_plus_flags,
+ &synth_opts->error_minus_flags))
+ goto out_err;
break;
case 'd':
synth_opts->log = true;
+ if (get_flags(&p, &synth_opts->log_plus_flags,
+ &synth_opts->log_minus_flags))
+ goto out_err;
break;
case 'c':
synth_opts->branches = true;
@@ -1507,6 +1554,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
case 'a':
synth_opts->remote_access = true;
break;
+ case 'q':
+ synth_opts->quick += 1;
+ break;
case ' ':
case ',':
break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 142ccf7d34df..951d2d14cf24 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -55,6 +55,11 @@ enum itrace_period_type {
PERF_ITRACE_PERIOD_NANOSECS,
};
+#define AUXTRACE_ERR_FLG_OVERFLOW (1 << ('o' - 'a'))
+#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
+
+#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
+
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
* @set: indicates whether or not options have been set
@@ -91,6 +96,11 @@ enum itrace_period_type {
* @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
* @ptime_range: time intervals to trace or NULL
* @range_num: number of time intervals to trace
+ * @error_plus_flags: flags to affect what errors are reported
+ * @error_minus_flags: flags to affect what errors are reported
+ * @log_plus_flags: flags to affect what is logged
+ * @log_minus_flags: flags to affect what is logged
+ * @quick: quicker (less detailed) decoding
*/
struct itrace_synth_opts {
bool set;
@@ -124,6 +134,11 @@ struct itrace_synth_opts {
unsigned long *cpu_bitmap;
struct perf_time_interval *ptime_range;
int range_num;
+ unsigned int error_plus_flags;
+ unsigned int error_minus_flags;
+ unsigned int log_plus_flags;
+ unsigned int log_minus_flags;
+ unsigned int quick;
};
/**
@@ -604,22 +619,32 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
struct evsel *evsel);
#define ITRACE_HELP \
-" i: synthesize instructions events\n" \
+" i[period]: synthesize instructions events\n" \
" b: synthesize branches events (branch misses for Arm SPE)\n" \
" c: synthesize branches events (calls only)\n" \
" r: synthesize branches events (returns only)\n" \
" x: synthesize transactions events\n" \
" w: synthesize ptwrite events\n" \
" p: synthesize power events\n" \
-" e: synthesize error events\n" \
-" d: create a debug log\n" \
+" o: synthesize other events recorded due to the use\n" \
+" of aux-output (refer to perf record)\n" \
+" e[flags]: synthesize error events\n" \
+" each flag must be preceded by + or -\n" \
+" error flags are: o (overflow)\n" \
+" l (data lost)\n" \
+" d[flags]: create a debug log\n" \
+" each flag must be preceded by + or -\n" \
+" log flags are: a (all perf events)\n" \
" f: synthesize first level cache events\n" \
" m: synthesize last level cache events\n" \
" t: synthesize TLB events\n" \
" a: synthesize remote access events\n" \
" g[len]: synthesize a call chain (use with i or x)\n" \
+" G[len]: synthesize a call chain on existing event records\n" \
" l[len]: synthesize last branch entries (use with i or x)\n" \
+" L[len]: synthesize last branch entries on existing event records\n" \
" sNUMBER: skip initial number of events\n" \
+" q: quicker (less detailed) decoding\n" \
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
" concatenate multiple options. Default is ibxwpe or cewp\n"
diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c
new file mode 100644
index 000000000000..74365a5d99c1
--- /dev/null
+++ b/tools/perf/util/clockid.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <time.h>
+#include <strings.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "clockid.h"
+#include "record.h"
+
+/* One entry of the clock lookup table: a clock name and its numeric id. */
+struct clockid_map {
+ const char *name;
+ int clockid;
+};
+
+#define CLOCKID_MAP(n, c) \
+ { .name = n, .clockid = (c), }
+
+#define CLOCKID_END { .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+/*
+ * Clock names accepted by parse_clockid(). The table ends with CLOCKID_END,
+ * an entry whose name is NULL, which is what the lookup loops test for.
+ * The short aliases are listed after the full names, so clockid_name(),
+ * which returns the first entry with a matching id, reports the full name.
+ */
+static const struct clockid_map clockids[] = {
+ /* available for all events, NMI safe */
+ CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+ CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+ /* available for some events */
+ CLOCKID_MAP("realtime", CLOCK_REALTIME),
+ CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+ CLOCKID_MAP("tai", CLOCK_TAI),
+
+ /* available for the lazy */
+ CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+ CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+ CLOCKID_MAP("real", CLOCK_REALTIME),
+ CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+ CLOCKID_END,
+};
+
+/*
+ * Store the resolution of clock @clk_id, in nanoseconds, in @res_ns.
+ *
+ * If clock_getres() fails, @res_ns is set to 0 and a warning is printed;
+ * the failure is not reported to the caller. Always returns 0.
+ */
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+	struct timespec ts;
+
+	if (clock_getres(clk_id, &ts) != 0) {
+		*res_ns = 0;
+		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+		return 0;
+	}
+
+	*res_ns = ts.tv_nsec + ts.tv_sec * NSEC_PER_SEC;
+	return 0;
+}
+
+/*
+ * Option callback that parses a clock id into the struct record_opts that
+ * opt->value points to.
+ *
+ * @str may be a decimal number or one of the names in clockids[], with an
+ * optional case-insensitive "CLOCK_" prefix. On success use_clockid, clockid
+ * and clockid_res_ns are filled in.
+ *
+ * Returns 0 on success, when the option is unset, or when no argument was
+ * given; -1 if a clockid was already set or the name is unknown.
+ */
+int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = (struct record_opts *)opt->value;
+	const struct clockid_map *entry;
+	const char *name = str;
+
+	if (unset) {
+		opts->use_clockid = 0;
+		return 0;
+	}
+
+	/* Option given without an argument: nothing to do. */
+	if (str == NULL)
+		return 0;
+
+	/* A clockid may only be specified once. */
+	if (opts->use_clockid)
+		return -1;
+
+	opts->use_clockid = true;
+
+	/* A numeric argument is taken as the clock id itself. */
+	if (sscanf(str, "%d", &opts->clockid) == 1)
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+
+	/* Accept the name with or without a leading "CLOCK_". */
+	if (strncasecmp(name, "CLOCK_", 6) == 0)
+		name += 6;
+
+	for (entry = clockids; entry->name != NULL; entry++) {
+		if (strcasecmp(name, entry->name) != 0)
+			continue;
+
+		opts->clockid = entry->clockid;
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+	}
+
+	/* Unknown name: undo the flag and report the string as the user typed it. */
+	opts->use_clockid = false;
+	ui__warning("unknown clockid %s, check man page\n", str);
+	return -1;
+}
+
+/*
+ * Return the name of the first clockids[] entry whose id equals @clk_id,
+ * or the string "(not found)" when no entry matches.
+ */
+const char *clockid_name(clockid_t clk_id)
+{
+	const struct clockid_map *entry = clockids;
+
+	while (entry->name) {
+		if (entry->clockid == clk_id)
+			return entry->name;
+		entry++;
+	}
+
+	return "(not found)";
+}
diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h
new file mode 100644
index 000000000000..9b49b4711c76
--- /dev/null
+++ b/tools/perf/util/clockid.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PERF_CLOCKID_H
+#define __PERF_CLOCKID_H
+
+/* For clockid_t, so this header does not rely on what was included before it. */
+#include <sys/types.h>
+
+struct option;
+
+/*
+ * Option callback: parses a clock name or number into the struct record_opts
+ * that opt->value points to. Returns 0 on success, -1 on error.
+ */
+int parse_clockid(const struct option *opt, const char *str, int unset);
+
+/* Name of the clock with id clk_id, or "(not found)" if it is not known. */
+const char *clockid_name(clockid_t clk_id);
+
+#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5f36fc6a5578..27c5fef9ad54 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -31,6 +31,9 @@
#include "config.h"
#include <linux/ctype.h>
#include <linux/err.h>
+#include <linux/time64.h>
+#include "util.h"
+#include "clockid.h"
#define pr_N(n, fmt, ...) \
eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
@@ -1381,11 +1384,26 @@ do { \
return 0;
}
-static int ctf_writer__setup_clock(struct ctf_writer *cw)
+static int ctf_writer__setup_clock(struct ctf_writer *cw,
+ struct perf_session *session,
+ bool tod)
{
struct bt_ctf_clock *clock = cw->clock;
+ const char *desc = "perf clock";
+ int64_t offset = 0;
- bt_ctf_clock_set_description(clock, "perf clock");
+ if (tod) {
+ struct perf_env *env = &session->header.env;
+
+ if (!env->clock.enabled) {
+ pr_err("Can't provide --tod time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
+
+ desc = clockid_name(env->clock.clockid);
+ offset = env->clock.tod_ns - env->clock.clockid_ns;
+ }
#define SET(__n, __v) \
do { \
@@ -1394,8 +1412,8 @@ do { \
} while (0)
SET(frequency, 1000000000);
- SET(offset_s, 0);
- SET(offset, 0);
+ SET(offset, offset);
+ SET(description, desc);
SET(precision, 10);
SET(is_absolute, 0);
@@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
memset(cw, 0, sizeof(*cw));
}
-static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+static int ctf_writer__init(struct ctf_writer *cw, const char *path,
+ struct perf_session *session, bool tod)
{
struct bt_ctf_writer *writer;
struct bt_ctf_stream_class *stream_class;
@@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
cw->clock = clock;
- if (ctf_writer__setup_clock(cw)) {
+ if (ctf_writer__setup_clock(cw, session, tod)) {
pr("Failed to setup CTF clock.\n");
goto err_cleanup;
}
@@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path,
if (err)
return err;
- /* CTF writer */
- if (ctf_writer__init(cw, path))
- return -1;
-
err = -1;
/* perf.data session */
session = perf_session__new(&data, 0, &c.tool);
- if (IS_ERR(session)) {
- err = PTR_ERR(session);
- goto free_writer;
- }
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ /* CTF writer */
+ if (ctf_writer__init(cw, path, session, opts->tod))
+ goto free_session;
if (c.queue_size) {
ordered_events__set_alloc_size(&session->ordered_events,
@@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path,
/* CTF writer env/clock setup */
if (ctf_writer__setup_env(cw, session))
- goto free_session;
+ goto free_writer;
/* CTF events setup */
if (setup_events(cw, session))
- goto free_session;
+ goto free_writer;
if (opts->all && setup_non_sample_events(cw, session))
- goto free_session;
+ goto free_writer;
if (setup_streams(cw, session))
- goto free_session;
+ goto free_writer;
err = perf_session__process_events(session);
if (!err)
@@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
return err;
-free_session:
- perf_session__delete(session);
free_writer:
ctf_writer__cleanup(cw);
+free_session:
+ perf_session__delete(session);
pr_err("Error during conversion setup.\n");
return err;
}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index af90b6076c06..feab5f114e37 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -5,6 +5,7 @@
struct perf_data_convert_opts {
bool force;
bool all;
+ bool tod;
};
#endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 99f0a39c3c59..be991cbbe9f8 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -208,6 +208,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__JAVA_JIT:
case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
@@ -898,6 +899,8 @@ static struct dso_cache *dso_cache__populate(struct dso *dso,
if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
*ret = bpf_read(dso, cache_offset, cache->data);
+ else if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+ *ret = DSO__DATA_CACHE_SIZE;
else
*ret = file_read(dso, machine, cache_offset, cache->data);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index d3d03274b0d1..31c3a9244938 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -42,6 +42,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BPF_PROG_INFO,
DSO_BINARY_TYPE__BPF_IMAGE,
+ DSO_BINARY_TYPE__OOL,
DSO_BINARY_TYPE__NOT_FOUND,
};
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1ab2682d5d2b..a12972652006 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -77,7 +77,6 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
- u64 clockid_res_ns;
/*
* bpf_info_lock protects bpf rbtrees. This is needed because the
@@ -100,6 +99,19 @@ struct perf_env {
/* For fast cpu to numa node lookup via perf_env__numa_node */
int *numa_map;
int nr_numa_map;
+
+ /* For real clock time reference. */
+ struct {
+ u64 tod_ns;
+ u64 clockid_ns;
+ u64 clockid_res_ns;
+ int clockid;
+ /*
+ * enabled is valid for report mode, and is true if above
+ * values are set, it's set in process_clock_data
+ */
+ bool enabled;
+ } clock;
};
enum perf_compress_type {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f581550a3015..317a26571845 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@
#include "stat.h"
#include "session.h"
#include "bpf-event.h"
+#include "print_binary.h"
#include "tool.h"
#include "../perf.h"
@@ -55,6 +56,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
[PERF_RECORD_CGROUP] = "CGROUP",
+ [PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
return machine__process_bpf(machine, event, sample);
}
+/*
+ * Forward @event and @sample to machine__process_text_poke() for @machine
+ * and return its result. The @tool argument is not used.
+ */
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_text_poke(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
@@ -413,7 +423,52 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
event->bpf.type, event->bpf.flags, event->bpf.id);
}
-size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+/*
+ * Printer callback handed to binary__fprintf() when dumping text poke bytes.
+ *
+ * @extra points to a bool telling whether the old (true) or the new (false)
+ * bytes are being printed; it selects the label at the start of each line.
+ * Operations other than line begin, data byte and line end print nothing
+ * and return 0; otherwise the fprintf() result is returned.
+ */
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+			     void *extra, FILE *fp)
+{
+	const bool is_old = *(bool *)extra;
+
+	if (op == BINARY_PRINT_LINE_BEGIN)
+		return fprintf(fp, " %s bytes:", is_old ? "Old" : "New");
+
+	if (op == BINARY_PRINT_NUM_DATA)
+		return fprintf(fp, " %02x", val);
+
+	if (op == BINARY_PRINT_LINE_END)
+		return fprintf(fp, "\n");
+
+	return 0;
+}
+
+/*
+ * Print the body of a PERF_RECORD_TEXT_POKE event to @fp: the poked address,
+ * the symbol and offset it resolves to (only when @machine is given and a map
+ * covering the address is found in machine->kmaps and loads), the old and new
+ * lengths, then the old bytes followed by the new bytes.
+ *
+ * Returns the sum of the values returned by the individual print calls.
+ */
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp)
+{
+ struct perf_record_text_poke_event *tp = &event->text_poke;
+ size_t ret;
+ bool old;
+
+ ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr);
+ if (machine) {
+ struct addr_location al;
+
+ /* Only the map, addr and sym members of al are filled in here. */
+ al.map = maps__find(&machine->kmaps, tp->addr);
+ if (al.map && map__load(al.map) >= 0) {
+ al.addr = al.map->map_ip(al.map, tp->addr);
+ al.sym = map__find_symbol(al.map, al.addr);
+ if (al.sym)
+ ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ }
+ }
+ ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
+ /* text_poke_printer() reads this flag through its extra pointer. */
+ old = true;
+ ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer,
+ &old, fp);
+ /* The new bytes start right after the old_len old bytes in tp->bytes. */
+ old = false;
+ ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16,
+ text_poke_printer, &old, fp);
+ return ret;
+}
+
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
perf_event__name(event->header.type));
@@ -457,6 +512,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_BPF_EVENT:
ret += perf_event__fprintf_bpf(event, fp);
break;
+ case PERF_RECORD_TEXT_POKE:
+ ret += perf_event__fprintf_text_poke(event, machine, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6ae01c3c2ffa..b828b99176f4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -385,7 +389,8 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
-size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name, u64 *addr);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ab48be4cf258..e3fa3bf7498a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -63,6 +63,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
perf_evlist__set_maps(&evlist->core, cpus, threads);
evlist->workload.pid = -1;
evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+ evlist->ctl_fd.fd = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.pos = -1;
}
struct evlist *evlist__new(void)
@@ -79,7 +82,7 @@ struct evlist *perf_evlist__new_default(void)
{
struct evlist *evlist = evlist__new();
- if (evlist && perf_evlist__add_default(evlist)) {
+ if (evlist && evlist__add_default(evlist)) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -91,7 +94,7 @@ struct evlist *perf_evlist__new_dummy(void)
{
struct evlist *evlist = evlist__new();
- if (evlist && perf_evlist__add_dummy(evlist)) {
+ if (evlist && evlist__add_dummy(evlist)) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -231,7 +234,7 @@ void perf_evlist__set_leader(struct evlist *evlist)
}
}
-int __perf_evlist__add_default(struct evlist *evlist, bool precise)
+int __evlist__add_default(struct evlist *evlist, bool precise)
{
struct evsel *evsel = evsel__new_cycles(precise);
@@ -242,7 +245,7 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise)
return 0;
}
-int perf_evlist__add_dummy(struct evlist *evlist)
+int evlist__add_dummy(struct evlist *evlist)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
@@ -258,8 +261,7 @@ int perf_evlist__add_dummy(struct evlist *evlist)
return 0;
}
-static int evlist__add_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs)
+static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
LIST_HEAD(head);
@@ -282,8 +284,7 @@ out_delete_partial_list:
return -1;
}
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs)
+int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
size_t i;
@@ -322,8 +323,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
return NULL;
}
-int perf_evlist__add_newtp(struct evlist *evlist,
- const char *sys, const char *name, void *handler)
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
{
struct evsel *evsel = evsel__newtp(sys, name);
@@ -500,7 +500,7 @@ int perf_evlist__enable_event_idx(struct evlist *evlist,
int evlist__add_pollfd(struct evlist *evlist, int fd)
{
- return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
+ return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
}
int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
@@ -540,7 +540,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
if (sid)
return container_of(sid->evsel, struct evsel, core);
- if (!perf_evlist__sample_id_all(evlist))
+ if (!evlist__sample_id_all(evlist))
return evlist__first(evlist);
return NULL;
@@ -1088,7 +1088,7 @@ int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}
-bool perf_evlist__valid_sample_type(struct evlist *evlist)
+bool evlist__valid_sample_type(struct evlist *evlist)
{
struct evsel *pos;
@@ -1107,7 +1107,7 @@ bool perf_evlist__valid_sample_type(struct evlist *evlist)
return true;
}
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 __evlist__combined_sample_type(struct evlist *evlist)
{
struct evsel *evsel;
@@ -1120,13 +1120,13 @@ u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
return evlist->combined_sample_type;
}
-u64 perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 evlist__combined_sample_type(struct evlist *evlist)
{
evlist->combined_sample_type = 0;
- return __perf_evlist__combined_sample_type(evlist);
+ return __evlist__combined_sample_type(evlist);
}
-u64 perf_evlist__combined_branch_type(struct evlist *evlist)
+u64 evlist__combined_branch_type(struct evlist *evlist)
{
struct evsel *evsel;
u64 branch_type = 0;
@@ -1191,7 +1191,7 @@ out:
return size;
}
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
+bool evlist__valid_sample_id_all(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist), *pos = first;
@@ -1203,7 +1203,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
return true;
}
-bool perf_evlist__sample_id_all(struct evlist *evlist)
+bool evlist__sample_id_all(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist);
return first->core.attr.sample_id_all;
@@ -1464,8 +1464,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
return evsel__parse_sample_timestamp(evsel, event, timestamp);
}
-int perf_evlist__strerror_open(struct evlist *evlist,
- int err, char *buf, size_t size)
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
{
int printed, value;
char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
@@ -1518,7 +1517,7 @@ out_default:
return 0;
}
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
@@ -1727,3 +1726,143 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
}
return leader;
}
+
+/*
+ * Register control descriptor @fd with the evlist's pollfd array and remember
+ * it together with the acknowledgement descriptor @ack.
+ *
+ * An @fd of -1 means no control descriptor was supplied; that is not an
+ * error. Returns 0 on success (or when @fd is -1) and -1 if the descriptor
+ * could not be added to the pollfd array.
+ */
+int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
+{
+	int pos;
+
+	if (fd == -1) {
+		pr_debug("Control descriptor is not initialized\n");
+		return 0;
+	}
+
+	pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+				      fdarray_flag__nonfilterable);
+	if (pos < 0) {
+		/* Keep the "not initialized" marker at exactly -1. */
+		evlist->ctl_fd.pos = -1;
+		pr_err("Failed to add ctl fd entry: %m\n");
+		return -1;
+	}
+
+	evlist->ctl_fd.pos = pos;
+	evlist->ctl_fd.fd = fd;
+	evlist->ctl_fd.ack = ack;
+
+	return 0;
+}
+
+/*
+ * True when a pollfd index is recorded for the control descriptor, i.e.
+ * ctl_fd.pos is not negative.
+ */
+bool evlist__ctlfd_initialized(struct evlist *evlist)
+{
+ return evlist->ctl_fd.pos >= 0;
+}
+
+/*
+ * Stop polling the control descriptor: blank its pollfd entry and reset the
+ * ctl_fd bookkeeping to -1. The descriptors themselves are not closed here.
+ * Does nothing if the control descriptor was never initialized.
+ * Always returns 0.
+ */
+int evlist__finalize_ctlfd(struct evlist *evlist)
+{
+	struct pollfd *entry;
+
+	if (!evlist__ctlfd_initialized(evlist))
+		return 0;
+
+	entry = &evlist->core.pollfd.entries[evlist->ctl_fd.pos];
+	entry->fd = -1;
+	entry->events = 0;
+	entry->revents = 0;
+
+	evlist->ctl_fd.fd = -1;
+	evlist->ctl_fd.ack = -1;
+	evlist->ctl_fd.pos = -1;
+
+	return 0;
+}
+
+/*
+ * Read one message from the control descriptor and decode it.
+ *
+ * Bytes are read one at a time into @cmd_data until a newline or NUL is seen,
+ * @data_size - 1 bytes have been stored, or read() stops returning data.
+ * @cmd_data is zeroed first, so it is always NUL-terminated.
+ *
+ * @cmd is always written: it is set to EVLIST_CTL_CMD_UNSUPPORTED up front
+ * and only changed when the message starts with one of the known command
+ * tags (the comparison covers the tag length only, so it is a prefix match).
+ * This way the caller never sees a stale or uninitialised command.
+ *
+ * Returns the result of the last read(): positive when a message was
+ * received, 0 on end of file, -1 on error.
+ */
+static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
+			      char *cmd_data, size_t data_size)
+{
+	int err;
+	/* Initialised because the debug message below reads it even if no byte arrived. */
+	char c = '\0';
+	size_t bytes_read = 0;
+
+	*cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+	memset(cmd_data, 0, data_size);
+	/* Reserve the last byte for the terminating NUL. */
+	data_size--;
+
+	do {
+		err = read(evlist->ctl_fd.fd, &c, 1);
+		if (err > 0) {
+			if (c == '\n' || c == '\0')
+				break;
+			cmd_data[bytes_read++] = c;
+			if (bytes_read == data_size)
+				break;
+		} else {
+			if (err == -1)
+				pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
+			break;
+		}
+	} while (1);
+
+	/* Show which terminator ended the message, unless the buffer filled up. */
+	pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
+		 bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
+
+	if (err > 0) {
+		if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
+			     (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_ENABLE;
+		} else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
+				    (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
+			*cmd = EVLIST_CTL_CMD_DISABLE;
+		}
+	}
+
+	return err;
+}
+
+/*
+ * Send the acknowledgement tag on the ack descriptor, if one was configured.
+ *
+ * The write length is sizeof() of the tag string, so the terminating NUL is
+ * sent along with the text. Returns 0 when there is no ack descriptor,
+ * otherwise the result of write() (-1 on error, which is also logged).
+ */
+static int evlist__ctlfd_ack(struct evlist *evlist)
+{
+	const int ack_fd = evlist->ctl_fd.ack;
+	int ret;
+
+	if (ack_fd == -1)
+		return 0;
+
+	ret = write(ack_fd, EVLIST_CTL_CMD_ACK_TAG, sizeof(EVLIST_CTL_CMD_ACK_TAG));
+	if (ret == -1)
+		pr_err("failed to write to ctl_ack_fd %d: %m\n", ack_fd);
+
+	return ret;
+}
+
+/*
+ * Handle pending activity on the control descriptor after a poll.
+ *
+ * If the descriptor is readable, one command is received into @cmd and acted
+ * on: enable and disable are applied to the whole evlist and acknowledged,
+ * anything else is only logged. If the descriptor reports POLLHUP or POLLERR
+ * the control descriptor is finalized; otherwise its revents are cleared.
+ *
+ * Returns 0 when the control descriptor is not initialized or has no events,
+ * otherwise the value returned by evlist__ctlfd_recv() (0 if POLLIN was not
+ * set).
+ */
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
+{
+ int err = 0;
+ char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
+ int ctlfd_pos = evlist->ctl_fd.pos;
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ /* The initialized check comes first, so entries[] is never indexed with -1. */
+ if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
+ return 0;
+
+ if (entries[ctlfd_pos].revents & POLLIN) {
+ err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
+ EVLIST_CTL_CMD_MAX_LEN);
+ if (err > 0) {
+ switch (*cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ evlist__enable(evlist);
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ evlist__disable(evlist);
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ pr_debug("ctlfd: unsupported %d\n", *cmd);
+ break;
+ }
+ /* Only commands that were actually carried out are acknowledged. */
+ if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED))
+ evlist__ctlfd_ack(evlist);
+ }
+ }
+
+ /* Stop polling once the other end hung up or the descriptor is in error. */
+ if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
+ evlist__finalize_ctlfd(evlist);
+ else
+ entries[ctlfd_pos].revents = 0;
+
+ return err;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a8081dfc19cf..c73f7f7f120b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -74,6 +74,11 @@ struct evlist {
pthread_t th;
volatile int done;
} thread;
+ struct {
+ int fd; /* control file descriptor */
+ int ack; /* ack file descriptor for control commands */
+ int pos; /* index at evlist core object to check signals */
+ } ctl_fd;
};
struct evsel_str_handler {
@@ -92,20 +97,20 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
-int __perf_evlist__add_default(struct evlist *evlist, bool precise);
+int __evlist__add_default(struct evlist *evlist, bool precise);
-static inline int perf_evlist__add_default(struct evlist *evlist)
+static inline int evlist__add_default(struct evlist *evlist)
{
- return __perf_evlist__add_default(evlist, true);
+ return __evlist__add_default(evlist, true);
}
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
+int __evlist__add_default_attrs(struct evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs);
-#define perf_evlist__add_default_attrs(evlist, array) \
- __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+#define evlist__add_default_attrs(evlist, array) \
+ __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
-int perf_evlist__add_dummy(struct evlist *evlist);
+int evlist__add_dummy(struct evlist *evlist);
int perf_evlist__add_sb_event(struct evlist *evlist,
struct perf_event_attr *attr,
@@ -116,8 +121,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist,
struct target *target);
void perf_evlist__stop_sb_thread(struct evlist *evlist);
-int perf_evlist__add_newtp(struct evlist *evlist,
- const char *sys, const char *name, void *handler);
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
int __evlist__set_tracepoints_handlers(struct evlist *evlist,
const struct evsel_str_handler *assocs,
@@ -219,10 +223,10 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
void __perf_evlist__set_leader(struct list_head *list);
void perf_evlist__set_leader(struct evlist *evlist);
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_branch_type(struct evlist *evlist);
-bool perf_evlist__sample_id_all(struct evlist *evlist);
+u64 __evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_branch_type(struct evlist *evlist);
+bool evlist__sample_id_all(struct evlist *evlist);
u16 perf_evlist__id_hdr_size(struct evlist *evlist);
int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
@@ -232,8 +236,8 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
union perf_event *event,
u64 *timestamp);
-bool perf_evlist__valid_sample_type(struct evlist *evlist);
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist);
+bool evlist__valid_sample_type(struct evlist *evlist);
+bool evlist__valid_sample_id_all(struct evlist *evlist);
bool perf_evlist__valid_read_format(struct evlist *evlist);
void perf_evlist__splice_list_tail(struct evlist *evlist,
@@ -258,8 +262,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist)
return container_of(evsel, struct evsel, core);
}
-int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
void perf_evlist__to_front(struct evlist *evlist,
@@ -356,4 +360,25 @@ void perf_evlist__force_leader(struct evlist *evlist);
struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
struct evsel *evsel,
bool close);
+#define EVLIST_CTL_CMD_ENABLE_TAG "enable"
+#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
+#define EVLIST_CTL_CMD_ACK_TAG "ack\n"
+
+#define EVLIST_CTL_CMD_MAX_LEN 64
+
+/*
+ * Commands reported through the cmd argument of evlist__ctlfd_process().
+ * EVLIST_CTL_CMD_UNSUPPORTED is deliberately the zero value.
+ */
+enum evlist_ctl_cmd {
+ EVLIST_CTL_CMD_UNSUPPORTED = 0,
+ EVLIST_CTL_CMD_ENABLE,
+ EVLIST_CTL_CMD_DISABLE,
+ EVLIST_CTL_CMD_ACK
+};
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
+int evlist__finalize_ctlfd(struct evlist *evlist);
+bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+
+#define EVLIST_ENABLED_MSG "Events enabled\n"
+#define EVLIST_DISABLED_MSG "Events disabled\n"
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ef802f6d40c1..fd865002cbbd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1014,12 +1014,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
if (callchain && callchain->enabled && !evsel->no_aux_samples)
evsel__config_callchain(evsel, opts, callchain);
- if (opts->sample_intr_regs && !evsel->no_aux_samples) {
+ if (opts->sample_intr_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
attr->sample_regs_intr = opts->sample_intr_regs;
evsel__set_sample_bit(evsel, REGS_INTR);
}
- if (opts->sample_user_regs && !evsel->no_aux_samples) {
+ if (opts->sample_user_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
attr->sample_regs_user |= opts->sample_user_regs;
evsel__set_sample_bit(evsel, REGS_USER);
}
@@ -1064,7 +1066,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->mmap = track;
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
- attr->ksymbol = track && !perf_missing_features.ksymbol;
+ /*
+ * ksymbol is tracked separately with text poke because it needs to be
+ * system wide and enabled immediately.
+ */
+ if (!opts->text_poke)
+ attr->ksymbol = track && !perf_missing_features.ksymbol;
attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
if (opts->record_namespaces)
@@ -2495,8 +2502,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg + printed, size - printed,
"Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
- "access to performance monitoring and observability operations for users\n"
- "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n"
+ "access to performance monitoring and observability operations for processes\n"
+ "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
+ "More information can be found at 'Perf events and tool security' document:\n"
+ "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
"perf_event_paranoid setting is %d:\n"
" -1: Allow use of (almost) all events by all users\n"
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
@@ -2528,6 +2537,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->core.attr.aux_output)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support 'aux_output' feature",
+ evsel__name(evsel));
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index f64ab91c432b..53482ef53c41 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -1,10 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdbool.h>
#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "metricgroup.h"
+#include "debug.h"
#include "expr.h"
#include "expr-bison.h"
#include "expr-flex.h"
#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <ctype.h>
#ifdef PARSER_DEBUG
extern int expr_debug;
@@ -30,35 +37,144 @@ static bool key_equal(const void *key1, const void *key2,
}
/* Caller must make sure id is allocated */
-int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
{
- double *val_ptr = NULL, *old_val = NULL;
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
char *old_key = NULL;
int ret;
- if (val != 0.0) {
- val_ptr = malloc(sizeof(double));
- if (!val_ptr)
- return -ENOMEM;
- *val_ptr = val;
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ data_ptr->parent = ctx->parent;
+
+ ret = hashmap__set(&ctx->ids, id, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+ data_ptr->val = val;
+ data_ptr->is_ref = false;
+
+ ret = hashmap__set(&ctx->ids, id, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ char *name, *p;
+ int ret;
+
+ data_ptr = zalloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ name = strdup(ref->metric_name);
+ if (!name) {
+ free(data_ptr);
+ return -ENOMEM;
}
- ret = hashmap__set(&ctx->ids, name, val_ptr,
- (const void **)&old_key, (void **)&old_val);
+
+ /*
+ * The jevents tool converts all metric expressions
+ * to lowercase, including metric references, hence
+ * we need to add lowercase name for metric, so it's
+ * properly found.
+ */
+ for (p = name; *p; p++)
+ *p = tolower(*p);
+
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ data_ptr->ref.metric_name = ref->metric_name;
+ data_ptr->ref.metric_expr = ref->metric_expr;
+ data_ptr->ref.counted = false;
+ data_ptr->is_ref = true;
+
+ ret = hashmap__set(&ctx->ids, name, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+
+ pr_debug2("adding ref metric %s: %s\n",
+ ref->metric_name, ref->metric_expr);
+
free(old_key);
- free(old_val);
+ free(old_data);
return ret;
}
-int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr)
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data)
+{
+ return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1;
+}
+
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap)
{
- double *data;
+ struct expr_id_data *data;
- if (!hashmap__find(&ctx->ids, id, (void **)&data))
+ if (expr__get_id(ctx, id, datap) || !*datap) {
+ pr_debug("%s not found\n", id);
return -1;
- *val_ptr = (data == NULL) ? 0.0 : *data;
+ }
+
+ data = *datap;
+
+ pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n",
+ data->is_ref, data->ref.counted, data->val, id);
+
+ if (data->is_ref && !data->ref.counted) {
+ data->ref.counted = true;
+ pr_debug("processing metric: %s ENTRY\n", id);
+ if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) {
+ pr_debug("%s failed to count\n", id);
+ return -1;
+ }
+ pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+ }
+
return 0;
}
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
+{
+ struct expr_id_data *old_val = NULL;
+ char *old_key = NULL;
+
+ hashmap__delete(&ctx->ids, id,
+ (const void **)&old_key, (void **)&old_val);
+ free(old_key);
+ free(old_val);
+}
+
void expr__ctx_init(struct expr_parse_ctx *ctx)
{
hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
@@ -88,6 +204,8 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
void *scanner;
int ret;
+ pr_debug2("parsing metric: %s\n", expr);
+
ret = expr_lex_init_extra(&scanner_ctx, &scanner);
if (ret)
return ret;
@@ -116,16 +234,10 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
int expr__find_other(const char *expr, const char *one,
struct expr_parse_ctx *ctx, int runtime)
{
- double *old_val = NULL;
- char *old_key = NULL;
int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
- if (one) {
- hashmap__delete(&ctx->ids, one,
- (const void **)&old_key, (void **)&old_val);
- free(old_key);
- free(old_val);
- }
+ if (one)
+ expr__del_id(ctx, one);
return ret;
}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 8a2c1074f90f..fc2b5e824a66 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -11,8 +11,30 @@
#include "util/hashmap.h"
//#endif
+struct metric_ref;
+
+struct expr_id {
+ char *id;
+ struct expr_id *parent;
+};
+
struct expr_parse_ctx {
- struct hashmap ids;
+ struct hashmap ids;
+ struct expr_id *parent;
+};
+
+struct expr_id_data {
+ union {
+ double val;
+ struct {
+ const char *metric_name;
+ const char *metric_expr;
+ bool counted;
+ } ref;
+ struct expr_id *parent;
+ };
+
+ bool is_ref;
};
struct expr_scanner_ctx {
@@ -22,8 +44,14 @@ struct expr_scanner_ctx {
void expr__ctx_init(struct expr_parse_ctx *ctx);
void expr__ctx_clear(struct expr_parse_ctx *ctx);
-int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
-int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr);
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data);
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap);
int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
const char *expr, int runtime);
int expr__find_other(const char *expr, const char *one,
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index f397bf8b1a48..13e5e3c75f56 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -100,6 +100,7 @@ symbol ({spec}|{sym})+
}
}
+d_ratio { return D_RATIO; }
max { return MAX; }
min { return MIN; }
if { return IF; }
@@ -110,6 +111,8 @@ else { return ELSE; }
"|" { return '|'; }
"^" { return '^'; }
"&" { return '&'; }
+"<" { return '<'; }
+">" { return '>'; }
"-" { return '-'; }
"+" { return '+'; }
"*" { return '*'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index bf3e898e3055..d34b370391c6 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -10,6 +10,14 @@
#include "smt.h"
#include <string.h>
+static double d_ratio(double val0, double val1)
+{
+ if (val1 == 0) {
+ return 0;
+ }
+ return val0 / val1;
+}
+
%}
%define api.pure full
@@ -28,11 +36,12 @@
%token <num> NUMBER
%token <str> ID
%destructor { free ($$); } <str>
-%token MIN MAX IF ELSE SMT_ON
+%token MIN MAX IF ELSE SMT_ON D_RATIO
%left MIN MAX IF
%left '|'
%left '^'
%left '&'
+%left '<' '>'
%left '-' '+'
%left '*' '/' '%'
%left NEG NOT
@@ -60,11 +69,12 @@ all_other: all_other other
other: ID
{
- expr__add_id(ctx, $1, 0.0);
+ expr__add_id(ctx, $1);
}
|
MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
-
+|
+'<' | '>' | D_RATIO
all_expr: if_expr { *final_val = $1; }
;
@@ -75,16 +85,22 @@ if_expr:
;
expr: NUMBER
- | ID { if (expr__get_id(ctx, $1, &$$)) {
- pr_debug("%s not found\n", $1);
+ | ID {
+ struct expr_id_data *data;
+
+ if (expr__resolve_id(ctx, $1, &data)) {
+ free($1);
+ YYABORT;
+ }
+
+ $$ = data->val;
free($1);
- YYABORT;
- }
- free($1);
}
| expr '|' expr { $$ = (long)$1 | (long)$3; }
| expr '&' expr { $$ = (long)$1 & (long)$3; }
| expr '^' expr { $$ = (long)$1 ^ (long)$3; }
+ | expr '<' expr { $$ = $1 < $3; }
+ | expr '>' expr { $$ = $1 > $3; }
| expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
@@ -105,6 +121,7 @@ expr: NUMBER
| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
| MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
| SMT_ON { $$ = smt_on() > 0; }
+ | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); }
;
%%
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 7a67d017d72c..251faa9a5789 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -46,6 +46,7 @@
#include "util/util.h" // perf_exe()
#include "cputopo.h"
#include "bpf-event.h"
+#include "clockid.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -891,8 +892,42 @@ static int write_auxtrace(struct feat_fd *ff,
static int write_clockid(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- return do_write(ff, &ff->ph->env.clockid_res_ns,
- sizeof(ff->ph->env.clockid_res_ns));
+ return do_write(ff, &ff->ph->env.clock.clockid_res_ns,
+ sizeof(ff->ph->env.clock.clockid_res_ns));
+}
+
+static int write_clock_data(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ u64 *data64;
+ u32 data32;
+ int ret;
+
+ /* version */
+ data32 = 1;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* clockid */
+ data32 = ff->ph->env.clock.clockid;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* TOD ref time */
+ data64 = &ff->ph->env.clock.tod_ns;
+
+ ret = do_write(ff, data64, sizeof(*data64));
+ if (ret < 0)
+ return ret;
+
+ /* clockid ref time */
+ data64 = &ff->ph->env.clock.clockid_ns;
+
+ return do_write(ff, data64, sizeof(*data64));
}
static int write_dir_format(struct feat_fd *ff,
@@ -1546,7 +1581,50 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
static void print_clockid(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
- ff->ph->env.clockid_res_ns * 1000);
+ ff->ph->env.clock.clockid_res_ns * 1000);
+}
+
+static void print_clock_data(struct feat_fd *ff, FILE *fp)
+{
+ struct timespec clockid_ns;
+ char tstr[64], date[64];
+ struct timeval tod_ns;
+ clockid_t clockid;
+ struct tm ltime;
+ u64 ref;
+
+ if (!ff->ph->env.clock.enabled) {
+ fprintf(fp, "# reference time disabled\n");
+ return;
+ }
+
+ /* Compute TOD time. */
+ ref = ff->ph->env.clock.tod_ns;
+ tod_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= tod_ns.tv_sec * NSEC_PER_SEC;
+ tod_ns.tv_usec = ref / NSEC_PER_USEC;
+
+ /* Compute clockid time. */
+ ref = ff->ph->env.clock.clockid_ns;
+ clockid_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= clockid_ns.tv_sec * NSEC_PER_SEC;
+ clockid_ns.tv_nsec = ref;
+
+ clockid = ff->ph->env.clock.clockid;
+
+ if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL)
+ snprintf(tstr, sizeof(tstr), "<error>");
+ else {
+ strftime(date, sizeof(date), "%F %T", &ltime);
+ scnprintf(tstr, sizeof(tstr), "%s.%06d",
+ date, (int) tod_ns.tv_usec);
+ }
+
+ fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
+ fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
+ tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ clockid_name(clockid));
}
static void print_dir_format(struct feat_fd *ff, FILE *fp)
@@ -2732,9 +2810,43 @@ out:
static int process_clockid(struct feat_fd *ff,
void *data __maybe_unused)
{
- if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+ if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
+ return -1;
+
+ return 0;
+}
+
+static int process_clock_data(struct feat_fd *ff,
+ void *_data __maybe_unused)
+{
+ u32 data32;
+ u64 data64;
+
+ /* version */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ if (data32 != 1)
+ return -1;
+
+ /* clockid */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ ff->ph->env.clock.clockid = data32;
+
+ /* TOD ref time */
+ if (do_read_u64(ff, &data64))
+ return -1;
+
+ ff->ph->env.clock.tod_ns = data64;
+
+ /* clockid ref time */
+ if (do_read_u64(ff, &data64))
return -1;
+ ff->ph->env.clock.clockid_ns = data64;
+ ff->ph->env.clock.enabled = true;
return 0;
}
@@ -3008,6 +3120,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(BPF_BTF, bpf_btf, false),
FEAT_OPR(COMPRESSED, compressed, false),
FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
+ FEAT_OPR(CLOCK_DATA, clock_data, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 650bd1c7a99b..2aca71763ecf 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -44,6 +44,7 @@ enum {
HEADER_BPF_BTF,
HEADER_COMPRESSED,
HEADER_CPU_PMU_CAPS,
+ HEADER_CLOCK_DATA,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f8ccfd6be0ee..697513f35154 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -55,6 +55,7 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_TIP_PGD,
INTEL_PT_STATE_FUP,
INTEL_PT_STATE_FUP_NO_TIP,
+ INTEL_PT_STATE_RESAMPLE,
};
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
case INTEL_PT_STATE_ERR_RESYNC:
case INTEL_PT_STATE_IN_SYNC:
case INTEL_PT_STATE_TNT_CONT:
+ case INTEL_PT_STATE_RESAMPLE:
return true;
case INTEL_PT_STATE_TNT:
case INTEL_PT_STATE_TIP:
@@ -109,6 +111,9 @@ struct intel_pt_decoder {
bool fixup_last_mtc;
bool have_last_ip;
bool in_psb;
+ bool hop;
+ bool hop_psb_fup;
+ bool leap;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -235,6 +240,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
+ decoder->hop = params->quick >= 1;
+ decoder->leap = params->quick >= 2;
decoder->flags = params->flags;
@@ -275,6 +282,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+ if (decoder->hop)
+ intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
+
return decoder;
}
@@ -1164,6 +1174,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
return 0;
if (err == -EAGAIN ||
intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
if (intel_pt_fup_event(decoder))
return 0;
return -EAGAIN;
@@ -1729,8 +1740,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_FUP:
decoder->pge = true;
- if (decoder->packet.count)
+ if (decoder->packet.count) {
intel_pt_set_last_ip(decoder);
+ if (decoder->hop) {
+ /* Act on FUP at PSBEND */
+ decoder->ip = decoder->last_ip;
+ decoder->hop_psb_fup = true;
+ }
+ }
break;
case INTEL_PT_MODE_TSX:
@@ -1874,6 +1891,127 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
}
}
+static int intel_pt_resample(struct intel_pt_decoder *decoder)
+{
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+}
+
+#define HOP_PROCESS 0
+#define HOP_IGNORE 1
+#define HOP_RETURN 2
+#define HOP_AGAIN 3
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+
+/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+{
+ /* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+ *err = intel_pt_scan_for_psb(decoder);
+ if (*err)
+ return HOP_RETURN;
+ }
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ return HOP_IGNORE;
+
+ case INTEL_PT_TIP_PGD:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return HOP_RETURN;
+
+ case INTEL_PT_TIP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+
+ case INTEL_PT_FUP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ if (intel_pt_fup_event(decoder))
+ return HOP_RETURN;
+ if (!decoder->branch_enable)
+ *no_tip = true;
+ if (*no_tip) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+ }
+ *err = intel_pt_walk_fup_tip(decoder);
+ if (!*err)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ return HOP_RETURN;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ decoder->hop_psb_fup = false;
+ *err = intel_pt_walk_psbend(decoder);
+ if (*err == -EAGAIN)
+ return HOP_AGAIN;
+ if (*err)
+ return HOP_RETURN;
+ if (decoder->hop_psb_fup) {
+ decoder->hop_psb_fup = false;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+ }
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
+ return HOP_RETURN;
+ }
+ return HOP_IGNORE;
+
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ default:
+ return HOP_PROCESS;
+ }
+}
+
static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
{
bool no_tip = false;
@@ -1884,6 +2022,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
if (err)
return err;
next:
+ if (decoder->hop) {
+ switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
+ case HOP_IGNORE:
+ continue;
+ case HOP_RETURN:
+ return err;
+ case HOP_AGAIN:
+ goto next;
+ default:
+ break;
+ }
+ }
+
switch (decoder->packet.type) {
case INTEL_PT_TNT:
if (!decoder->packet.count)
@@ -1913,6 +2064,12 @@ next:
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ /*
+ * In hop mode, resample to get the to_ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
return 0;
}
@@ -1942,17 +2099,13 @@ next:
}
if (decoder->set_fup_mwait)
no_tip = true;
+ if (no_tip)
+ decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
err = intel_pt_walk_fup(decoder);
- if (err != -EAGAIN) {
- if (err)
- return err;
- if (no_tip)
- decoder->pkt_state =
- INTEL_PT_STATE_FUP_NO_TIP;
- else
- decoder->pkt_state = INTEL_PT_STATE_FUP;
- return 0;
- }
+ if (err != -EAGAIN)
+ return err;
if (no_tip) {
no_tip = false;
break;
@@ -1980,8 +2133,10 @@ next:
* possibility of another CBR change that gets caught up
* in the PSB+.
*/
- if (decoder->cbr != decoder->cbr_seen)
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
return 0;
+ }
break;
case INTEL_PT_PIP:
@@ -2022,8 +2177,10 @@ next:
case INTEL_PT_CBR:
intel_pt_calc_cbr(decoder);
- if (decoder->cbr != decoder->cbr_seen)
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
return 0;
+ }
break;
case INTEL_PT_MODE_EXEC:
@@ -2032,7 +2189,7 @@ next:
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
- if (!decoder->pge) {
+ if (!decoder->pge || decoder->in_psb) {
intel_pt_update_in_tx(decoder);
break;
}
@@ -2423,7 +2580,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
if (err)
return err;
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ /* In hop mode, resample to get the to_ip as an "instruction" sample */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->overflow = false;
decoder->state.from_ip = 0;
@@ -2531,6 +2692,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
+leap:
err = intel_pt_scan_for_psb(decoder);
if (err)
return err;
@@ -2544,7 +2706,20 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
if (decoder->ip) {
decoder->state.type = 0; /* Do not have a sample */
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ /*
+ * In hop mode, resample to get the PSB FUP ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ } else if (decoder->leap) {
+ /*
+ * In leap mode, only PSB+ is decoded, so keeping leaping to the
+ * next PSB until there is an ip.
+ */
+ goto leap;
} else {
return intel_pt_sync_ip(decoder);
}
@@ -2599,19 +2774,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
err = intel_pt_walk_tip(decoder);
break;
case INTEL_PT_STATE_FUP:
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_fup_tip(decoder);
- else if (!err)
- decoder->pkt_state = INTEL_PT_STATE_FUP;
break;
case INTEL_PT_STATE_FUP_NO_TIP:
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);
break;
+ case INTEL_PT_STATE_RESAMPLE:
+ err = intel_pt_resample(decoder);
+ break;
default:
err = intel_pt_bug(decoder);
break;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e289e463d635..8645fc265481 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -250,6 +250,7 @@ struct intel_pt_params {
uint32_t tsc_ctc_ratio_n;
uint32_t tsc_ctc_ratio_d;
enum intel_pt_param_flags flags;
+ unsigned int quick;
};
struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index cb3c1e569a2d..2a8d245351e7 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -236,7 +236,7 @@ static void intel_pt_log_event(union perf_event *event)
if (!intel_pt_enable_logging || !f)
return;
- perf_event__fprintf(event, f);
+ perf_event__fprintf(event, NULL, f);
}
static void intel_pt_dump_sample(struct perf_session *session,
@@ -249,6 +249,24 @@ static void intel_pt_dump_sample(struct perf_session *session,
intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
}
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
+{
+ struct perf_time_interval *range = pt->synth_opts.ptime_range;
+ int n = pt->synth_opts.range_num;
+
+ if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return true;
+
+ if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return false;
+
+ /* perf_time__ranges_skip_sample does not work if time is zero */
+ if (!tm)
+ tm = 1;
+
+ return !n || !perf_time__ranges_skip_sample(range, n, tm);
+}
+
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
@@ -520,6 +538,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+ u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return;
+
+ auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
{
return ip >= pt->kernel_start ?
@@ -1001,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+ params.quick = pt->synth_opts.quick;
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
@@ -1394,7 +1424,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.id = ptq->pt->instructions_id;
sample.stream_id = ptq->pt->instructions_id;
- sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ if (pt->synth_opts.quick)
+ sample.period = 1;
+ else
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
if (sample.cyc_cnt) {
@@ -1852,6 +1885,15 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
+ if (pt->synth_opts.error_minus_flags) {
+ if (code == INTEL_PT_ERR_OVR &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
+ return 0;
+ if (code == INTEL_PT_ERR_LOST &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
+ return 0;
+ }
+
intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
@@ -2566,10 +2608,6 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
return -EINVAL;
}
- intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
- cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
- &pt->tc));
-
ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
if (ret <= 0)
return ret;
@@ -2594,6 +2632,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ if (!al->map || addr < al->map->start || addr >= al->map->end) {
+ if (!thread__find_map(thread, cpumode, addr, al))
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+ /* Assume text poke begins in a basic block no more than 4096 bytes */
+ int cnt = 4096 + event->text_poke.new_len;
+ struct thread *thread = pt->unknown_thread;
+ struct addr_location al = { .map = NULL };
+ struct machine *machine = pt->machine;
+ struct intel_pt_cache_entry *e;
+ u64 offset;
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ for (; cnt; cnt--, addr--) {
+ if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+ if (addr < event->text_poke.addr)
+ return 0;
+ continue;
+ }
+
+ if (!al.map->dso || !al.map->dso->auxtrace_cache)
+ continue;
+
+ offset = al.map->map_ip(al.map, addr);
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ if (!e)
+ continue;
+
+ if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+ /*
+ * No overlap. Working backwards there cannot be another
+ * basic block that overlaps the text poke if there is a
+ * branch instruction before the text poke address.
+ */
+ if (e->branch != INTEL_PT_BR_NO_BRANCH)
+ return 0;
+ } else {
+ intel_pt_cache_invalidate(al.map->dso, machine, offset);
+ intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+ al.map->dso->long_name, addr);
+ }
+ }
+
+ return 0;
+}
+
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -2662,9 +2761,14 @@ static int intel_pt_process_event(struct perf_session *session,
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample);
- intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
- event->header.type, sample->cpu, sample->time, timestamp);
- intel_pt_log_event(event);
+ if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+ err = intel_pt_text_poke(pt, event);
+
+ if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
+ intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+ event->header.type, sample->cpu, sample->time, timestamp);
+ intel_pt_log_event(event);
+ }
return err;
}
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 32bb05e03fb2..0804308ef285 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -26,6 +26,7 @@
#include "jit.h"
#include "jitdump.h"
#include "genelf.h"
+#include "thread.h"
#include <linux/ctype.h>
#include <linux/zalloc.h>
@@ -749,6 +750,28 @@ jit_detect(char *mmap_name, pid_t pid)
return 0;
}
+static void jit_add_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__findnew_thread(machine, pid, pid);
+
+ if (!thread) {
+ pr_err("%s: thread %d not found or created\n", __func__, pid);
+ return;
+ }
+
+ thread->priv = (void *)1;
+}
+
+static bool jit_has_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__find_thread(machine, pid, pid);
+
+ if (!thread)
+ return 0;
+
+ return (bool)thread->priv;
+}
+
int
jit_process(struct perf_session *session,
struct perf_data *output,
@@ -764,8 +787,13 @@ jit_process(struct perf_session *session,
/*
* first, detect marker mmap (i.e., the jitdump mmap)
*/
- if (jit_detect(filename, pid))
+ if (jit_detect(filename, pid)) {
+ // Strip //anon* mmaps if we processed a jitdump for this pid
+ if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
+ return 1;
+
return 0;
+ }
memset(&jd, 0, sizeof(jd));
@@ -784,6 +812,7 @@ jit_process(struct perf_session *session,
ret = jit_inject(&jd, filename);
if (!ret) {
+ jit_add_pid(machine, pid);
*nbytes = jd.bytes_written;
ret = 1;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d5384807372b..96af544eac8f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -762,6 +762,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
return -ENOMEM;
}
+ if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+ map->dso->binary_type = DSO_BINARY_TYPE__OOL;
+ map->dso->data.file_size = event->ksymbol.len;
+ dso__set_loaded(map->dso);
+ }
+
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
@@ -808,6 +814,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
return machine__process_ksymbol_register(machine, event, sample);
}
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct map *map = maps__find(&machine->kmaps, event->text_poke.addr);
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ if (dump_trace)
+ perf_event__fprintf_text_poke(event, machine, stdout);
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ if (cpumode != PERF_RECORD_MISC_KERNEL) {
+ pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+ return 0;
+ }
+
+ if (map && map->dso) {
+ u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len;
+ int ret;
+
+ /*
+ * Kernel maps might be changed when loading symbols so loading
+ * must be done prior to using kernel maps.
+ */
+ map__load(map);
+ ret = dso__data_write_cache_addr(map->dso, map, machine,
+ event->text_poke.addr,
+ new_bytes,
+ event->text_poke.new_len);
+ if (ret != event->text_poke.new_len)
+ pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ } else {
+ pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ }
+
+ return 0;
+}
+
static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
@@ -1930,6 +1977,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_ksymbol(machine, event, sample); break;
case PERF_RECORD_BPF_EVENT:
ret = machine__process_bpf(machine, event, sample); break;
+ case PERF_RECORD_TEXT_POKE:
+ ret = machine__process_text_poke(machine, event, sample); break;
default:
ret = -1;
break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index fa1be9ea00fa..062c36a8433c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even
int machine__process_ksymbol(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 53d96611e6a6..f9dc8c5493ea 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -267,6 +267,11 @@ bool __map__is_bpf_prog(const struct map *map)
return name && (strstr(name, "bpf_prog_") == name);
}
+/* Tell whether @map has a dso whose binary type is DSO_BINARY_TYPE__OOL. */
+bool __map__is_ool(const struct map *map)
+{
+ if (!map->dso)
+ return false;
+
+ return map->dso->binary_type == DSO_BINARY_TYPE__OOL;
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 067036e8970c..9e312ae2d656 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -147,11 +147,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
bool __map__is_bpf_prog(const struct map *map);
+bool __map__is_ool(const struct map *map);
+/*
+ * A map is treated as a kernel module map only when it is none of: the
+ * kernel map, an extra kernel map, a BPF program map or an OOL map.
+ */
static inline bool __map__is_kmodule(const struct map *map)
{
return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
- !__map__is_bpf_prog(map);
+ !__map__is_bpf_prog(map) && !__map__is_ool(map);
}
bool map__has_symbols(const struct map *map);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 9e21aa767e41..8831b964288f 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -24,6 +24,7 @@
#include <subcmd/parse-options.h>
#include <api/fs/fs.h>
#include "util.h"
+#include <asm/bug.h>
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -76,23 +77,78 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
return &me->nd;
}
+/*
+ * rblist node_delete callback for metric_event nodes.
+ *
+ * Frees every metric_expr queued on the node's list together with the two
+ * arrays each expression owns (metric_refs and metric_events), then frees
+ * the metric_event itself.
+ */
+static void metric_event_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct metric_event *me = container_of(rb_node, struct metric_event, nd);
+ struct metric_expr *expr, *tmp;
+
+ list_for_each_entry_safe(expr, tmp, &me->head, nd) {
+ free(expr->metric_refs);
+ /* Allocated per expression in metricgroup__setup_events(). */
+ free(expr->metric_events);
+ free(expr);
+ }
+
+ free(me);
+}
+
+/* Initialize @metric_events and install the metric_event node callbacks. */
static void metricgroup__rblist_init(struct rblist *metric_events)
{
rblist__init(metric_events);
metric_events->node_cmp = metric_event_cmp;
metric_events->node_new = metric_event_new;
+ metric_events->node_delete = metric_event_delete;
}
-struct egroup {
+/* Public wrapper around rblist__exit() for a metric_events rblist. */
+void metricgroup__rblist_exit(struct rblist *metric_events)
+{
+ rblist__exit(metric_events);
+}
+
+/*
+ * A node in the list of referenced metrics. metric_expr
+ * is held as a convenience to avoid a search through the
+ * metric list.
+ */
+struct metric_ref_node {
+ const char *metric_name;
+ const char *metric_expr;
+ struct list_head list;
+};
+
+/*
+ * A metric being assembled. Metrics referenced from its expression are
+ * recorded as metric_ref_node entries on metric_refs and counted in
+ * metric_refs_cnt.
+ */
+struct metric {
struct list_head nd;
struct expr_parse_ctx pctx;
const char *metric_name;
const char *metric_expr;
const char *metric_unit;
+ struct list_head metric_refs; /* list of struct metric_ref_node */
+ int metric_refs_cnt; /* number of nodes on metric_refs */
int runtime;
bool has_constraint;
};
+/* Number of expr_id slots available in one struct expr_ids. */
+#define RECURSION_ID_MAX 1000
+
+/*
+ * Fixed-size pool of expr_id entries; cnt is the number of slots
+ * already handed out by expr_ids__alloc().
+ */
+struct expr_ids {
+ struct expr_id id[RECURSION_ID_MAX];
+ int cnt;
+};
+
+/*
+ * Hand out the next unused slot of @ids, or NULL once all
+ * RECURSION_ID_MAX slots have been taken.
+ */
+static struct expr_id *expr_ids__alloc(struct expr_ids *ids)
+{
+ struct expr_id *slot = NULL;
+
+ if (ids->cnt < RECURSION_ID_MAX) {
+ slot = &ids->id[ids->cnt];
+ ids->cnt++;
+ }
+
+ return slot;
+}
+
+/* Free the id string of every slot handed out from @ids so far. */
+static void expr_ids__exit(struct expr_ids *ids)
+{
+ struct expr_id *cur = ids->id;
+ struct expr_id *end = ids->id + ids->cnt;
+
+ for (; cur < end; cur++)
+ free(cur->id);
+}
+
/**
* Find a group of events in perf_evlist that correpond to those from a parsed
* metric expression. Note, as find_evsel_group is called in the same order as
@@ -119,7 +175,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
unsigned long *evlist_used)
{
struct evsel *ev, *current_leader = NULL;
- double *val_ptr;
+ struct expr_id_data *val_ptr;
int i = 0, matched_events = 0, events_to_match;
const int idnum = (int)hashmap__size(&pctx->ids);
@@ -206,7 +262,7 @@ static int metricgroup__setup_events(struct list_head *groups,
struct metric_expr *expr;
int i = 0;
int ret = 0;
- struct egroup *eg;
+ struct metric *m;
struct evsel *evsel, *tmp;
unsigned long *evlist_used;
@@ -214,22 +270,23 @@ static int metricgroup__setup_events(struct list_head *groups,
if (!evlist_used)
return -ENOMEM;
- list_for_each_entry (eg, groups, nd) {
+ list_for_each_entry (m, groups, nd) {
struct evsel **metric_events;
+ struct metric_ref *metric_refs = NULL;
metric_events = calloc(sizeof(void *),
- hashmap__size(&eg->pctx.ids) + 1);
+ hashmap__size(&m->pctx.ids) + 1);
if (!metric_events) {
ret = -ENOMEM;
break;
}
- evsel = find_evsel_group(perf_evlist, &eg->pctx,
+ evsel = find_evsel_group(perf_evlist, &m->pctx,
metric_no_merge,
- eg->has_constraint, metric_events,
+ m->has_constraint, metric_events,
evlist_used);
if (!evsel) {
pr_debug("Cannot resolve %s: %s\n",
- eg->metric_name, eg->metric_expr);
+ m->metric_name, m->metric_expr);
free(metric_events);
continue;
}
@@ -247,11 +304,41 @@ static int metricgroup__setup_events(struct list_head *groups,
free(metric_events);
break;
}
- expr->metric_expr = eg->metric_expr;
- expr->metric_name = eg->metric_name;
- expr->metric_unit = eg->metric_unit;
+
+ /*
+ * Collect and store collected nested expressions
+ * for metric processing.
+ */
+ if (m->metric_refs_cnt) {
+ struct metric_ref_node *ref;
+
+ metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1));
+ if (!metric_refs) {
+ ret = -ENOMEM;
+ free(metric_events);
+ break;
+ }
+
+ i = 0;
+ list_for_each_entry(ref, &m->metric_refs, list) {
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ metric_refs[i].metric_name = ref->metric_name;
+ metric_refs[i].metric_expr = ref->metric_expr;
+ i++;
+ }
+ };
+
+ expr->metric_refs = metric_refs;
+ expr->metric_expr = m->metric_expr;
+ expr->metric_name = m->metric_name;
+ expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
- expr->runtime = eg->runtime;
+ expr->runtime = m->runtime;
list_add(&expr->nd, &me->head);
}
@@ -552,123 +639,339 @@ int __weak arch_get_runtimeparam(void)
return 1;
}
-static int __metricgroup__add_metric(struct list_head *group_list,
- struct pmu_event *pe,
- bool metric_no_group,
- int runtime)
+/*
+ * Add the metric described by @pe.
+ *
+ * When *@mp is NULL this is the "parent" call: a new struct metric is
+ * allocated, an expr_id named after the metric is taken from @ids to act as
+ * parent of all its IDs, and on success the metric is stored in *@mp and
+ * inserted into @metric_list (largest groups first).
+ *
+ * When *@mp is set this is a referenced metric: @pe is recorded on the
+ * metric_refs list of *@mp and its IDs are parsed into the context of *@mp
+ * with @parent as their parent ID.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure and -EINVAL when @ids
+ * is exhausted or the expression cannot be parsed. A metric allocated by
+ * this call is freed again on failure and *@mp is left NULL; a metric that
+ * was passed in through *@mp is never freed here.
+ */
+static int __add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ int runtime,
+ struct metric **mp,
+ struct expr_id *parent,
+ struct expr_ids *ids)
{
- struct egroup *eg;
+ struct metric_ref_node *ref;
+ struct metric *m;
+ bool is_parent = (*mp == NULL);
- eg = malloc(sizeof(*eg));
- if (!eg)
- return -ENOMEM;
+ if (is_parent) {
+ /*
+ * We got in here for the parent group,
+ * allocate it and put it on the list.
+ */
+ m = zalloc(sizeof(*m));
+ if (!m)
+ return -ENOMEM;
+
+ expr__ctx_init(&m->pctx);
+ m->metric_name = pe->metric_name;
+ m->metric_expr = pe->metric_expr;
+ m->metric_unit = pe->unit;
+ m->runtime = runtime;
+ m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+ INIT_LIST_HEAD(&m->metric_refs);
+ m->metric_refs_cnt = 0;
+
+ parent = expr_ids__alloc(ids);
+ if (!parent) {
+ free(m);
+ return -EINVAL;
+ }
+
+ parent->id = strdup(pe->metric_name);
+ if (!parent->id) {
+ free(m);
+ return -ENOMEM;
+ }
+
+ /* Publish the metric only once nothing above can fail anymore. */
+ *mp = m;
+ } else {
+ /*
+ * We got here for the referenced metric, via the
+ * recursive metricgroup__add_metric call, add
+ * it to the parent group.
+ */
+ m = *mp;
- expr__ctx_init(&eg->pctx);
- eg->metric_name = pe->metric_name;
- eg->metric_expr = pe->metric_expr;
- eg->metric_unit = pe->unit;
- eg->runtime = runtime;
- eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+ ref = malloc(sizeof(*ref));
+ if (!ref)
+ return -ENOMEM;
- if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) {
- expr__ctx_clear(&eg->pctx);
- free(eg);
+ /*
+ * Intentionally passing just const char pointers,
+ * from 'pe' object, so they never go away. We don't
+ * need to change them, so there's no need to create
+ * our own copy.
+ */
+ ref->metric_name = pe->metric_name;
+ ref->metric_expr = pe->metric_expr;
+
+ list_add(&ref->list, &m->metric_refs);
+ m->metric_refs_cnt++;
+ }
+
+ /* Force all found IDs in metric to have us as parent ID. */
+ WARN_ON_ONCE(!parent);
+ m->pctx.parent = parent;
+
+ /*
+ * For both the parent and referenced metrics, we parse
+ * all the metric's IDs and add it to the parent context.
+ */
+ if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) {
+ /*
+ * Only a metric allocated by this very call may be
+ * released here. In the referenced metric case 'm' is
+ * the parent metric, which is already linked on
+ * metric_list and must stay valid for its owner.
+ */
+ if (is_parent) {
+ expr__ctx_clear(&m->pctx);
+ free(m);
+ *mp = NULL;
+ }
return -EINVAL;
}
- if (list_empty(group_list))
- list_add(&eg->nd, group_list);
+ /*
+ * We add new group only in the 'parent' call,
+ * so bail out for referenced metric case.
+ */
+ if (m->metric_refs_cnt)
+ return 0;
+
+ if (list_empty(metric_list))
+ list_add(&m->nd, metric_list);
else {
struct list_head *pos;
/* Place the largest groups at the front. */
- list_for_each_prev(pos, group_list) {
- struct egroup *old = list_entry(pos, struct egroup, nd);
+ list_for_each_prev(pos, metric_list) {
+ struct metric *old = list_entry(pos, struct metric, nd);
- if (hashmap__size(&eg->pctx.ids) <=
+ if (hashmap__size(&m->pctx.ids) <=
hashmap__size(&old->pctx.ids))
break;
}
- list_add(&eg->nd, pos);
+ list_add(&m->nd, pos);
}
return 0;
}
+/*
+ * Walk the entries of __map->table; the walk ends at the first entry
+ * that has neither name, metric_group nor metric_name set.
+ */
+#define map_for_each_event(__pe, __idx, __map) \
+ for (__idx = 0, __pe = &__map->table[__idx]; \
+ __pe->name || __pe->metric_group || __pe->metric_name; \
+ __pe = &__map->table[++__idx])
+
+/*
+ * Like map_for_each_event(), but the loop body only runs for entries that
+ * have a metric_expr and whose metric_group or metric_name matches __metric.
+ * NOTE: expands to a 'for' followed by an 'if', so an 'else' placed right
+ * after the loop body would bind to that hidden 'if'.
+ */
+#define map_for_each_metric(__pe, __idx, __map, __metric) \
+ map_for_each_event(__pe, __idx, __map) \
+ if (__pe->metric_expr && \
+ (match_metric(__pe->metric_group, __metric) || \
+ match_metric(__pe->metric_name, __metric)))
+
+/*
+ * Return the first entry of @map whose metric_name matches @metric
+ * (per match_metric()), or NULL when there is none.
+ */
+static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map)
+{
+ struct pmu_event *pe;
+ int i;
+
+ map_for_each_event(pe, i, map) {
+ if (match_metric(pe->metric_name, metric))
+ return pe;
+ }
+
+ return NULL;
+}
+
+/*
+ * Check that resolving @id inside metric @m does not recurse, and allocate
+ * the expr_id that will act as parent for the IDs of the metric named @id.
+ *
+ * On success 0 is returned and *@parent points to the new entry taken from
+ * @ids. A non-zero value is returned when @id is not found in the context
+ * of @m, when recursion is detected, when @ids is exhausted (-EINVAL) or
+ * when the id string cannot be duplicated (-ENOMEM).
+ */
+static int recursion_check(struct metric *m, const char *id, struct expr_id **parent,
+ struct expr_ids *ids)
+{
+ struct expr_id_data *data;
+ struct expr_id *p;
+ int ret;
+
+ /*
+ * We get the parent referenced by 'id' argument and
+ * traverse through all the parent object IDs to check
+ * if we already processed 'id', if we did, it's recursion
+ * and we fail.
+ */
+ ret = expr__get_id(&m->pctx, id, &data);
+ if (ret)
+ return ret;
+
+ p = data->parent;
+
+ /* The topmost entry (the one without a parent) is not compared. */
+ while (p->parent) {
+ if (!strcmp(p->id, id)) {
+ pr_err("failed: recursion detected for %s\n", id);
+ return -1;
+ }
+ p = p->parent;
+ }
+
+ /*
+ * If we are over the limit of static entries, the metric
+ * is too difficult/nested to process, fail as well.
+ */
+ p = expr_ids__alloc(ids);
+ if (!p) {
+ pr_err("failed: too many nested metrics\n");
+ return -EINVAL;
+ }
+
+ p->id = strdup(id);
+ p->parent = data->parent;
+ *parent = p;
+
+ return p->id ? 0 : -ENOMEM;
+}
+
+/* Declared early because __resolve_metric() and add_metric() call each other. */
+static int add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ struct metric **mp,
+ struct expr_id *parent,
+ struct expr_ids *ids);
+
+/*
+ * Replace every ID in the context of @m that names a metric found in @map
+ * by the IDs of that metric, repeating until no ID of @m names a metric.
+ *
+ * Returns 0 on success or the first error reported by recursion_check()
+ * or add_metric().
+ */
+static int __resolve_metric(struct metric *m,
+ bool metric_no_group,
+ struct list_head *metric_list,
+ struct pmu_events_map *map,
+ struct expr_ids *ids)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool all;
+ int ret;
+
+ /*
+ * Iterate all the parsed IDs and if there's metric,
+ * add it to the context.
+ */
+ do {
+ /* Stays true only if a full pass finds no ID naming a metric. */
+ all = true;
+ hashmap__for_each_entry((&m->pctx.ids), cur, bkt) {
+ struct expr_id *parent;
+ struct pmu_event *pe;
+
+ pe = find_metric(cur->key, map);
+ if (!pe)
+ continue;
+
+ ret = recursion_check(m, cur->key, &parent, ids);
+ if (ret)
+ return ret;
+
+ all = false;
+ /* The metric key itself needs to go out.. */
+ expr__del_id(&m->pctx, cur->key);
+
+ /* ... and it gets resolved to the parent context. */
+ ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids);
+ if (ret)
+ return ret;
+
+ /*
+ * We added new metric to hashmap, so we need
+ * to break the iteration and start over.
+ */
+ break;
+ }
+ } while (!all);
+
+ return 0;
+}
+
+/*
+ * Run __resolve_metric() on each metric of @metric_list, stopping at the
+ * first failure. Returns 0 when all metrics resolved, the error otherwise.
+ */
+static int resolve_metric(bool metric_no_group,
+ struct list_head *metric_list,
+ struct pmu_events_map *map,
+ struct expr_ids *ids)
+{
+ struct metric *m;
+ int err = 0;
+
+ list_for_each_entry(m, metric_list, nd) {
+ err = __resolve_metric(m, metric_no_group, metric_list, map, ids);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/*
+ * Add the metric @pe through __add_metric().
+ *
+ * An expression without '?' is added once with runtime 1. An expression
+ * containing '?' is added once per runtime value in
+ * [0, arch_get_runtimeparam()), with *@m reset to its value on entry after
+ * every iteration; the loop stops at the first error.
+ *
+ * Returns the result of the last __add_metric() call, or 0 when none ran.
+ */
+static int add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ struct metric **m,
+ struct expr_id *parent,
+ struct expr_ids *ids)
+{
+ struct metric *orig = *m;
+ int ret = 0;
+
+ pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+ if (!strstr(pe->metric_expr, "?")) {
+ ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids);
+ } else {
+ int j, count;
+
+ count = arch_get_runtimeparam();
+
+ /* This loop is added to create multiple
+ * events depend on count value and add
+ * those events to metric_list.
+ */
+
+ for (j = 0; j < count && !ret; j++, *m = orig)
+ ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids);
+ }
+
+ return ret;
+}
+
+/*
+ * Add every metric of @map whose group or name matches @metric.
+ *
+ * Each match is added to a local list together with the metrics it
+ * references. On success the event strings of all collected metrics are
+ * appended to @events and the local list is spliced onto @metric_list.
+ *
+ * Returns 0 on success, -EINVAL when nothing in @map matches @metric, or
+ * the error reported by add_metric()/resolve_metric(). The id strings kept
+ * in the local expr_ids pool are released on every return path.
+ *
+ * NOTE(review): metrics already placed on the local list are not released
+ * on the error paths.
+ */
static int metricgroup__add_metric(const char *metric, bool metric_no_group,
struct strbuf *events,
- struct list_head *group_list)
+ struct list_head *metric_list,
+ struct pmu_events_map *map)
{
- struct pmu_events_map *map = perf_pmu__find_map(NULL);
+ struct expr_ids ids = { .cnt = 0, };
struct pmu_event *pe;
- struct egroup *eg;
+ struct metric *m;
+ LIST_HEAD(list);
int i, ret;
bool has_match = false;
- if (!map)
- return 0;
+ map_for_each_metric(pe, i, map, metric) {
+ has_match = true;
+ m = NULL;
- for (i = 0; ; i++) {
- pe = &map->table[i];
+ ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids);
+ if (ret)
+ goto out;
- if (!pe->name && !pe->metric_group && !pe->metric_name) {
- /* End of pmu events. */
- if (!has_match)
- return -EINVAL;
- break;
- }
- if (!pe->metric_expr)
- continue;
- if (match_metric(pe->metric_group, metric) ||
- match_metric(pe->metric_name, metric)) {
- has_match = true;
- pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
-
- if (!strstr(pe->metric_expr, "?")) {
- ret = __metricgroup__add_metric(group_list,
- pe,
- metric_no_group,
- 1);
- if (ret)
- return ret;
- } else {
- int j, count;
-
- count = arch_get_runtimeparam();
-
- /* This loop is added to create multiple
- * events depend on count value and add
- * those events to group_list.
- */
-
- for (j = 0; j < count; j++) {
- ret = __metricgroup__add_metric(
- group_list, pe,
- metric_no_group, j);
- if (ret)
- return ret;
- }
- }
- }
+ /*
+ * Process any possible referenced metrics
+ * included in the expression.
+ */
+ ret = resolve_metric(metric_no_group,
+ &list, map, &ids);
+ if (ret)
+ goto out;
}
- list_for_each_entry(eg, group_list, nd) {
+
+ /* End of pmu events. */
+ if (!has_match) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ list_for_each_entry(m, &list, nd) {
if (events->len > 0)
strbuf_addf(events, ",");
- if (eg->has_constraint) {
+ if (m->has_constraint) {
metricgroup__add_metric_non_group(events,
- &eg->pctx);
+ &m->pctx);
} else {
metricgroup__add_metric_weak_group(events,
- &eg->pctx);
+ &m->pctx);
}
}
+
+ list_splice(&list, metric_list);
+ ret = 0;
+out:
+ /* The id strings are only needed while metrics get resolved. */
+ expr_ids__exit(&ids);
- return 0;
+ return ret;
}
static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
struct strbuf *events,
- struct list_head *group_list)
+ struct list_head *metric_list,
+ struct pmu_events_map *map)
{
char *llist, *nlist, *p;
int ret = -EINVAL;
@@ -683,7 +986,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
while ((p = strsep(&llist, ",")) != NULL) {
ret = metricgroup__add_metric(p, metric_no_group, events,
- group_list);
+ metric_list, map);
if (ret == -EINVAL) {
fprintf(stderr, "Cannot find metric or group `%s'\n",
p);
@@ -698,50 +1001,88 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
return ret;
}
-static void metricgroup__free_egroups(struct list_head *group_list)
+/* Unlink and free every metric_ref_node on @metric's metric_refs list. */
+static void metric__free_refs(struct metric *metric)
{
- struct egroup *eg, *egtmp;
+ struct metric_ref_node *ref, *tmp;
- list_for_each_entry_safe (eg, egtmp, group_list, nd) {
- expr__ctx_clear(&eg->pctx);
- list_del_init(&eg->nd);
- free(eg);
+ list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) {
+ list_del(&ref->list);
+ free(ref);
}
}
-int metricgroup__parse_groups(const struct option *opt,
- const char *str,
- bool metric_no_group,
- bool metric_no_merge,
- struct rblist *metric_events)
+/*
+ * Free every metric on @metric_list: its reference nodes, its parse
+ * context and the metric itself.
+ */
+static void metricgroup__free_metrics(struct list_head *metric_list)
+{
+ struct metric *m, *tmp;
+
+ list_for_each_entry_safe (m, tmp, metric_list, nd) {
+ metric__free_refs(m);
+ expr__ctx_clear(&m->pctx);
+ list_del_init(&m->nd);
+ free(m);
+ }
+}
+
+/*
+ * Build the metrics named in @str from @map, parse the events they need
+ * into @perf_evlist (using @fake_pmu when it is not NULL) and attach the
+ * resulting metric expressions to @metric_events.
+ *
+ * The temporary metric list and the event string are released on every
+ * return path. Returns 0 on success or the first error encountered.
+ */
+static int parse_groups(struct evlist *perf_evlist, const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct perf_pmu *fake_pmu,
+ struct rblist *metric_events,
+ struct pmu_events_map *map)
{
struct parse_events_error parse_error;
- struct evlist *perf_evlist = *(struct evlist **)opt->value;
- struct strbuf extra_events;
+ /* Start out initialized so that releasing it is valid on any path. */
+ struct strbuf extra_events = STRBUF_INIT;
- LIST_HEAD(group_list);
+ LIST_HEAD(metric_list);
int ret;
if (metric_events->nr_entries == 0)
metricgroup__rblist_init(metric_events);
ret = metricgroup__add_metric_list(str, metric_no_group,
- &extra_events, &group_list);
+ &extra_events, &metric_list, map);
- if (ret)
- return ret;
+ /* Metrics added before the failure still have to be freed. */
+ if (ret)
+ goto out;
pr_debug("adding %s\n", extra_events.buf);
bzero(&parse_error, sizeof(parse_error));
- ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+ ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu);
if (ret) {
parse_events_print_error(&parse_error, extra_events.buf);
goto out;
}
- strbuf_release(&extra_events);
- ret = metricgroup__setup_events(&group_list, metric_no_merge,
+ ret = metricgroup__setup_events(&metric_list, metric_no_merge,
perf_evlist, metric_events);
out:
- metricgroup__free_egroups(&group_list);
+ metricgroup__free_metrics(&metric_list);
+ strbuf_release(&extra_events);
return ret;
}
+/*
+ * Option-level entry point: parse the metrics in @str for the evlist that
+ * opt->value points to, using the map returned by perf_pmu__find_map(NULL).
+ * Returns 0 without doing anything when no such map exists.
+ */
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
+{
+ struct evlist *perf_evlist = *(struct evlist **)opt->value;
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
+
+ if (!map)
+ return 0;
+
+ return parse_groups(perf_evlist, str, metric_no_group,
+ metric_no_merge, NULL, metric_events, map);
+}
+
+/*
+ * Variant of metricgroup__parse_groups() that takes the evlist and the
+ * events map from the caller and parses events against perf_pmu__fake.
+ */
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ struct pmu_events_map *map,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
+{
+ return parse_groups(evlist, str, metric_no_group,
+ metric_no_merge, &perf_pmu__fake, metric_events, map);
+}
+
bool metricgroup__has_metric(const char *metric)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 287850bcdeca..62623a39cbec 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -7,8 +7,10 @@
#include <stdbool.h>
struct evsel;
+struct evlist;
struct option;
struct rblist;
+struct pmu_events_map;
struct metric_event {
struct rb_node nd;
@@ -16,12 +18,18 @@ struct metric_event {
struct list_head head; /* list of metric_expr */
};
+/* Name and expression of a metric referenced from another metric. */
+struct metric_ref {
+ const char *metric_name;
+ const char *metric_expr;
+};
+
struct metric_expr {
struct list_head nd;
const char *metric_expr;
const char *metric_name;
const char *metric_unit;
struct evsel **metric_events;
+ struct metric_ref *metric_refs; /* referenced metrics, or NULL if none */
int runtime;
};
@@ -34,8 +42,16 @@ int metricgroup__parse_groups(const struct option *opt,
bool metric_no_merge,
struct rblist *metric_events);
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ struct pmu_events_map *map,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events);
+
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
int arch_get_runtimeparam(void);
+void metricgroup__rblist_exit(struct rblist *metric_events);
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3decbb203846..9f7260e69113 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -767,8 +767,8 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
return 0;
errout:
- parse_state->error->help = strdup("(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, 0,
+ strdup(errbuf), strdup("(add -v to see detail)"));
return err;
}
@@ -784,36 +784,38 @@ parse_events_config_bpf(struct parse_events_state *parse_state,
return 0;
list_for_each_entry(term, head_config, list) {
- char errbuf[BUFSIZ];
int err;
if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
- snprintf(errbuf, sizeof(errbuf),
- "Invalid config term for BPF object");
- errbuf[BUFSIZ - 1] = '\0';
-
- parse_state->error->idx = term->err_term;
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, term->err_term,
+ strdup("Invalid config term for BPF object"),
+ NULL);
return -EINVAL;
}
err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
if (err) {
+ char errbuf[BUFSIZ];
+ int idx;
+
bpf__strerror_config_obj(obj, term, parse_state->evlist,
&error_pos, err, errbuf,
sizeof(errbuf));
- parse_state->error->help = strdup(
+
+ if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+ idx = term->err_val;
+ else
+ idx = term->err_term + error_pos;
+
+ parse_events__handle_error(parse_state->error, idx,
+ strdup(errbuf),
+ strdup(
"Hint:\tValid config terms:\n"
" \tmap:[<arraymap>].value<indices>=[value]\n"
" \tmap:[<eventmap>].event<indices>=[event]\n"
"\n"
" \twhere <indices> is something like [0,3...5] or [all]\n"
-" \t(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
- if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
- parse_state->error->idx = term->err_val;
- else
- parse_state->error->idx = term->err_term + error_pos;
+" \t(add -v to see detail)"));
return err;
}
}
@@ -877,8 +879,8 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
-err, errbuf,
sizeof(errbuf));
- parse_state->error->help = strdup("(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, 0,
+ strdup(errbuf), strdup("(add -v to see detail)"));
return err;
}
@@ -1450,7 +1452,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
fprintf(stderr, "' that may result in non-fatal errors\n");
}
- pmu = perf_pmu__find(name);
+ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
if (!pmu) {
char *err_str;
@@ -1483,7 +1485,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
}
}
- if (perf_pmu__check_alias(pmu, head_config, &info))
+ if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
return -EINVAL;
if (verbose > 1) {
@@ -1516,7 +1518,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
return -ENOMEM;
- if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+ if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
struct evsel_config_term *pos, *tmp;
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
@@ -2017,6 +2019,32 @@ err:
perf_pmu__parse_cleanup();
}
+/*
+ * Test helper: make perf_pmu_events_list a one-element list holding
+ * the PMU_EVENT_SYMBOL "read", so the test code can exercise parsing
+ * of such a term. Returns 0 on success, -ENOMEM on allocation failure.
+ */
+int perf_pmu__test_parse_init(void)
+{
+ struct perf_pmu_event_symbol *sym;
+
+ sym = malloc(sizeof(*sym) * 1);
+ if (sym == NULL)
+ return -ENOMEM;
+
+ sym->symbol = strdup("read");
+ if (sym->symbol == NULL) {
+ free(sym);
+ return -ENOMEM;
+ }
+ sym->type = PMU_EVENT_SYMBOL;
+
+ perf_pmu_events_list_num = 1;
+ perf_pmu_events_list = sym;
+ return 0;
+}
+
+
enum perf_pmu_event_symbol_type
perf_pmu__parse_check(const char *name)
{
@@ -2078,6 +2106,8 @@ int parse_events_terms(struct list_head *terms, const char *str)
int ret;
ret = parse_events__scanner(str, &parse_state);
+ perf_pmu__parse_cleanup();
+
if (!ret) {
list_splice(parse_state.terms, terms);
zfree(&parse_state.terms);
@@ -2088,15 +2118,16 @@ int parse_events_terms(struct list_head *terms, const char *str)
return ret;
}
-int parse_events(struct evlist *evlist, const char *str,
- struct parse_events_error *err)
+int __parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err, struct perf_pmu *fake_pmu)
{
struct parse_events_state parse_state = {
- .list = LIST_HEAD_INIT(parse_state.list),
- .idx = evlist->core.nr_entries,
- .error = err,
- .evlist = evlist,
- .stoken = PE_START_EVENTS,
+ .list = LIST_HEAD_INIT(parse_state.list),
+ .idx = evlist->core.nr_entries,
+ .error = err,
+ .evlist = evlist,
+ .stoken = PE_START_EVENTS,
+ .fake_pmu = fake_pmu,
};
int ret;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1fe23a2f9b36..00cde7d2e30c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -33,8 +33,15 @@ const char *event_type(int type);
int parse_events_option(const struct option *opt, const char *str, int unset);
int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
-int parse_events(struct evlist *evlist, const char *str,
- struct parse_events_error *error);
+int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
+ struct perf_pmu *fake_pmu);
+
+/* Same as __parse_events() with no fake PMU. */
+static inline int parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err)
+{
+ return __parse_events(evlist, str, err, NULL);
+}
+
int parse_events_terms(struct list_head *terms, const char *str);
int parse_filter(const struct option *opt, const char *str, int unset);
int exclude_perf(const struct option *opt, const char *arg, int unset);
@@ -127,9 +134,10 @@ struct parse_events_state {
int idx;
int nr_groups;
struct parse_events_error *error;
- struct evlist *evlist;
+ struct evlist *evlist;
struct list_head *terms;
int stoken;
+ struct perf_pmu *fake_pmu;
};
void parse_events__handle_error(struct parse_events_error *err, int idx,
@@ -253,4 +261,6 @@ static inline bool is_sdt_event(char *str __maybe_unused)
}
#endif /* HAVE_LIBELF_SUPPORT */
+int perf_pmu__test_parse_init(void);
+
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 002802e17059..3ca5fd2829ca 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -41,14 +41,6 @@ static int value(yyscan_t scanner, int base)
return __value(yylval, text, base, PE_VALUE);
}
-static int raw(yyscan_t scanner)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
- char *text = parse_events_get_text(scanner);
-
- return __value(yylval, text + 1, 16, PE_RAW);
-}
-
static int str(yyscan_t scanner, int token)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -72,6 +64,17 @@ static int str(yyscan_t scanner, int token)
return token;
}
+/*
+ * Token handler for the raw event rules. When the whole token text is a
+ * PMU event symbol it is returned as a PE_NAME string; otherwise the text
+ * after the first character is converted with base 16 into a PE_RAW value.
+ */
+static int raw(yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL)
+ return str(scanner, PE_NAME);
+
+ return __value(yylval, text + 1, 16, PE_RAW);
+}
+
static bool isbpf_suffix(char *text)
{
int len = strlen(text);
@@ -129,12 +132,16 @@ do { \
yyless(0); \
} while (0)
-static int pmu_str_check(yyscan_t scanner)
+static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
yylval->str = strdup(text);
+
+ if (parse_state->fake_pmu)
+ return PE_PMU_EVENT_FAKE;
+
switch (perf_pmu__parse_check(text)) {
case PMU_EVENT_SYMBOL_PREFIX:
return PE_PMU_EVENT_PRE;
@@ -289,6 +296,7 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
r{num_raw_hex} { return raw(yyscanner); }
+r0x{num_raw_hex} { return raw(yyscanner); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
@@ -376,7 +384,7 @@ r{num_raw_hex} { return raw(yyscanner); }
{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
-{name} { return pmu_str_check(yyscanner); }
+{name} { return pmu_str_check(yyscanner, _parse_state); }
{name_tag} { return str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
- { return '-'; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index acef87d9af58..b9fb91fdc5de 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%destructor { free ($$); } <str>
%type <term> event_term
@@ -356,6 +356,43 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
YYABORT;
$$ = list;
}
+|
+PE_PMU_EVENT_FAKE sep_dc
+{
+ /* Fake PMU event given without config terms. */
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYABORT;
+
+ err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false);
+ free($1);
+ if (err < 0) {
+ free(list);
+ YYABORT;
+ }
+ $$ = list;
+}
+|
+PE_PMU_EVENT_FAKE opt_pmu_config
+{
+ /* Fake PMU event followed by config terms ($2). */
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYABORT;
+
+ err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false);
+ /* The name and the terms are released here whether or not the add failed. */
+ free($1);
+ parse_events_terms__delete($2);
+ if (err < 0) {
+ free(list);
+ YYABORT;
+ }
+ $$ = list;
+}
value_sym:
PE_VALUE_SYM_HW
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 1337965673d7..3840d02f0f7b 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -93,6 +93,11 @@ static void perf_probe_context_switch(struct evsel *evsel)
evsel->core.attr.context_switch = 1;
}
+/* Probe setup callback: request text_poke in the event attributes. */
+static void perf_probe_text_poke(struct evsel *evsel)
+{
+ evsel->core.attr.text_poke = 1;
+}
+
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
@@ -108,6 +113,11 @@ bool perf_can_record_switch_events(void)
return perf_probe_api(perf_probe_context_switch);
}
+/*
+ * Return the result of perf_probe_api() run with the text_poke probe
+ * callback.
+ * NOTE(review): presumably true when the running kernel accepts
+ * attr.text_poke -- confirm against perf_probe_api().
+ */
+bool perf_can_record_text_poke_events(void)
+{
+ return perf_probe_api(perf_probe_text_poke);
+}
+
bool perf_can_record_cpu_wide(void)
{
struct perf_event_attr attr = {
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index 706c3c6426e2..d5506a983a94 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -9,6 +9,7 @@ bool perf_can_aux_sample(void);
bool perf_can_comm_exec(void);
bool perf_can_record_cpu_wide(void);
bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
bool perf_can_sample_identifier(void);
#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index b94fa07f5d32..e67a227c0ce7 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(aux_watermark, p_unsigned);
PRINT_ATTRf(sample_max_stack, p_unsigned);
PRINT_ATTRf(aux_sample_size, p_unsigned);
+ PRINT_ATTRf(text_poke, p_unsigned);
return ret;
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 93fe72a9dc0b..f1688e1f6ed7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -26,6 +26,8 @@
#include "strbuf.h"
#include "fncache.h"
+struct perf_pmu perf_pmu__fake;
+
struct perf_pmu_format {
char *name;
int value;
@@ -1400,6 +1402,7 @@ struct sevent {
char *pmu;
char *metric_expr;
char *metric_name;
+ int is_cpu;
};
static int cmp_sevent(const void *a, const void *b)
@@ -1416,6 +1419,11 @@ static int cmp_sevent(const void *a, const void *b)
if (n)
return n;
}
+
+ /* Order CPU core events to be first */
+ if (as->is_cpu != bs->is_cpu)
+ return bs->is_cpu - as->is_cpu;
+
return strcmp(as->name, bs->name);
}
@@ -1475,7 +1483,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
list_for_each_entry(alias, &pmu->aliases, list) {
char *name = alias->desc ? alias->name :
format_alias(buf, sizeof(buf), pmu, alias);
- bool is_cpu = !strcmp(pmu->name, "cpu");
+ bool is_cpu = is_pmu_core(pmu->name);
if (alias->deprecated && !deprecated)
continue;
@@ -1507,6 +1515,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
aliases[j].pmu = pmu->name;
aliases[j].metric_expr = alias->metric_expr;
aliases[j].metric_name = alias->metric_name;
+ aliases[j].is_cpu = is_cpu;
j++;
}
if (pmu->selectable &&
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index f971d9aa4570..44ccbdbb1c37 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -43,6 +43,8 @@ struct perf_pmu {
struct list_head list; /* ELEM */
};
+extern struct perf_pmu perf_pmu__fake;
+
struct perf_pmu_info {
const char *unit;
const char *metric_expr;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index df713a5d1e26..99d36ac77c08 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -375,9 +375,13 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
/* Find the address of given function */
map__for_each_symbol_by_name(map, pp->function, sym) {
- if (uprobes)
+ if (uprobes) {
address = sym->start;
- else
+ if (sym->type == STT_GNU_IFUNC)
+ pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n"
+ "Consider identifying the final function used at run time and set the probe directly on that.\n",
+ pp->function);
+ } else
address = map->unmap_ip(map, sym->start) - map->reloc;
break;
}
@@ -2968,6 +2972,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ /* There can be duplicated symbols in the map */
+ for (i = 0; i < j; i++)
+ if (sym->start == syms[i]->start) {
+ pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n",
+ sym->name, sym->start);
+ break;
+ }
+ if (i != j)
+ continue;
+
tev = (*tevs) + ret;
tp = &tev->point;
if (ret == num_matched_functions) {
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 55924255c535..659024342e9a 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1408,6 +1408,9 @@ static int fill_empty_trace_arg(struct perf_probe_event *pev,
char *type;
int i, j, ret;
+ if (!ntevs)
+ return -ENOENT;
+
for (i = 0; i < pev->nargs; i++) {
type = NULL;
for (j = 0; j < ntevs; j++) {
@@ -1464,7 +1467,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
if (ret >= 0 && tf.pf.skip_empty_arg)
ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
- if (ret < 0) {
+ if (ret < 0 || tf.ntevs == 0) {
for (i = 0; i < tf.ntevs; i++)
clear_probe_trace_event(&tf.tevs[i]);
zfree(tevs);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 39d1de4b2a36..03678ff25539 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -48,6 +48,7 @@ struct record_opts {
bool sample_id;
bool no_bpf_event;
bool kcore;
+ bool text_poke;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
@@ -61,7 +62,7 @@ struct record_opts {
const char *auxtrace_snapshot_opts;
const char *auxtrace_sample_opts;
bool sample_transaction;
- unsigned initial_delay;
+ int initial_delay;
bool use_clockid;
clockid_t clockid;
u64 clockid_res_ns;
@@ -70,6 +71,8 @@ struct record_opts {
int mmap_flush;
unsigned int comp_level;
unsigned int nr_threads_synthesize;
+ int ctl_fd;
+ int ctl_fd_ack;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1a157e84a04a..ffbc9d35a383 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -115,12 +115,12 @@ static int perf_session__open(struct perf_session *session)
if (perf_header__has_feat(&session->header, HEADER_STAT))
return 0;
- if (!perf_evlist__valid_sample_type(session->evlist)) {
+ if (!evlist__valid_sample_type(session->evlist)) {
pr_err("non matching sample_type\n");
return -1;
}
- if (!perf_evlist__valid_sample_id_all(session->evlist)) {
+ if (!evlist__valid_sample_id_all(session->evlist)) {
pr_err("non matching sample_id_all\n");
return -1;
}
@@ -252,10 +252,10 @@ struct perf_session *perf_session__new(struct perf_data *data,
/*
* In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
- * processed, so perf_evlist__sample_id_all is not meaningful here.
+ * processed, so evlist__sample_id_all is not meaningful here.
*/
if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
- tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
+ tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
tool->ordered_events = false;
}
@@ -490,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->ksymbol = perf_event__process_ksymbol;
if (tool->bpf == NULL)
tool->bpf = perf_event__process_bpf;
+ if (tool->text_poke == NULL)
+ tool->text_poke = perf_event__process_text_poke;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -659,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
swap_sample_id_all(event, &event->context_switch + 1);
}
+/*
+ * Byte-swap a text_poke event in place.
+ *
+ * The addr field is swapped as 64 bits and old_len/new_len as 16 bits each.
+ * When sample_id_all is set, the trailing sample id is located by starting
+ * at old_len and skipping the two length fields plus old_len + new_len
+ * further bytes, rounded up to a multiple of sizeof(u64); it is then handed
+ * to swap_sample_id_all().
+ */
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{
+ event->text_poke.addr = bswap_64(event->text_poke.addr);
+ event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+ event->text_poke.new_len = bswap_16(event->text_poke.new_len);
+
+ if (sample_id_all) {
+ /* old_len/new_len were swapped above, so they are usable as lengths here */
+ size_t len = sizeof(event->text_poke.old_len) +
+ sizeof(event->text_poke.new_len) +
+ event->text_poke.old_len +
+ event->text_poke.new_len;
+ void *data = &event->text_poke.old_len;
+
+ /* the offset is measured from old_len, not from the start of the event */
+ data += PERF_ALIGN(len, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
static void perf_event__throttle_swap(union perf_event *event,
bool sample_id_all)
{
@@ -932,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_SWITCH] = perf_event__switch_swap,
[PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
[PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap,
+ [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1160,10 +1181,10 @@ static void perf_evlist__print_tstamp(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample)
{
- u64 sample_type = __perf_evlist__combined_sample_type(evlist);
+ u64 sample_type = __evlist__combined_sample_type(evlist);
if (event->header.type != PERF_RECORD_SAMPLE &&
- !perf_evlist__sample_id_all(evlist)) {
+ !evlist__sample_id_all(evlist)) {
fputs("-1 -1 ", stdout);
return;
}
@@ -1474,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->ksymbol(tool, event, sample, machine);
case PERF_RECORD_BPF_EVENT:
return tool->bpf(tool, event, sample, machine);
+ case PERF_RECORD_TEXT_POKE:
+ return tool->text_poke(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
@@ -1655,7 +1678,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
return -1;
if (session->header.needs_swap)
- event_swap(event, perf_evlist__sample_id_all(session->evlist));
+ event_swap(event, evlist__sample_id_all(session->evlist));
out_parse_sample:
@@ -1704,7 +1727,7 @@ static s64 perf_session__process_event(struct perf_session *session,
int ret;
if (session->header.needs_swap)
- event_swap(event, perf_evlist__sample_id_all(evlist));
+ event_swap(event, evlist__sample_id_all(evlist));
if (event->header.type >= PERF_RECORD_HEADER_MAX)
return -EINVAL;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index a7c13a88ecb9..e1ba6c1b916a 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -730,25 +730,17 @@ static void print_smi_cost(struct perf_stat_config *config,
out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
-static void generic_metric(struct perf_stat_config *config,
- const char *metric_expr,
- struct evsel **metric_events,
- char *name,
- const char *metric_name,
- const char *metric_unit,
- int runtime,
- int cpu,
- struct perf_stat_output_ctx *out,
- struct runtime_stat *st)
+static int prepare_metric(struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ struct expr_parse_ctx *pctx,
+ int cpu,
+ struct runtime_stat *st)
{
- print_metric_t print_metric = out->print_metric;
- struct expr_parse_ctx pctx;
- double ratio, scale;
- int i;
- void *ctxp = out->ctx;
+ double scale;
char *n, *pn;
+ int i, j, ret;
- expr__ctx_init(&pctx);
+ expr__ctx_init(pctx);
for (i = 0; metric_events[i]; i++) {
struct saved_value *v;
struct stats *stats;
@@ -771,7 +763,7 @@ static void generic_metric(struct perf_stat_config *config,
n = strdup(metric_events[i]->name);
if (!n)
- return;
+ return -ENOMEM;
/*
* This display code with --no-merge adds [cpu] postfixes.
* These are not supported by the parser. Remove everything
@@ -782,11 +774,42 @@ static void generic_metric(struct perf_stat_config *config,
*pn = 0;
if (metric_total)
- expr__add_id(&pctx, n, metric_total);
+ expr__add_id_val(pctx, n, metric_total);
else
- expr__add_id(&pctx, n, avg_stats(stats)*scale);
+ expr__add_id_val(pctx, n, avg_stats(stats)*scale);
}
+ for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+ ret = expr__add_ref(pctx, &metric_refs[j]);
+ if (ret)
+ return ret;
+ }
+
+ return i;
+}
+
+static void generic_metric(struct perf_stat_config *config,
+ const char *metric_expr,
+ struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ char *name,
+ const char *metric_name,
+ const char *metric_unit,
+ int runtime,
+ int cpu,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ print_metric_t print_metric = out->print_metric;
+ struct expr_parse_ctx pctx;
+ double ratio, scale;
+ int i;
+ void *ctxp = out->ctx;
+
+ i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
+ if (i < 0)
+ return;
+
if (!metric_events[i]) {
if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
char *unit;
@@ -827,6 +850,20 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_clear(&pctx);
}
+/*
+ * Evaluate mexp->metric_expr for @cpu using the counters recorded in @st.
+ *
+ * Returns the parsed ratio, or 0. when preparing the parse context or
+ * parsing the expression fails.
+ *
+ * prepare_metric() always runs expr__ctx_init() on the context before it
+ * can fail, so the context is cleared on every exit path, in the same way
+ * generic_metric() clears its own; otherwise the ids added to it would leak.
+ */
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+{
+	struct expr_parse_ctx pctx;
+	double ratio = 0.;
+
+	if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
+		goto out;
+
+	if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
+		ratio = 0.;
+
+out:
+	expr__ctx_clear(&pctx);
+	return ratio;
+}
+
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
double avg, int cpu,
@@ -1035,8 +1072,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (evsel->metric_expr) {
- generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
- evsel->metric_name, NULL, 1, cpu, out, st);
+ generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
+ evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
char unit = 'M';
char unit_buf[10];
@@ -1064,7 +1101,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (num++ > 0)
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
- evsel->name, mexp->metric_name,
+ mexp->metric_refs, evsel->name, mexp->metric_name,
mexp->metric_unit, mexp->runtime, cpu, out, st);
}
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f75ae679eb28..f8778cffd941 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -116,7 +116,7 @@ struct perf_stat_config {
FILE *output;
unsigned int interval;
unsigned int timeout;
- unsigned int initial_delay;
+ int initial_delay;
unsigned int unit_width;
unsigned int metric_only_len;
int times;
@@ -133,6 +133,8 @@ struct perf_stat_config {
struct perf_cpu_map *cpus_aggr_map;
u64 *walltime_run;
struct rblist metric_events;
+ int ctl_fd;
+ int ctl_fd_ack;
};
void perf_stat__set_big_num(int set);
@@ -230,4 +232,7 @@ perf_evlist__print_counters(struct evlist *evlist,
struct target *_target,
struct timespec *ts,
int argc, const char **argv);
+
+struct metric_expr;
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5ddf84dcbae7..053468ffcb8a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -666,6 +666,8 @@ static bool symbol__is_idle(const char *name)
"poll_idle",
"ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
+ "psw_idle",
+ "psw_idle_exit",
NULL
};
int i;
@@ -1563,6 +1565,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 3fb67bd31e4a..bbbc0dcd461f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -57,7 +57,8 @@ struct perf_tool {
throttle,
unthrottle,
ksymbol,
- bpf;
+ bpf,
+ text_poke;
event_attr_op attr;
event_attr_op event_update;
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
index a12d295a09d8..4f048789b260 100644
--- a/tools/testing/ktest/examples/README
+++ b/tools/testing/ktest/examples/README
@@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against
lots of different architectures. It only does build tests, but makes
it easy to compile test different archs. You can download the arch
cross compilers from:
- http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+ https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
test.conf - A generic example of a config. This is based on an actual config
used to perform real testing.
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
index 6907f32590b2..3b15e85f26bd 100644
--- a/tools/testing/ktest/examples/crosstests.conf
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -3,7 +3,7 @@
#
# In this config, it is expected that the tool chains from:
#
-# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+# https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
#
# running on a x86_64 system have been downloaded and installed into:
#
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 7570e36d636d..cb16d2aac51c 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -11,6 +11,7 @@ use File::Path qw(mkpath);
use File::Copy qw(cp);
use FileHandle;
use FindBin;
+use IO::Handle;
my $VERSION = "0.2";
@@ -81,6 +82,8 @@ my %default = (
"IGNORE_UNUSED" => 0,
);
+my $test_log_start = 0;
+
my $ktest_config = "ktest.conf";
my $version;
my $have_version = 0;
@@ -98,6 +101,7 @@ my $final_post_ktest;
my $pre_ktest;
my $post_ktest;
my $pre_test;
+my $pre_test_die;
my $post_test;
my $pre_build;
my $post_build;
@@ -223,6 +227,7 @@ my $dirname = $FindBin::Bin;
my $mailto;
my $mailer;
my $mail_path;
+my $mail_max_size;
my $mail_command;
my $email_on_error;
my $email_when_finished;
@@ -259,6 +264,7 @@ my %option_map = (
"MAILTO" => \$mailto,
"MAILER" => \$mailer,
"MAIL_PATH" => \$mail_path,
+ "MAIL_MAX_SIZE" => \$mail_max_size,
"MAIL_COMMAND" => \$mail_command,
"EMAIL_ON_ERROR" => \$email_on_error,
"EMAIL_WHEN_FINISHED" => \$email_when_finished,
@@ -273,6 +279,7 @@ my %option_map = (
"PRE_KTEST" => \$pre_ktest,
"POST_KTEST" => \$post_ktest,
"PRE_TEST" => \$pre_test,
+ "PRE_TEST_DIE" => \$pre_test_die,
"POST_TEST" => \$post_test,
"BUILD_TYPE" => \$build_type,
"BUILD_OPTIONS" => \$build_options,
@@ -507,9 +514,7 @@ EOF
sub _logit {
if (defined($opt{"LOG_FILE"})) {
- open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
- print OUT @_;
- close(OUT);
+ print LOG @_;
}
}
@@ -909,6 +914,12 @@ sub process_expression {
}
}
+ if ($val =~ s/^\s*NOT\s+(.*)//) {
+ my $express = $1;
+ my $ret = process_expression($name, $express);
+ return !$ret;
+ }
+
if ($val =~ /^\s*0\s*$/) {
return 0;
} elsif ($val =~ /^\s*\d+\s*$/) {
@@ -1485,8 +1496,32 @@ sub dodie {
if ($email_on_error) {
my $name = get_test_name;
+ my $log_file;
+
+ if (defined($opt{"LOG_FILE"})) {
+ my $whence = 0; # beginning of file
+ my $pos = $test_log_start;
+
+ if (defined($mail_max_size)) {
+ my $log_size = tell LOG;
+ $log_size -= $test_log_start;
+ if ($log_size > $mail_max_size) {
+ $whence = 2; # end of file
+ $pos = - $mail_max_size;
+ }
+ }
+ $log_file = "$tmpdir/log";
+ open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)";
+ open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n";
+ seek(L, $pos, $whence);
+ while (<L>) {
+ print O;
+ }
+ close O;
+ close L;
+ }
send_email("KTEST: critical failure for test $i [$name]",
- "Your test started at $script_start_time has failed with:\n@_\n");
+ "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
}
if ($monitor_cnt) {
@@ -1508,7 +1543,7 @@ sub create_pty {
my $TIOCGPTN = 0x80045430;
sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
- dodie "Cant open /dev/ptmx";
+ dodie "Can't open /dev/ptmx";
# unlockpt()
$tmp = pack("i", 0);
@@ -1772,8 +1807,6 @@ sub run_command {
(fail "unable to exec $command" and return 0);
if (defined($opt{"LOG_FILE"})) {
- open(LOG, ">>$opt{LOG_FILE}") or
- dodie "failed to write to log";
$dolog = 1;
}
@@ -1821,7 +1854,6 @@ sub run_command {
}
close(CMD);
- close(LOG) if ($dolog);
close(RD) if ($dord);
$end_time = time;
@@ -3188,6 +3220,8 @@ sub config_bisect_end {
doprint "***************************************\n\n";
}
+my $pass = 1;
+
sub run_config_bisect {
my ($good, $bad, $last_result) = @_;
my $reset = "";
@@ -3210,11 +3244,15 @@ sub run_config_bisect {
$ret = run_config_bisect_test $config_bisect_type;
if ($ret) {
- doprint "NEW GOOD CONFIG\n";
+ doprint "NEW GOOD CONFIG ($pass)\n";
+ system("cp $output_config $tmpdir/good_config.tmp.$pass");
+ $pass++;
# Return 3 for good config
return 3;
} else {
- doprint "NEW BAD CONFIG\n";
+ doprint "NEW BAD CONFIG ($pass)\n";
+ system("cp $output_config $tmpdir/bad_config.tmp.$pass");
+ $pass++;
# Return 4 for bad config
return 4;
}
@@ -4077,8 +4115,12 @@ if ($#new_configs >= 0) {
}
}
-if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
- unlink $opt{"LOG_FILE"};
+if (defined($opt{"LOG_FILE"})) {
+ if ($opt{"CLEAR_LOG"}) {
+ unlink $opt{"LOG_FILE"};
+ }
+ open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ LOG->autoflush(1);
}
doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
@@ -4171,7 +4213,7 @@ sub find_mailer {
}
sub do_send_mail {
- my ($subject, $message) = @_;
+ my ($subject, $message, $file) = @_;
if (!defined($mail_path)) {
# find the mailer
@@ -4181,16 +4223,30 @@ sub do_send_mail {
}
}
+ my $header_file = "$tmpdir/header";
+ open (HEAD, ">$header_file") or die "Can not create $header_file\n";
+ print HEAD "To: $mailto\n";
+ print HEAD "Subject: $subject\n\n";
+ print HEAD "$message\n";
+ close HEAD;
+
if (!defined($mail_command)) {
if ($mailer eq "mail" || $mailer eq "mailx") {
- $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+ $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO";
} elsif ($mailer eq "sendmail" ) {
- $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+ $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -t \$MAILTO";
} else {
die "\nYour mailer: $mailer is not supported.\n";
}
}
+ if (defined($file)) {
+ $mail_command =~ s/\$BODY_FILE/$file/g;
+ } else {
+ $mail_command =~ s/\$BODY_FILE//g;
+ }
+
+ $mail_command =~ s/\$HEADER_FILE/$header_file/g;
$mail_command =~ s/\$MAILER/$mailer/g;
$mail_command =~ s/\$MAIL_PATH/$mail_path/g;
$mail_command =~ s/\$MAILTO/$mailto/g;
@@ -4338,10 +4394,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
}
doprint "\n\n";
+
+ if (defined($opt{"LOG_FILE"})) {
+ $test_log_start = tell(LOG);
+ }
+
doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
if (defined($pre_test)) {
- run_command $pre_test;
+ my $ret = run_command $pre_test;
+ if (!$ret && defined($pre_test_die) &&
+ $pre_test_die) {
+ dodie "failed to pre_test\n";
+ }
}
unlink $dmesg;
@@ -4441,4 +4506,10 @@ if ($email_when_finished) {
send_email("KTEST: Your test has finished!",
"$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
}
+
+if (defined($opt{"LOG_FILE"})) {
+ print "\n See $opt{LOG_FILE} for the record of results.\n\n";
+ close LOG;
+}
+
exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 27666b8007ed..5e7d1d729752 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -442,6 +442,19 @@
# Users can cancel the test by Ctrl^C
# (default 0)
#EMAIL_WHEN_CANCELED = 1
+#
+# If a test ends with an error, EMAIL_ON_ERROR is set, and a
+# LOG_FILE is defined, then the log of the failing test will
+# be included in the email that is sent.
+# The log may be very large, in which case only the tail of
+# the log should be sent. To limit how much of the log is
+# sent, set MAIL_MAX_SIZE. This is the size in bytes of the
+# last portion of the failed test's log to include. That is,
+# if this is set to 100000, then only the last 100 thousand
+# bytes of the log file will be included in the email.
+#MAIL_MAX_SIZE = 1000000
# Start a test setup. If you leave this off, all options
# will be default and the test will run once.
@@ -557,6 +570,11 @@
# default (undefined)
#PRE_TEST = ${SSH} reboot_to_special_kernel
+# To kill the entire test if PRE_TEST is defined but fails set this
+# to 1.
+# (default 0)
+#PRE_TEST_DIE = 1
+
# If there is a command you want to run after the individual test case
# completes, then you can set this option.
#
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index f9b769f3437d..425ef40067e7 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -240,12 +240,6 @@ def main(argv, linux=None):
if cli_args.subcommand == 'run':
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
- kunit_kernel.kunitconfig_path = os.path.join(
- cli_args.build_dir,
- kunit_kernel.kunitconfig_path)
-
- if not os.path.exists(kunit_kernel.kunitconfig_path):
- create_default_kunitconfig()
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -263,12 +257,6 @@ def main(argv, linux=None):
if cli_args.build_dir:
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
- kunit_kernel.kunitconfig_path = os.path.join(
- cli_args.build_dir,
- kunit_kernel.kunitconfig_path)
-
- if not os.path.exists(kunit_kernel.kunitconfig_path):
- create_default_kunitconfig()
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -285,12 +273,6 @@ def main(argv, linux=None):
if cli_args.build_dir:
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
- kunit_kernel.kunitconfig_path = os.path.join(
- cli_args.build_dir,
- kunit_kernel.kunitconfig_path)
-
- if not os.path.exists(kunit_kernel.kunitconfig_path):
- create_default_kunitconfig()
if not linux:
linux = kunit_kernel.LinuxSourceTree()
@@ -309,12 +291,6 @@ def main(argv, linux=None):
if cli_args.build_dir:
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
- kunit_kernel.kunitconfig_path = os.path.join(
- cli_args.build_dir,
- kunit_kernel.kunitconfig_path)
-
- if not os.path.exists(kunit_kernel.kunitconfig_path):
- create_default_kunitconfig()
if not linux:
linux = kunit_kernel.LinuxSourceTree()
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 63dbda2d029f..e20e2056cb38 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -34,7 +34,7 @@ class LinuxSourceTreeOperations(object):
def make_mrproper(self):
try:
- subprocess.check_output(['make', 'mrproper'])
+ subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT)
except OSError as e:
raise ConfigError('Could not call make command: ' + e)
except subprocess.CalledProcessError as e:
@@ -47,7 +47,7 @@ class LinuxSourceTreeOperations(object):
if build_dir:
command += ['O=' + build_dir]
try:
- subprocess.check_output(command, stderr=subprocess.PIPE)
+ subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
raise ConfigError('Could not call make command: ' + e)
except subprocess.CalledProcessError as e:
@@ -77,7 +77,7 @@ class LinuxSourceTreeOperations(object):
if build_dir:
command += ['O=' + build_dir]
try:
- subprocess.check_output(command)
+ subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
raise BuildError('Could not call execute make: ' + e)
except subprocess.CalledProcessError as e:
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index f9eeaea94cad..287c74d821c3 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -251,21 +251,21 @@ class KUnitMainTest(unittest.TestCase):
pass
def test_config_passes_args_pass(self):
- kunit.main(['config'], self.linux_source_mock)
+ kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
assert self.linux_source_mock.run_kernel.call_count == 0
def test_build_passes_args_pass(self):
kunit.main(['build'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 0
- self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '', None)
+ self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None)
assert self.linux_source_mock.run_kernel.call_count == 0
def test_exec_passes_args_pass(self):
kunit.main(['exec'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 0
assert self.linux_source_mock.run_kernel.call_count == 1
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=300)
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_passes_args_pass(self):
@@ -273,7 +273,7 @@ class KUnitMainTest(unittest.TestCase):
assert self.linux_source_mock.build_reconfig.call_count == 1
assert self.linux_source_mock.run_kernel.call_count == 1
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='', timeout=300)
+ build_dir='.kunit', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_exec_passes_args_fail(self):
@@ -313,7 +313,7 @@ class KUnitMainTest(unittest.TestCase):
def test_exec_timeout(self):
timeout = 3453
kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=timeout)
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_timeout(self):
@@ -321,12 +321,12 @@ class KUnitMainTest(unittest.TestCase):
kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='', timeout=timeout)
+ build_dir='.kunit', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_builddir(self):
build_dir = '.kunit'
- kunit.main(['run', '--build_dir', build_dir], self.linux_source_mock)
+ kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
self.linux_source_mock.run_kernel.assert_called_once_with(
build_dir=build_dir, timeout=300)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a8ee5c4d41eb..a1a5dc645b40 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -173,6 +173,9 @@ struct nfit_test_fw {
u64 version;
u32 size_received;
u64 end_time;
+ bool armed;
+ bool missed_activate;
+ unsigned long last_activate;
};
struct nfit_test {
@@ -345,7 +348,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t,
__func__, t, nd_cmd, buf_len, idx);
if (fw->state == FW_STATE_UPDATED) {
- /* update already done, need cold boot */
+ /* update already done, need activation */
nd_cmd->status = 0x20007;
return 0;
}
@@ -430,6 +433,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
}
dev_dbg(dev, "%s: transition out verify\n", __func__);
fw->state = FW_STATE_UPDATED;
+ fw->missed_activate = false;
/* fall through */
case FW_STATE_UPDATED:
nd_cmd->status = 0;
@@ -1178,6 +1182,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t,
return 0;
}
+/*
+ * Jiffies value recorded by the most recent emulated bus firmware
+ * activation. Zero is treated as "no activation has happened yet".
+ */
+static unsigned long last_activate;
+
+/*
+ * Emulate the bus-scope firmware-activate "businfo" query.
+ *
+ * @t: test instance whose per-DIMM firmware records (t->fw[]) are inspected
+ * @nd_cmd: output payload; every field is overwritten
+ * @buf_len: size of the payload buffer; not consulted here
+ *
+ * Reports ARMED when at least one DIMM is armed, BUSY while fewer than
+ * three seconds have passed since the last activation, and IDLE otherwise.
+ * Always returns 0.
+ */
+static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t,
+		struct nd_intel_bus_fw_activate_businfo *nd_cmd,
+		unsigned int buf_len)
+{
+	struct nd_intel_bus_fw_activate_businfo info;
+	int nr_armed = 0;
+	int bus_state;
+	u64 quiesce;
+	int dimm;
+
+	/* Count how many DIMMs are currently armed for activation. */
+	for (dimm = 0; dimm < NUM_DCR; dimm++)
+		if (t->fw[dimm].armed)
+			nr_armed++;
+
+	/*
+	 * Emulate a 3 second activation maximum and 1 second of incremental
+	 * quiesce time per armed DIMM, so that multiple activations are
+	 * required to get all DIMMs updated.
+	 */
+	if (nr_armed)
+		bus_state = ND_INTEL_FWA_ARMED;
+	else if (last_activate && !time_after(jiffies, last_activate + 3 * HZ))
+		bus_state = ND_INTEL_FWA_BUSY;
+	else
+		bus_state = ND_INTEL_FWA_IDLE;
+
+	quiesce = nr_armed * USEC_PER_SEC;
+	info = (struct nd_intel_bus_fw_activate_businfo) {
+		.capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+			| ND_INTEL_BUS_FWA_CAP_OSQUIESCE
+			| ND_INTEL_BUS_FWA_CAP_RESET,
+		.state = bus_state,
+		.activate_tmo = quiesce,
+		.cpu_quiesce_tmo = quiesce,
+		.io_quiesce_tmo = quiesce,
+		.max_quiesce_tmo = 3 * USEC_PER_SEC,
+	};
+	*nd_cmd = info;
+
+	return 0;
+}
+
+/*
+ * Emulate the bus-scope firmware-activate command.
+ *
+ * @t: test instance whose per-DIMM firmware records (t->fw[]) are updated
+ * @nd_cmd: command payload; only ->status is written
+ * @buf_len: size of the payload buffer; not consulted here
+ *
+ * The status is derived from the current businfo: BUSY while the bus is
+ * busy, TMO when the activation timeout exceeds the maximum quiesce time,
+ * NOARM when the bus is idle, and 0 otherwise. Activation is carried out
+ * for status 0 and also for TMO; BUSY and NOARM leave all state untouched.
+ * Always returns 0.
+ */
+static int nvdimm_bus_intel_fw_activate(struct nfit_test *t,
+		struct nd_intel_bus_fw_activate *nd_cmd,
+		unsigned int buf_len)
+{
+	struct nd_intel_bus_fw_activate_businfo bus;
+	u32 status;
+	int dimm;
+
+	nvdimm_bus_intel_fw_activate_businfo(t, &bus, sizeof(bus));
+	if (bus.state == ND_INTEL_FWA_BUSY)
+		status = ND_INTEL_BUS_FWA_STATUS_BUSY;
+	else if (bus.activate_tmo > bus.max_quiesce_tmo)
+		status = ND_INTEL_BUS_FWA_STATUS_TMO;
+	else if (bus.state == ND_INTEL_FWA_IDLE)
+		status = ND_INTEL_BUS_FWA_STATUS_NOARM;
+	else
+		status = 0;
+
+	dev_dbg(&t->pdev.dev, "status: %d\n", status);
+	nd_cmd->status = status;
+
+	/* Only a clean status or a timeout status goes on to activate. */
+	if (status != 0 && status != ND_INTEL_BUS_FWA_STATUS_TMO)
+		return 0;
+
+	last_activate = jiffies;
+	for (dimm = 0; dimm < NUM_DCR; dimm++) {
+		struct nfit_test_fw *fw = &t->fw[dimm];
+
+		if (fw->armed) {
+			/*
+			 * An armed DIMM holding an update moves to the new
+			 * firmware state; any other armed DIMM is recorded
+			 * as having missed this activation.
+			 */
+			if (fw->state == FW_STATE_UPDATED)
+				fw->state = FW_STATE_NEW;
+			else
+				fw->missed_activate = true;
+			fw->armed = false;
+			fw->last_activate = last_activate;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Emulate the per-DIMM firmware-activate "dimminfo" query.
+ *
+ * @t: test instance owning the firmware records
+ * @nd_cmd: output payload; every field is overwritten
+ * @buf_len: size of the payload buffer; not consulted here
+ * @dimm: index into t->fw[] selecting the DIMM being queried
+ *
+ * The DIMM state mirrors a BUSY bus; otherwise it is ARMED only when the
+ * bus is not idle and this DIMM is armed, else IDLE. A result other than
+ * NONE is reported only for an idle DIMM that took part in the most recent
+ * bus activation. Always returns 0.
+ */
+static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t,
+		struct nd_intel_fw_activate_dimminfo *nd_cmd,
+		unsigned int buf_len, int dimm)
+{
+	struct nfit_test_fw *fw = &t->fw[dimm];
+	struct nd_intel_bus_fw_activate_businfo bus;
+	struct nd_intel_fw_activate_dimminfo out;
+	u32 result = ND_INTEL_DIMM_FWA_NONE;
+	u32 state = ND_INTEL_FWA_IDLE;
+
+	nvdimm_bus_intel_fw_activate_businfo(t, &bus, sizeof(bus));
+
+	if (bus.state == ND_INTEL_FWA_BUSY)
+		state = ND_INTEL_FWA_BUSY;
+	else if (bus.state != ND_INTEL_FWA_IDLE && fw->armed)
+		state = ND_INTEL_FWA_ARMED;
+
+	/* Matching timestamps mean this DIMM was part of the last activation. */
+	if (state == ND_INTEL_FWA_IDLE && last_activate &&
+	    fw->last_activate == last_activate)
+		result = fw->missed_activate ? ND_INTEL_DIMM_FWA_NOTSTAGED
+					     : ND_INTEL_DIMM_FWA_SUCCESS;
+
+	out = (struct nd_intel_fw_activate_dimminfo) {
+		.result = result,
+		.state = state,
+	};
+	*nd_cmd = out;
+
+	return 0;
+}
+
+/*
+ * Emulate the per-DIMM firmware-activate arm/disarm command.
+ *
+ * @t: test instance owning the firmware records
+ * @nd_cmd: command payload; ->activate_arm is read, ->status is set to 0
+ * @buf_len: size of the payload buffer; not consulted here
+ * @dimm: index into t->fw[] selecting the DIMM to arm or disarm
+ *
+ * The DIMM ends up armed only when the request equals
+ * ND_INTEL_DIMM_FWA_ARM; any other value disarms it. Always returns 0.
+ */
+static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t,
+		struct nd_intel_fw_activate_arm *nd_cmd,
+		unsigned int buf_len, int dimm)
+{
+	bool arm = (nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM);
+
+	t->fw[dimm].armed = arm;
+	nd_cmd->status = 0;
+	return 0;
+}
static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
{
@@ -1192,6 +1324,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
return i;
}
+/*
+ * Debug-log an incoming control request and hex-dump its input payload.
+ *
+ * @acpi_desc: descriptor embedded in the owning struct nfit_test
+ * @nvdimm: target DIMM, or NULL for a bus-scope request
+ * @cmd: command number as handed to the ctl entry point
+ * @buf: command buffer; for ND_CMD_CALL it is read as a struct nd_cmd_pkg
+ * @len: length of @buf
+ *
+ * For ND_CMD_CALL the family, function number, input length and payload
+ * pointer are taken from the package header instead of the raw arguments.
+ * At most 256 bytes of the payload are dumped.
+ */
+static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int len)
+{
+	struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+	unsigned int family = 0, func = cmd;
+	struct nd_cmd_pkg *pkg;
+
+	if (cmd == ND_CMD_CALL) {
+		/* Unwrap the call package so the real payload is described. */
+		pkg = buf;
+		family = pkg->nd_family;
+		func = pkg->nd_command;
+		len = pkg->nd_size_in;
+		buf = pkg->nd_payload;
+	}
+
+	dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n",
+		nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func,
+		len);
+	print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 16, 4,
+			buf, min(len, 256u), true);
+}
+
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -1205,6 +1360,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
cmd_rc = &__cmd_rc;
*cmd_rc = 0;
+ nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len);
+
if (nvdimm) {
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
@@ -1224,6 +1381,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
i = get_dimm(nfit_mem, func);
if (i < 0)
return i;
+ if (i >= NUM_DCR) {
+ dev_WARN_ONCE(&t->pdev.dev, 1,
+ "ND_CMD_CALL only valid for nfit_test0\n");
+ return -EINVAL;
+ }
switch (func) {
case NVDIMM_INTEL_GET_SECURITY_STATE:
@@ -1252,11 +1414,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case NVDIMM_INTEL_OVERWRITE:
rc = nd_intel_test_cmd_overwrite(t,
- buf, buf_len, i - t->dcr_idx);
+ buf, buf_len, i);
break;
case NVDIMM_INTEL_QUERY_OVERWRITE:
rc = nd_intel_test_cmd_query_overwrite(t,
- buf, buf_len, i - t->dcr_idx);
+ buf, buf_len, i);
break;
case NVDIMM_INTEL_SET_MASTER_PASSPHRASE:
rc = nd_intel_test_cmd_master_set_pass(t,
@@ -1266,54 +1428,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nd_intel_test_cmd_master_secure_erase(t,
buf, buf_len, i);
break;
+ case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO:
+ rc = nd_intel_test_cmd_fw_activate_dimminfo(
+ t, buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_FW_ACTIVATE_ARM:
+ rc = nd_intel_test_cmd_fw_activate_arm(
+ t, buf, buf_len, i);
+ break;
case ND_INTEL_ENABLE_LSS_STATUS:
rc = nd_intel_test_cmd_set_lss_status(t,
buf, buf_len);
break;
case ND_INTEL_FW_GET_INFO:
rc = nd_intel_test_get_fw_info(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_START_UPDATE:
rc = nd_intel_test_start_update(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_SEND_DATA:
rc = nd_intel_test_send_data(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_FINISH_UPDATE:
rc = nd_intel_test_finish_fw(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_FINISH_QUERY:
rc = nd_intel_test_finish_query(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_SMART:
rc = nfit_test_cmd_smart(buf, buf_len,
- &t->smart[i - t->dcr_idx]);
+ &t->smart[i]);
break;
case ND_INTEL_SMART_THRESHOLD:
rc = nfit_test_cmd_smart_threshold(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx]);
+ &t->smart_threshold[i]);
break;
case ND_INTEL_SMART_SET_THRESHOLD:
rc = nfit_test_cmd_smart_set_threshold(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx],
- &t->smart[i - t->dcr_idx],
+ &t->smart_threshold[i],
+ &t->smart[i],
&t->pdev.dev, t->dimm_dev[i]);
break;
case ND_INTEL_SMART_INJECT:
rc = nfit_test_cmd_smart_inject(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx],
- &t->smart[i - t->dcr_idx],
+ &t->smart_threshold[i],
+ &t->smart[i],
&t->pdev.dev, t->dimm_dev[i]);
break;
default:
@@ -1353,9 +1520,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
if (!nd_desc)
return -ENOTTY;
- if (cmd == ND_CMD_CALL) {
+ if (cmd == ND_CMD_CALL && call_pkg->nd_family
+ == NVDIMM_BUS_FAMILY_NFIT) {
func = call_pkg->nd_command;
-
buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
buf = (void *) call_pkg->nd_payload;
@@ -1379,7 +1546,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
default:
return -ENOTTY;
}
- }
+ } else if (cmd == ND_CMD_CALL && call_pkg->nd_family
+ == NVDIMM_BUS_FAMILY_INTEL) {
+ func = call_pkg->nd_command;
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+
+ switch (func) {
+ case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO:
+ rc = nvdimm_bus_intel_fw_activate_businfo(t,
+ buf, buf_len);
+ return rc;
+ case NVDIMM_BUS_INTEL_FW_ACTIVATE:
+ rc = nvdimm_bus_intel_fw_activate(t, buf,
+ buf_len);
+ return rc;
+ default:
+ return -ENOTTY;
+ }
+ } else if (cmd == ND_CMD_CALL)
+ return -ENOTTY;
if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
return -ENOTTY;
@@ -1805,6 +1991,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_flush_address *flush;
struct acpi_nfit_capabilities *pcap;
unsigned int offset = 0, i;
+ unsigned long *acpi_mask;
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -2507,10 +2694,10 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
- set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask);
set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
@@ -2531,6 +2718,12 @@ static void nfit_test0_setup(struct nfit_test *t)
&acpi_desc->dimm_cmd_force_en);
set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE,
&acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en);
+
+ acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL];
+ set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask);
+ set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -2699,14 +2892,18 @@ static int nfit_ctl_test(struct device *dev)
struct acpi_nfit_desc *acpi_desc;
const u64 test_val = 0x0123456789abcdefULL;
unsigned long mask, cmd_size, offset;
- union {
- struct nd_cmd_get_config_size cfg_size;
- struct nd_cmd_clear_error clear_err;
- struct nd_cmd_ars_status ars_stat;
- struct nd_cmd_ars_cap ars_cap;
- char buf[sizeof(struct nd_cmd_ars_status)
- + sizeof(struct nd_ars_record)];
- } cmds;
+ struct nfit_ctl_test_cmd {
+ struct nd_cmd_pkg pkg;
+ union {
+ struct nd_cmd_get_config_size cfg_size;
+ struct nd_cmd_clear_error clear_err;
+ struct nd_cmd_ars_status ars_stat;
+ struct nd_cmd_ars_cap ars_cap;
+ struct nd_intel_bus_fw_activate_businfo fwa_info;
+ char buf[sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record)];
+ };
+ } cmd;
adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
if (!adev)
@@ -2731,11 +2928,15 @@ static int nfit_ctl_test(struct device *dev)
.module = THIS_MODULE,
.provider_name = "ACPI.NFIT",
.ndctl = acpi_nfit_ctl,
- .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
- | 1UL << NFIT_CMD_ARS_INJECT_SET
- | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
- | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ .bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT
+ | 1UL << NVDIMM_BUS_FAMILY_INTEL,
},
+ .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+ | 1UL << NFIT_CMD_ARS_INJECT_SET
+ | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+ | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ .family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] =
+ NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK,
.dev = &adev->dev,
};
@@ -2766,21 +2967,21 @@ static int nfit_ctl_test(struct device *dev)
/* basic checkout of a typical 'get config size' command */
- cmd_size = sizeof(cmds.cfg_size);
- cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ cmd_size = sizeof(cmd.cfg_size);
+ cmd.cfg_size = (struct nd_cmd_get_config_size) {
.status = 0,
.config_size = SZ_128K,
.max_xfer = SZ_4K,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
- if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
- || cmds.cfg_size.config_size != SZ_128K
- || cmds.cfg_size.max_xfer != SZ_4K) {
+ if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0
+ || cmd.cfg_size.config_size != SZ_128K
+ || cmd.cfg_size.max_xfer != SZ_4K) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
return -EIO;
@@ -2789,14 +2990,14 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with zero output */
cmd_size = offsetof(struct nd_cmd_ars_status, address);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = 0,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2806,16 +3007,16 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_cap with benign extended status */
- cmd_size = sizeof(cmds.ars_cap);
- cmds.ars_cap = (struct nd_cmd_ars_cap) {
+ cmd_size = sizeof(cmd.ars_cap);
+ cmd.ars_cap = (struct nd_cmd_ars_cap) {
.status = ND_ARS_PERSISTENT << 16,
};
offset = offsetof(struct nd_cmd_ars_cap, status);
- rc = setup_result(cmds.buf + offset, cmd_size - offset);
+ rc = setup_result(cmd.buf + offset, cmd_size - offset);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2825,19 +3026,19 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with 'status' trimmed from 'out_length' */
- cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = cmd_size - 4,
};
- record = &cmds.ars_stat.records[0];
+ record = &cmd.ars_stat.records[0];
*record = (struct nd_ars_record) {
.length = test_val,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc || record->length != test_val) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2847,19 +3048,19 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with 'Output (Size)' including 'status' */
- cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = cmd_size,
};
- record = &cmds.ars_stat.records[0];
+ record = &cmd.ars_stat.records[0];
*record = (struct nd_ars_record) {
.length = test_val,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc || record->length != test_val) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2869,15 +3070,15 @@ static int nfit_ctl_test(struct device *dev)
/* test extended status for get_config_size results in failure */
- cmd_size = sizeof(cmds.cfg_size);
- cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ cmd_size = sizeof(cmd.cfg_size);
+ cmd.cfg_size = (struct nd_cmd_get_config_size) {
.status = 1 << 16,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc >= 0) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2886,16 +3087,46 @@ static int nfit_ctl_test(struct device *dev)
}
/* test clear error */
- cmd_size = sizeof(cmds.clear_err);
- cmds.clear_err = (struct nd_cmd_clear_error) {
+ cmd_size = sizeof(cmd.clear_err);
+ cmd.clear_err = (struct nd_cmd_clear_error) {
.length = 512,
.cleared = 512,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ /* test firmware activate bus info */
+ cmd_size = sizeof(cmd.fwa_info);
+ cmd = (struct nfit_ctl_test_cmd) {
+ .pkg = {
+ .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO,
+ .nd_family = NVDIMM_BUS_FAMILY_INTEL,
+ .nd_size_out = cmd_size,
+ .nd_fw_size = cmd_size,
+ },
+ .fwa_info = {
+ .state = ND_INTEL_FWA_IDLE,
+ .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_OSQUIESCE,
+ .activate_tmo = 1,
+ .cpu_quiesce_tmo = 1,
+ .io_quiesce_tmo = 1,
+ .max_quiesce_tmo = 1,
+ },
+ };
+ rc = setup_result(cmd.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL,
+ &cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 017ce2a7ae36..9018f45d631d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -32,6 +32,7 @@ TARGETS += lkdtm
TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
+TARGETS += mincore
TARGETS += mount
TARGETS += mqueue
TARGETS += net
@@ -56,6 +57,7 @@ TARGETS += splice
TARGETS += static_keys
TARGETS += sync
TARGETS += sysctl
+TARGETS += tc-testing
TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 22aaec74ea0a..a83b5827532f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -102,7 +102,7 @@ endif
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg,CLEAN)
- $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
include ../lib.mk
@@ -111,6 +111,7 @@ SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
INCLUDE_DIR := $(SCRATCH_DIR)/include
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
@@ -122,26 +123,32 @@ $(notdir $(TEST_GEN_PROGS) \
$(TEST_GEN_PROGS_EXTENDED) \
$(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+$(OUTPUT)/%.o: %.c
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+
$(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
- $(LINK.c) $^ $(LDLIBS) -o $@
+ $(Q)$(LINK.c) $^ $(LDLIBS) -o $@
$(OUTPUT)/urandom_read: urandom_read.c
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
+ $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
- $(CC) -c $(CFLAGS) -o $@ $<
+ $(Q)$(CC) -c $(CFLAGS) -o $@ $<
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
-$(OUTPUT)/runqslower: $(BPFOBJ)
+$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
@@ -163,7 +170,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
-DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(BPFOBJ) | $(BUILD_DIR)/bpftool
@@ -177,13 +183,28 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
+$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
- mkdir -p $@
+ $(Q)mkdir -p $@
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
- $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(call msg,CP,,$@)
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \
+ $(TOOLSDIR)/bpf/resolve_btfids/main.c \
+ $(TOOLSDIR)/lib/rbtree.c \
+ $(TOOLSDIR)/lib/zalloc.c \
+ $(TOOLSDIR)/lib/string.c \
+ $(TOOLSDIR)/lib/ctype.c \
+ $(TOOLSDIR)/lib/str_error_r.c
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
+ OUTPUT=$(BUILD_DIR)/resolve_btfids/ BPFOBJ=$(BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
@@ -221,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -284,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),)
$(TRUNNER_OUTPUT)-dir := y
$(TRUNNER_OUTPUT):
$$(call msg,MKDIR,,$$@)
- mkdir -p $$@
+ $(Q)mkdir -p $$@
endif
# ensure we set up BPF objects generation rule just once for a given
@@ -304,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
$(TRUNNER_OUTPUT)/%.o \
| $(BPFTOOL) $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen skeleton $$< > $$@
endif
# ensure we set up tests.h header generation rule just once
@@ -328,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_BPF_SKELS) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
- cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
@@ -336,20 +357,22 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
- $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
# only copy extra resources if in flavored build
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
ifneq ($2,)
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(RESOLVE_BTFIDS) \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
endef
@@ -402,17 +425,17 @@ verifier/tests.h: verifier/*.c
) > verifier/tests.h)
$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
- $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
$(call msg,CC,,$@)
- $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
@@ -425,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
$(OUTPUT)/bench_trigger.o \
$(OUTPUT)/bench_ringbufs.o
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 6f8988738bc1..719ab56cdb5d 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,20 +2,6 @@
#ifndef __BPF_LEGACY__
#define __BPF_LEGACY__
-/*
- * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
- * use outside of samples/bpf
- */
-struct bpf_map_def_legacy {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
- unsigned int numa_node;
-};
-
#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
struct ____btf_map_##name { \
type_key key; \
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0fb910df5387..033051717ba5 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -290,3 +290,26 @@ free_mem:
free(fhp);
return ret;
}
+
+/*
+ * cgroup_setup_and_join() - set up the cgroup test environment, then create
+ * and join a cgroup
+ * @path: passed unchanged to create_and_get_cgroup() and join_cgroup()
+ *
+ * On success returns the file descriptor obtained from
+ * create_and_get_cgroup(). On failure a message is printed to stderr and a
+ * negative value is returned: -EINVAL when the environment setup or the
+ * join fails, or the negative value reported by create_and_get_cgroup().
+ * Once the environment has been set up, every failure path tears it down
+ * again with cleanup_cgroup_environment().
+ */
+int cgroup_setup_and_join(const char *path)
+{
+	int cg_fd;
+
+	if (setup_cgroup_environment()) {
+		fprintf(stderr, "Failed to setup cgroup environment\n");
+		return -EINVAL;
+	}
+
+	cg_fd = create_and_get_cgroup(path);
+	if (cg_fd < 0) {
+		fprintf(stderr, "Failed to create test cgroup\n");
+		cleanup_cgroup_environment();
+		return cg_fd;
+	}
+
+	if (join_cgroup(path)) {
+		fprintf(stderr, "Failed to join cgroup\n");
+		/* The caller never sees cg_fd on this path, so release it. */
+		close(cg_fd);
+		cleanup_cgroup_environment();
+		return -EINVAL;
+	}
+	return cg_fd;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index d64bb8957090..5fe3d88e4f0d 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -9,6 +9,7 @@
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+int cgroup_setup_and_join(const char *path);
int create_and_get_cgroup(const char *path);
int join_cgroup(const char *path);
int setup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index e8da7b39158d..b8d6aef99db4 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -58,20 +58,10 @@ int main(int argc, char **argv)
int exit_code = 1;
char buf[256];
- err = setup_cgroup_environment();
- if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
- errno))
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
- cgroup_fd, errno))
- goto cleanup_cgroup_env;
-
- err = join_cgroup(TEST_CGROUP);
- if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
- goto cleanup_cgroup_env;
-
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index e36dd1a1780d..f56655690f9b 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -7,8 +7,6 @@
#include <arpa/inet.h>
-#include <sys/epoll.h>
-
#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -17,8 +15,13 @@
#include "network_helpers.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
- __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_err(MSG, ...) ({ \
+ int __save = errno; \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), \
+ ##__VA_ARGS__); \
+ errno = __save; \
+})
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -37,131 +40,169 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
-int start_server_with_port(int family, int type, __u16 port)
+static int settimeo(int fd, int timeout_ms)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
- int fd;
+ struct timeval timeout = { .tv_sec = 3 };
- if (family == AF_INET) {
- struct sockaddr_in *sin = (void *)&addr;
+ if (timeout_ms > 0) {
+ timeout.tv_sec = timeout_ms / 1000;
+ timeout.tv_usec = (timeout_ms % 1000) * 1000;
+ }
- sin->sin_family = AF_INET;
- sin->sin_port = htons(port);
- len = sizeof(*sin);
- } else {
- struct sockaddr_in6 *sin6 = (void *)&addr;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_RCVTIMEO");
+ return -1;
+ }
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = htons(port);
- len = sizeof(*sin6);
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_SNDTIMEO");
+ return -1;
}
- fd = socket(family, type | SOCK_NONBLOCK, 0);
+ return 0;
+}
+
+/* Close fd while keeping the errno value that was set before the close(). */
+#define save_errno_close(fd) ({			\
+	int __saved_errno = errno;		\
+	close(fd);				\
+	errno = __saved_errno;			\
+})
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len;
+ int fd;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &len))
+ return -1;
+
+ fd = socket(family, type, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
log_err("Failed to bind socket");
- close(fd);
- return -1;
+ goto error_close;
}
if (type == SOCK_STREAM) {
if (listen(fd, 1) < 0) {
log_err("Failed to listed on socket");
- close(fd);
- return -1;
+ goto error_close;
}
}
return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
}
-int start_server(int family, int type)
+static int connect_fd_to_addr(int fd,
+ const struct sockaddr_storage *addr,
+ socklen_t addrlen)
{
- return start_server_with_port(family, type, 0);
-}
+ if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
+ log_err("Failed to connect to server");
+ return -1;
+ }
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
+ return 0;
+}
-int connect_to_fd(int family, int type, int server_fd)
+int connect_to_fd(int server_fd, int timeout_ms)
{
- int fd, save_errno;
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr_in;
+ socklen_t addrlen, optlen;
+ int fd, type;
- fd = socket(family, type, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
+ optlen = sizeof(type);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+ log_err("getsockopt(SOL_TYPE)");
return -1;
}
- if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
- save_errno = errno;
- close(fd);
- errno = save_errno;
+ addrlen = sizeof(addr);
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+ log_err("Failed to get server addr");
return -1;
}
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ if (connect_fd_to_addr(fd, &addr, addrlen))
+ goto error_close;
+
return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
}
-int connect_fd_to_fd(int client_fd, int server_fd)
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
- int save_errno;
- if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen)) {
- log_err("Failed to set SO_RCVTIMEO");
+ if (settimeo(client_fd, timeout_ms))
return -1;
- }
if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
log_err("Failed to get server addr");
return -1;
}
- if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
- if (errno != EINPROGRESS) {
- save_errno = errno;
- log_err("Failed to connect to server");
- errno = save_errno;
- }
+ if (connect_fd_to_addr(client_fd, &addr, len))
return -1;
- }
return 0;
}
-int connect_wait(int fd)
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len)
{
- struct epoll_event ev = {}, events[2];
- int timeout_ms = 1000;
- int efd, nfd;
-
- efd = epoll_create1(EPOLL_CLOEXEC);
- if (efd < 0) {
- log_err("Failed to open epoll fd");
- return -1;
- }
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (void *)addr;
- ev.events = EPOLLRDHUP | EPOLLOUT;
- ev.data.fd = fd;
+ sin->sin_family = AF_INET;
+ sin->sin_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+ log_err("inet_pton(AF_INET, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin);
+ return 0;
+ } else if (family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (void *)addr;
- if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
- log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
- close(efd);
- return -1;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+ log_err("inet_pton(AF_INET6, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin6);
+ return 0;
}
-
- nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
- if (nfd < 0)
- log_err("Failed to wait for I/O event on epoll fd=%d", efd);
-
- close(efd);
- return nfd;
+ return -1;
}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 6a8009605670..c3728f6667e4 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,10 +33,11 @@ struct ipv6_packet {
} __packed;
extern struct ipv6_packet pkt_v6;
-int start_server(int family, int type);
-int start_server_with_port(int family, int type, __u16 port);
-int connect_to_fd(int family, int type, int server_fd);
-int connect_fd_to_fd(int client_fd, int server_fd);
-int connect_wait(int client_fd);
+int start_server(int family, int type, const char *addr, __u16 port,
+ int timeout_ms);
+int connect_to_fd(int server_fd, int timeout_ms);
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c
new file mode 100644
index 000000000000..3693f7d133eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoload.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_autoload.skel.h"
+
+/*
+ * Check per-program autoload control on the test_autoload skeleton:
+ * a plain open_and_load is expected to fail, while loading after turning
+ * autoload off for prog3 is expected to succeed.  After attaching, prog1
+ * and prog2 must have been called and prog3 must not.
+ */
+void test_autoload(void)
+{
+	struct test_autoload *skel;
+	int err, duration = 0;
+
+	/* prog3 should be broken, so loading everything must not succeed */
+	skel = test_autoload__open_and_load();
+	if (CHECK(skel, "skel_open_and_load", "unexpected success\n"))
+		goto cleanup;
+
+	skel = test_autoload__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		goto cleanup;
+
+	/* don't load prog3 */
+	bpf_program__set_autoload(skel->progs.prog3, false);
+
+	err = test_autoload__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	err = test_autoload__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* issue a syscall so the attached programs get a chance to run */
+	usleep(1);
+
+	CHECK(!skel->bss->prog1_called, "prog1", "not called\n");
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+cleanup:
+	test_autoload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf9..7375d9a6d242 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -5,11 +5,23 @@
#include "bpf_iter_netlink.skel.h"
#include "bpf_iter_bpf_map.skel.h"
#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
#include "bpf_iter_test_kern4.skel.h"
+#include "bpf_iter_bpf_hash_map.skel.h"
+#include "bpf_iter_bpf_percpu_hash_map.skel.h"
+#include "bpf_iter_bpf_array_map.skel.h"
+#include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_map.skel.h"
+#include "bpf_iter_test_kern5.skel.h"
+#include "bpf_iter_test_kern6.skel.h"
static int duration;
@@ -106,6 +118,20 @@ static void test_task(void)
bpf_iter_task__destroy(skel);
}
+/* Load the task_stack iterator skeleton and run one dummy read over it. */
+static void test_task_stack(void)
+{
+	struct bpf_iter_task_stack *skel = bpf_iter_task_stack__open_and_load();
+
+	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_stack);
+	bpf_iter_task_stack__destroy(skel);
+}
+
static void test_task_file(void)
{
struct bpf_iter_task_file *skel;
@@ -120,6 +146,62 @@ static void test_task_file(void)
bpf_iter_task_file__destroy(skel);
}
+/* Load the tcp4 iterator skeleton and run one dummy read over it. */
+static void test_tcp4(void)
+{
+	struct bpf_iter_tcp4 *skel = bpf_iter_tcp4__open_and_load();
+
+	if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp4);
+	bpf_iter_tcp4__destroy(skel);
+}
+
+/* Load the tcp6 iterator skeleton and run one dummy read over it. */
+static void test_tcp6(void)
+{
+	struct bpf_iter_tcp6 *skel = bpf_iter_tcp6__open_and_load();
+
+	if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp6);
+	bpf_iter_tcp6__destroy(skel);
+}
+
+/* Load the udp4 iterator skeleton and run one dummy read over it. */
+static void test_udp4(void)
+{
+	struct bpf_iter_udp4 *skel = bpf_iter_udp4__open_and_load();
+
+	if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp4);
+	bpf_iter_udp4__destroy(skel);
+}
+
+/* Load the udp6 iterator skeleton and run one dummy read over it. */
+static void test_udp6(void)
+{
+	struct bpf_iter_udp6 *skel = bpf_iter_udp6__open_and_load();
+
+	if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp6);
+	bpf_iter_udp6__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
@@ -380,6 +462,464 @@ out:
bpf_iter_test_kern4__destroy(skel);
}
+/*
+ * Exercise the dump_bpf_hash_map iterator program.
+ *
+ * Attaching the iterator to hashmap2 and hashmap3 is expected to be
+ * rejected.  hashmap1 is then filled with known keys and values, the
+ * iterator is attached to it and read to the end, and the key/value sums
+ * published by the program in .bss are compared with the sums computed
+ * here.
+ */
+static void test_bpf_hash_map(void)
+{
+	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_hash_map *skel;
+	int err, i, len, map_fd, iter_fd;
+	union bpf_iter_link_info linfo;
+	__u64 val, expected_val = 0;
+	struct bpf_link *link;
+	struct key_t {
+		int a;
+		int b;
+		int c;
+	} key;
+	char buf[64];
+
+	skel = bpf_iter_bpf_hash_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
+		  "skeleton open failed\n"))
+		return;
+
+	skel->bss->in_test_mode = true;
+
+	/* check the load result itself; skel is already known to be non-NULL */
+	err = bpf_iter_bpf_hash_map__load(skel);
+	if (CHECK(err, "bpf_iter_bpf_hash_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* iterator with hashmap2 and hashmap3 should fail */
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter",
+		  "attach_iter for hashmap2 unexpected succeeded\n"))
+		goto out;
+
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter",
+		  "attach_iter for hashmap3 unexpected succeeded\n"))
+		goto out;
+
+	/* hashmap1 should be good, update map values here */
+	map_fd = bpf_map__fd(skel->maps.hashmap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+		key.a = i + 1;
+		key.b = i + 2;
+		key.c = i + 3;
+		val = i + 4;
+		expected_key_a += key.a;
+		expected_key_b += key.b;
+		/*
+		 * NOTE(review): expected_key_c is accumulated but never
+		 * compared below - confirm whether a matching sum is
+		 * exported by the BPF program.
+		 */
+		expected_key_c += key.c;
+		expected_val += val;
+
+		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	linfo.map.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->key_sum_a != expected_key_a,
+		  "key_sum_a", "got %u expected %u\n",
+		  skel->bss->key_sum_a, expected_key_a))
+		goto close_iter;
+	if (CHECK(skel->bss->key_sum_b != expected_key_b,
+		  "key_sum_b", "got %u expected %u\n",
+		  skel->bss->key_sum_b, expected_key_b))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %llu expected %llu\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_hash_map__destroy(skel);
+}
+
+/*
+ * Exercise the dump_bpf_percpu_hash_map iterator program.
+ *
+ * hashmap1 is filled with known keys and one value per possible CPU, the
+ * iterator is attached to it and read to the end, and the key/value sums
+ * published by the program in .bss are compared with the sums computed
+ * here.
+ */
+static void test_bpf_percpu_hash_map(void)
+{
+	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_percpu_hash_map *skel;
+	int err, i, j, len, map_fd, iter_fd;
+	union bpf_iter_link_info linfo;
+	__u32 expected_val = 0;
+	struct bpf_link *link;
+	struct key_t {
+		int a;
+		int b;
+		int c;
+	} key;
+	char buf[64];
+	void *val;
+
+	/* one 8-byte slot per possible CPU */
+	val = malloc(8 * bpf_num_possible_cpus());
+	if (CHECK(!val, "malloc", "failed to allocate value buffer\n"))
+		return;
+
+	skel = bpf_iter_bpf_percpu_hash_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
+		  "skeleton open failed\n"))
+		goto free_val;
+
+	skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+	/* check the load result itself; skel is already known to be non-NULL */
+	err = bpf_iter_bpf_percpu_hash_map__load(skel);
+	if (CHECK(err, "bpf_iter_bpf_percpu_hash_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* update map values here */
+	map_fd = bpf_map__fd(skel->maps.hashmap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+		key.a = i + 1;
+		key.b = i + 2;
+		key.c = i + 3;
+		expected_key_a += key.a;
+		expected_key_b += key.b;
+		expected_key_c += key.c;
+
+		/* write a __u32 at the start of each CPU's 8-byte slot */
+		for (j = 0; j < bpf_num_possible_cpus(); j++) {
+			*(__u32 *)(val + j * 8) = i + j;
+			expected_val += i + j;
+		}
+
+		err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->key_sum_a != expected_key_a,
+		  "key_sum_a", "got %u expected %u\n",
+		  skel->bss->key_sum_a, expected_key_a))
+		goto close_iter;
+	if (CHECK(skel->bss->key_sum_b != expected_key_b,
+		  "key_sum_b", "got %u expected %u\n",
+		  skel->bss->key_sum_b, expected_key_b))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_percpu_hash_map__destroy(skel);
+free_val:
+	/* release the per-CPU value buffer on every exit path */
+	free(val);
+}
+
+/*
+ * Exercise the dump_bpf_array_map iterator program.
+ *
+ * arraymap1 is filled with known values, the iterator is attached to it
+ * and its output is collected into buf.  The first key/value pair found in
+ * the output and the key/value sums published by the program in .bss are
+ * compared with the expected ones.  Finally every element is looked up
+ * again and is expected to hold its own index.
+ */
+static void test_bpf_array_map(void)
+{
+	__u64 val, expected_val = 0, res_first_val, first_val = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	__u32 expected_key = 0, res_first_key;
+	struct bpf_iter_bpf_array_map *skel;
+	union bpf_iter_link_info linfo;
+	int err, i, map_fd, iter_fd;
+	struct bpf_link *link;
+	char buf[64] = {};
+	int len, start;
+
+	skel = bpf_iter_bpf_array_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.arraymap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		val = i + 4;
+		expected_key += i;
+		expected_val += val;
+
+		if (i == 0)
+			first_val = val;
+
+		err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* collect the iterator output, appending each chunk to buf */
+	start = 0;
+	while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
+		start += len;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results: the output starts with a __u32 key then a __u64 value */
+	res_first_key = *(__u32 *)buf;
+	res_first_val = *(__u64 *)(buf + sizeof(__u32));
+	if (CHECK(res_first_key != 0 || res_first_val != first_val,
+		  "bpf_seq_write",
+		  "seq_write failure: first key %u vs expected 0, "
+		  " first value %llu vs expected %llu\n",
+		  res_first_key, res_first_val, first_val))
+		goto close_iter;
+
+	if (CHECK(skel->bss->key_sum != expected_key,
+		  "key_sum", "got %u expected %u\n",
+		  skel->bss->key_sum, expected_key))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %llu expected %llu\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+	/*
+	 * Each element is expected to equal its index after the iterator ran
+	 * (presumably rewritten by the BPF program - verify against it).
+	 * Failures must leave through close_iter so that iter_fd and link,
+	 * both live at this point, are released.
+	 */
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		err = bpf_map_lookup_elem(map_fd, &i, &val);
+		if (CHECK(err, "map_lookup", "map_lookup failed\n"))
+			goto close_iter;
+		if (CHECK(i != val, "invalid_val",
+			  "got value %llu expected %u\n", val, i))
+			goto close_iter;
+	}
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_array_map__destroy(skel);
+}
+
+/*
+ * Exercise the dump_bpf_percpu_array_map iterator program.
+ *
+ * arraymap1 is filled with one known value per possible CPU for every
+ * index, the iterator is attached to it and read to the end, and the
+ * key/value sums published by the program in .bss are compared with the
+ * sums computed here.
+ */
+static void test_bpf_percpu_array_map(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_percpu_array_map *skel;
+	__u32 expected_key = 0, expected_val = 0;
+	union bpf_iter_link_info linfo;
+	int err, i, j, map_fd, iter_fd;
+	struct bpf_link *link;
+	char buf[64];
+	void *val;
+	int len;
+
+	/* one 8-byte slot per possible CPU */
+	val = malloc(8 * bpf_num_possible_cpus());
+	if (CHECK(!val, "malloc", "failed to allocate value buffer\n"))
+		return;
+
+	skel = bpf_iter_bpf_percpu_array_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
+		  "skeleton open failed\n"))
+		goto free_val;
+
+	skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+	/* check the load result itself; skel is already known to be non-NULL */
+	err = bpf_iter_bpf_percpu_array_map__load(skel);
+	if (CHECK(err, "bpf_iter_bpf_percpu_array_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* update map values here */
+	map_fd = bpf_map__fd(skel->maps.arraymap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		expected_key += i;
+
+		/* write a __u32 at the start of each CPU's 8-byte slot */
+		for (j = 0; j < bpf_num_possible_cpus(); j++) {
+			*(__u32 *)(val + j * 8) = i + j;
+			expected_val += i + j;
+		}
+
+		err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->key_sum != expected_key,
+		  "key_sum", "got %u expected %u\n",
+		  skel->bss->key_sum, expected_key))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_percpu_array_map__destroy(skel);
+free_val:
+	/* release the per-CPU value buffer on every exit path */
+	free(val);
+}
+
+/*
+ * Exercise the dump_bpf_sk_storage_map iterator program.
+ *
+ * Three AF_INET6 stream sockets are created and each gets a value stored
+ * in sk_stg_map.  The iterator is attached to that map and read to the
+ * end, then the socket count and value sum published by the program in
+ * .bss are compared with the expected ones.
+ */
+static void test_bpf_sk_storage_map(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_sk_storage_map *skel;
+	int sock_fd[3] = {-1, -1, -1};
+	int err, i, len, map_fd, iter_fd, num_sockets;
+	union bpf_iter_link_info linfo;
+	__u32 val, expected_val = 0;
+	struct bpf_link *link;
+	char buf[64];
+
+	skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	num_sockets = ARRAY_SIZE(sock_fd);
+	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+	/* create the sockets and attach a distinct value to each of them */
+	for (i = 0; i < num_sockets; i++) {
+		sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
+		if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno))
+			goto out;
+
+		val = i + 1;
+		expected_val += val;
+
+		err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
+					  BPF_NOEXIST);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	do {
+		len = read(iter_fd, buf, sizeof(buf));
+	} while (len > 0);
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
+		  "ipv6_sk_count", "got %u expected %u\n",
+		  skel->bss->ipv6_sk_count, num_sockets))
+		goto close_iter;
+
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	/* only close the sockets that were actually opened */
+	for (i = 0; i < num_sockets; i++) {
+		if (sock_fd[i] < 0)
+			continue;
+		close(sock_fd[i]);
+	}
+	bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
+/*
+ * Attaching the kern5 iterator program to hashmap1 is expected to fail.
+ * If the attach unexpectedly succeeds, the failure is reported and the
+ * link is destroyed.
+ */
+static void test_rdonly_buf_out_of_bound(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_test_kern5 *skel;
+	union bpf_iter_link_info linfo;
+	struct bpf_link *link;
+
+	skel = bpf_iter_test_kern5__open_and_load();
+	if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	opts.link_info_len = sizeof(linfo);
+	opts.link_info = &linfo;
+
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) {
+		/* the attach went through, so there is a link to release */
+		bpf_link__destroy(link);
+	}
+
+	bpf_iter_test_kern5__destroy(skel);
+}
+
+/*
+ * Loading the kern6 skeleton is expected to fail.  If it unexpectedly
+ * succeeds, the failure is reported and the skeleton is destroyed.
+ */
+static void test_buf_neg_offset(void)
+{
+	struct bpf_iter_test_kern6 *skel = bpf_iter_test_kern6__open_and_load();
+
+	if (CHECK(skel, "bpf_iter_test_kern6__open_and_load",
+		  "skeleton open_and_load unexpected success\n")) {
+		bpf_iter_test_kern6__destroy(skel);
+	}
+}
+
void test_bpf_iter(void)
{
if (test__start_subtest("btf_id_or_null"))
@@ -392,8 +932,18 @@ void test_bpf_iter(void)
test_bpf_map();
if (test__start_subtest("task"))
test_task();
+ if (test__start_subtest("task_stack"))
+ test_task_stack();
if (test__start_subtest("task_file"))
test_task_file();
+ if (test__start_subtest("tcp4"))
+ test_tcp4();
+ if (test__start_subtest("tcp6"))
+ test_tcp6();
+ if (test__start_subtest("udp4"))
+ test_udp4();
+ if (test__start_subtest("udp6"))
+ test_udp6();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
@@ -406,4 +956,18 @@ void test_bpf_iter(void)
test_overflow(true, false);
if (test__start_subtest("prog-ret-1"))
test_overflow(false, true);
+ if (test__start_subtest("bpf_hash_map"))
+ test_bpf_hash_map();
+ if (test__start_subtest("bpf_percpu_hash_map"))
+ test_bpf_percpu_hash_map();
+ if (test__start_subtest("bpf_array_map"))
+ test_bpf_array_map();
+ if (test__start_subtest("bpf_percpu_array_map"))
+ test_bpf_percpu_array_map();
+ if (test__start_subtest("bpf_sk_storage_map"))
+ test_bpf_sk_storage_map();
+ if (test__start_subtest("rdonly-buf-out-of-bound"))
+ test_rdonly_buf_out_of_bound();
+ if (test__start_subtest("buf-neg-offset"))
+ test_buf_neg_offset();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
new file mode 100644
index 000000000000..643dfa35419c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+#include "cg_storage_multi_egress_only.skel.h"
+#include "cg_storage_multi_isolated.skel.h"
+#include "cg_storage_multi_shared.skel.h"
+
+#define PARENT_CGROUP "/cgroup_storage"
+#define CHILD_CGROUP "/cgroup_storage/child"
+
+static int duration;
+
+static bool assert_storage(struct bpf_map *map, const void *key,
+ struct cgroup_value *expected)
+{
+ struct cgroup_value value;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+
+ if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0,
+ "map-lookup", "errno %d", errno))
+ return true;
+ if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)),
+ "assert-storage", "storages differ"))
+ return true;
+
+ return false;
+}
+
+static bool assert_storage_noexist(struct bpf_map *map, const void *key)
+{
+ struct cgroup_value value;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+
+ if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0,
+ "map-lookup", "succeeded, expected ENOENT"))
+ return true;
+ if (CHECK(errno != ENOENT,
+ "map-lookup", "errno %d, expected ENOENT", errno))
+ return true;
+
+ return false;
+}
+
+static bool connect_send(const char *cgroup_path)
+{
+ bool res = true;
+ int server_fd = -1, client_fd = -1;
+
+ if (join_cgroup(cgroup_path))
+ goto out_clean;
+
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+ if (server_fd < 0)
+ goto out_clean;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (client_fd < 0)
+ goto out_clean;
+
+ if (send(client_fd, "message", strlen("message"), 0) < 0)
+ goto out_clean;
+
+ res = false;
+
+out_clean:
+ close(client_fd);
+ close(server_fd);
+ return res;
+}
+
+static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_egress_only *obj;
+ struct cgroup_value expected_cgroup_value;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_link *parent_link = NULL, *child_link = NULL;
+ bool err;
+
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+
+ obj = cg_storage_multi_egress_only__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+ * Assert that there is only one run and in that run the storage is
+ * parent cgroup's storage.
+ * Also assert that child cgroup's storage does not exist
+ */
+ parent_link = bpf_program__attach_cgroup(obj->progs.egress,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
+ "err %ld", PTR_ERR(parent_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 1,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+ * Assert that there are two additional runs, one that runs with parent
+ * cgroup's storage and one with child cgroup's storage.
+ */
+ child_link = bpf_program__attach_cgroup(obj->progs.egress,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_link), "child-cg-attach",
+ "err %ld", PTR_ERR(child_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ if (!IS_ERR(parent_link))
+ bpf_link__destroy(parent_link);
+ if (!IS_ERR(child_link))
+ bpf_link__destroy(child_link);
+
+ cg_storage_multi_egress_only__destroy(obj);
+}
+
+static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_isolated *obj;
+ struct cgroup_value expected_cgroup_value;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+ struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+ struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+ bool err;
+
+ obj = cg_storage_multi_isolated__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+ * Assert that there are three runs, two with parent cgroup egress and
+ * one with parent cgroup ingress, stored in separate parent storages.
+ * Also assert that child cgroup's storages do not exist.
+ */
+ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+ "err %ld", PTR_ERR(parent_egress1_link)))
+ goto close_bpf_object;
+ parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+ "err %ld", PTR_ERR(parent_egress2_link)))
+ goto close_bpf_object;
+ parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+ "err %ld", PTR_ERR(parent_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+ * Assert that there are six additional runs, parent cgroup egresses and
+ * ingress, child cgroup egresses and ingress.
+ * Assert that egress and ingress storages are separate.
+ */
+ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+ "err %ld", PTR_ERR(child_egress1_link)))
+ goto close_bpf_object;
+ child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+ "err %ld", PTR_ERR(child_egress2_link)))
+ goto close_bpf_object;
+ child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+ "err %ld", PTR_ERR(child_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 9,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ if (!IS_ERR(parent_egress1_link))
+ bpf_link__destroy(parent_egress1_link);
+ if (!IS_ERR(parent_egress2_link))
+ bpf_link__destroy(parent_egress2_link);
+ if (!IS_ERR(parent_ingress_link))
+ bpf_link__destroy(parent_ingress_link);
+ if (!IS_ERR(child_egress1_link))
+ bpf_link__destroy(child_egress1_link);
+ if (!IS_ERR(child_egress2_link))
+ bpf_link__destroy(child_egress2_link);
+ if (!IS_ERR(child_ingress_link))
+ bpf_link__destroy(child_ingress_link);
+
+ cg_storage_multi_isolated__destroy(obj);
+}
+
+static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_shared *obj;
+ struct cgroup_value expected_cgroup_value;
+ __u64 key;
+ struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+ struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+ struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+ bool err;
+
+ obj = cg_storage_multi_shared__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+ * Assert that there are three runs, two with parent cgroup egress and
+ * one with parent cgroup ingress.
+ * Also assert that child cgroup's storage does not exist.
+ */
+ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+ "err %ld", PTR_ERR(parent_egress1_link)))
+ goto close_bpf_object;
+ parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+ "err %ld", PTR_ERR(parent_egress2_link)))
+ goto close_bpf_object;
+ parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+ "err %ld", PTR_ERR(parent_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 2,
+ .ingress_pkts = 1,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key = get_cgroup_id(CHILD_CGROUP);
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+ * Assert that there are six additional runs, parent cgroup egresses and
+ * ingress, child cgroup egresses and ingress.
+ */
+ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+ "err %ld", PTR_ERR(child_egress1_link)))
+ goto close_bpf_object;
+ child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+ "err %ld", PTR_ERR(child_egress2_link)))
+ goto close_bpf_object;
+ child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+ "err %ld", PTR_ERR(child_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 9,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 4,
+ .ingress_pkts = 2,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key = get_cgroup_id(CHILD_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 2,
+ .ingress_pkts = 1,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ if (!IS_ERR(parent_egress1_link))
+ bpf_link__destroy(parent_egress1_link);
+ if (!IS_ERR(parent_egress2_link))
+ bpf_link__destroy(parent_egress2_link);
+ if (!IS_ERR(parent_ingress_link))
+ bpf_link__destroy(parent_ingress_link);
+ if (!IS_ERR(child_egress1_link))
+ bpf_link__destroy(child_egress1_link);
+ if (!IS_ERR(child_egress2_link))
+ bpf_link__destroy(child_egress2_link);
+ if (!IS_ERR(child_ingress_link))
+ bpf_link__destroy(child_ingress_link);
+
+ cg_storage_multi_shared__destroy(obj);
+}
+
+void test_cg_storage_multi(void)
+{
+ int parent_cgroup_fd = -1, child_cgroup_fd = -1;
+
+ parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP);
+ if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno))
+ goto close_cgroup_fd;
+ child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP);
+ if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno))
+ goto close_cgroup_fd;
+
+ if (test__start_subtest("egress_only"))
+ test_egress_only(parent_cgroup_fd, child_cgroup_fd);
+
+ if (test__start_subtest("isolated"))
+ test_isolated(parent_cgroup_fd, child_cgroup_fd);
+
+ if (test__start_subtest("shared"))
+ test_shared(parent_cgroup_fd, child_cgroup_fd);
+
+close_cgroup_fd:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 6e04f8d1d15b..4d9b514b3fd9 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "test_cgroup_link.skel.h"
static __u32 duration = 0;
@@ -37,7 +38,8 @@ void test_cgroup_link(void)
int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
- __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id;
+ struct bpf_link_info info;
int i = 0, err, prog_fd;
bool detach_legacy = false;
@@ -219,6 +221,22 @@ void test_cgroup_link(void)
/* BPF programs should still get called */
ping_and_check(0, cg_nr);
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ err = bpf_link__detach(links[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ /* cgroup_id should be zero in link_info */
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ /* First BPF program shouldn't be called anymore */
+ ping_and_check(0, cg_nr - 1);
+
/* leave cgroup and remove them, don't detach programs */
cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 059047af7df3..464edc1c1708 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
socklen_t addr_len = sizeof(addr);
__u32 duration = 0;
- serv_sk = start_server(AF_INET6, SOCK_STREAM);
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
return;
@@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
*g_serv_port = addr.sin6_port;
/* Client outside of test cgroup should fail to connect by timeout. */
- err = connect_fd_to_fd(out_sk, serv_sk);
+ err = connect_fd_to_fd(out_sk, serv_sk, 1000);
if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
"unexpected result err %d errno %d\n", err, errno))
goto cleanup;
- err = connect_wait(out_sk);
- if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
- goto cleanup;
-
/* Client inside test cgroup should connect just fine. */
- in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+ in_sk = connect_to_fd(serv_sk, 0);
if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
goto cleanup;
@@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void)
* differs from that of testing cgroup. Moving selftests process to
* testing cgroup won't change cgroup id of an already created socket.
*/
- out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ out_sk = socket(AF_INET6, SOCK_STREAM, 0);
if (CHECK_FAIL(out_sk < 0))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 17bbf76812ca..9229db2f5ca5 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- fd = connect_to_fd(family, type, server_fd);
+ fd = connect_to_fd(server_fd, 0);
if (fd < 0) {
err = -1;
goto close_bpf_object;
@@ -137,25 +137,25 @@ void test_connect_force_port(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+ server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
new file mode 100644
index 000000000000..6acb0e94d4d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_core_retro.skel.h"
+
+void test_core_retro(void)
+{
+ int err, zero = 0, res, duration = 0, my_pid = getpid();
+ struct test_core_retro *skel;
+
+ /* load program */
+ skel = test_core_retro__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
+ goto out_close;
+
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
+ if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+ goto out_close;
+
+ /* attach probe */
+ err = test_core_retro__attach(skel);
+ if (CHECK(err, "attach_kprobe", "err %d\n", err))
+ goto out_close;
+
+ /* trigger */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res);
+ if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
+ goto out_close;
+
+ CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
+
+out_close:
+ test_core_retro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c
new file mode 100644
index 000000000000..1a11612ace6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/endian.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_endian.skel.h"
+
+static int duration;
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+#define OUT16 0x3412
+#define OUT32 0x78563412U
+#define OUT64 0xf0debc9a78563412ULL
+
+void test_endian(void)
+{
+ struct test_endian* skel;
+ struct test_endian__bss *bss;
+ int err;
+
+ skel = test_endian__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+
+ bss->in16 = IN16;
+ bss->in32 = IN32;
+ bss->in64 = IN64;
+
+ err = test_endian__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out16, (__u64)OUT16);
+ CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out32, (__u64)OUT32);
+ CHECK(bss->out64 != OUT64, "out64", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out64, (__u64)OUT64);
+
+ CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const16, (__u64)OUT16);
+ CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const32, (__u64)OUT32);
+ CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const64, (__u64)OUT64);
+cleanup:
+ test_endian__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
new file mode 100644
index 000000000000..d884b2ed5bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
+
+void test_get_stackid_cannot_attach(void)
+{
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 1,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .sample_period = 5000,
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct test_stacktrace_build_id *skel;
+ __u32 duration = 0;
+ int pmu_fd, err;
+
+ skel = test_stacktrace_build_id__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ /* override program type */
+ bpf_program__set_perf_event(skel->progs.oncpu);
+
+ err = test_stacktrace_build_id__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
+ "should have failed\n");
+ close(pmu_fd);
+
+ /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
+ "err: %ld\n", PTR_ERR(skel->links.oncpu));
+ close(pmu_fd);
+
+ /* add exclude_callchain_kernel, attach should fail */
+ attr.exclude_callchain_kernel = 1;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
+ "should have failed\n");
+ close(pmu_fd);
+
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
new file mode 100644
index 000000000000..e3d6777226a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_ksyms.skel.h"
+#include <sys/stat.h>
+
+static int duration;
+
+static __u64 kallsyms_find(const char *sym)
+{
+ char type, name[500];
+ __u64 addr, res = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return 0;
+
+ while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
+ if (strcmp(name, sym) == 0) {
+ res = addr;
+ goto out;
+ }
+ }
+
+ CHECK(false, "not_found", "symbol %s not found\n", sym);
+out:
+ fclose(f);
+ return res;
+}
+
+void test_ksyms(void)
+{
+ __u64 link_fops_addr = kallsyms_find("bpf_link_fops");
+ const char *btf_path = "/sys/kernel/btf/vmlinux";
+ struct test_ksyms *skel;
+ struct test_ksyms__data *data;
+ struct stat st;
+ __u64 btf_size;
+ int err;
+
+ if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ return;
+ btf_size = st.st_size;
+
+ skel = test_ksyms__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ return;
+
+ err = test_ksyms__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
+ "got 0x%llx, exp 0x%llx\n",
+ data->out__bpf_link_fops, link_fops_addr);
+ CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
+ "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
+ CHECK(data->out__btf_size != btf_size, "btf_size",
+ "got %llu, exp %llu\n", data->out__btf_size, btf_size);
+ CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
+ "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+
+cleanup:
+ test_ksyms__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index c1168e4a9036..5a2a689dbb68 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -23,7 +23,7 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
@@ -49,7 +49,7 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(err))
goto close_bpf_object;
- client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (CHECK_FAIL(client_fd < 0))
goto close_bpf_object;
close(client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
new file mode 100644
index 000000000000..c230a573c373
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_ptr_kern.skel.h"
+
+void test_map_ptr(void)
+{
+ struct map_ptr_kern *skel;
+ __u32 duration = 0, retval;
+ char buf[128];
+ int err;
+
+ skel = map_ptr_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ return;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
+ sizeof(pkt_v4), buf, NULL, &retval, NULL);
+
+ if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+ goto cleanup;
+
+ if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
+ skel->bss->g_map_type, skel->bss->g_line))
+ goto cleanup;
+
+cleanup:
+ map_ptr_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index a122ce3b360e..c33ec180b3f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,6 +4,7 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
+#include "test_perf_buffer.skel.h"
#include "bpf/libbpf_internal.h"
/* AddressSanitizer sometimes crashes due to data dereference below, due to
@@ -25,16 +26,11 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
void test_perf_buffer(void)
{
- int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
- const char *prog_name = "kprobe/sys_nanosleep";
- const char *file = "./test_perf_buffer.o";
+ int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
struct perf_buffer_opts pb_opts = {};
- struct bpf_map *perf_buf_map;
+ struct test_perf_buffer *skel;
cpu_set_t cpu_set, cpu_seen;
- struct bpf_program *prog;
- struct bpf_object *obj;
struct perf_buffer *pb;
- struct bpf_link *link;
bool *online;
nr_cpus = libbpf_num_possible_cpus();
@@ -51,33 +47,21 @@ void test_perf_buffer(void)
nr_on_cpus++;
/* load program */
- err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
- obj = NULL;
- goto out_close;
- }
-
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+ skel = test_perf_buffer__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
goto out_close;
- /* load map */
- perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
- goto out_close;
-
- /* attach kprobe */
- link = bpf_program__attach_kprobe(prog, false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ /* attach probe */
+ err = test_perf_buffer__attach(skel);
+ if (CHECK(err, "attach_kprobe", "err %d\n", err))
goto out_close;
/* set up perf buffer */
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &cpu_seen;
- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
- goto out_detach;
+ goto out_close;
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
@@ -94,7 +78,7 @@ void test_perf_buffer(void)
&cpu_set);
if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
i, err))
- goto out_detach;
+ goto out_close;
usleep(1);
}
@@ -110,9 +94,7 @@ void test_perf_buffer(void)
out_free_pb:
perf_buffer__free(pb);
-out_detach:
- bpf_link__destroy(link);
out_close:
- bpf_object__close(obj);
+ test_perf_buffer__destroy(skel);
free(online);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
new file mode 100644
index 000000000000..72c3690844fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "perf_event_stackmap.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/*
+ * Innermost function of the func_6() -> ... -> func_1() call chain.
+ * Bumps a function-local static counter, sleeps for 100 microseconds and
+ * returns the updated counter value.
+ */
+noinline int func_1(void)
+{
+ static int val = 1;
+
+ val += 1;
+
+ usleep(100);
+ return val;
+}
+
+/*
+ * func_2() .. func_5() only forward to the next function down the chain and
+ * return its result. They are marked noinline, presumably so that each call
+ * stays a separate frame in the sampled user stack - confirm against the BPF
+ * program side.
+ */
+noinline int func_2(void)
+{
+ return func_1();
+}
+
+noinline int func_3(void)
+{
+ return func_2();
+}
+
+noinline int func_4(void)
+{
+ return func_3();
+}
+
+noinline int func_5(void)
+{
+ return func_4();
+}
+
+/*
+ * Entry point of the call chain: invokes func_5() 100 times and returns
+ * 1 plus the sum of the values it returned.
+ */
+noinline int func_6(void)
+{
+ int i, val = 1;
+
+ for (i = 0; i < 100; i++)
+ val += func_5();
+
+ return val;
+}
+
+/*
+ * Open a sampling CPU-cycles perf event on CPU 0, attach the skeleton's
+ * "oncpu" program to it, run the func_6() call chain on that CPU and then
+ * verify the four result variables exported by the BPF program.
+ *
+ * The test is skipped (not failed) when perf_event_open() rejects the event.
+ */
+void test_perf_event_stackmap(void)
+{
+ /* Hardware cycles event sampling IP, branch stack and callchain. */
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 2,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_CALLCHAIN,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .sample_period = 5000,
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct perf_event_stackmap *skel;
+ __u32 duration = 0; /* consumed by the CHECK() macro */
+ cpu_set_t cpu_set;
+ int pmu_fd, err;
+
+ skel = perf_event_stackmap__open();
+
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ err = perf_event_stackmap__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ /* Pin this thread to CPU 0, the CPU the perf event is opened on. */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0) {
+ /* Event cannot be opened here: report a skip, not a failure. */
+ printf("%s:SKIP:cpu doesn't support the event\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ /*
+ * On attach failure pmu_fd is closed here; on success it is not closed
+ * explicitly in this function.
+ * NOTE(review): presumably the link takes ownership of pmu_fd and
+ * releases it from perf_event_stackmap__destroy() - confirm.
+ */
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+ "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ close(pmu_fd);
+ goto cleanup;
+ }
+
+ /* create kernel and user stack traces for testing */
+ func_6();
+
+ /*
+ * Each variable is expected to equal 2 after the run.
+ * NOTE(review): the meaning of the value 2 is defined by the BPF
+ * program, which is not visible here - confirm.
+ */
+ CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
+ CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
+ CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
+ CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
+
+cleanup:
+ perf_event_stackmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
new file mode 100644
index 000000000000..3b127cab4864
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/err.h>
+#include <string.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/kernel.h>
+#define CONFIG_DEBUG_INFO_BTF
+#include <linux/btf_ids.h>
+#include "test_progs.h"
+
+static int duration;
+
+/* One expected BTF symbol: its name, BTF kind and resolved BTF type ID. */
+struct symbol {
+ const char *name; /* symbol name compared against the BTF type name */
+ int type; /* BTF_KIND_* value of the symbol */
+ int id; /* BTF type ID; -1 while not resolved yet */
+};
+
+/*
+ * Expected symbols, in the same order as the entries of the BTF ID lists
+ * below: test_resolve_btfids() compares list entry i with test_symbols[i].id.
+ * Entries with id -1 are filled in by __resolve_symbol(); the first entry
+ * keeps id 0.
+ */
+struct symbol test_symbols[] = {
+ { "unused", BTF_KIND_UNKN, 0 },
+ { "S", BTF_KIND_TYPEDEF, -1 },
+ { "T", BTF_KIND_TYPEDEF, -1 },
+ { "U", BTF_KIND_TYPEDEF, -1 },
+ { "S", BTF_KIND_STRUCT, -1 },
+ { "U", BTF_KIND_UNION, -1 },
+ { "func", BTF_KIND_FUNC, -1 },
+};
+
+/*
+ * Two ID lists with identical contents, one declared with BTF_ID_LIST and
+ * one with BTF_ID_LIST_GLOBAL. Entry order must match test_symbols[], since
+ * the test compares the two index by index.
+ * NOTE(review): the macros come from <linux/btf_ids.h>; presumably the IDs
+ * are patched into the object at build time by resolve_btfids - confirm.
+ */
+BTF_ID_LIST(test_list_local)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+extern __u32 test_list_global[];
+BTF_ID_LIST_GLOBAL(test_list_global)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+/*
+ * Match one BTF type against the still-unresolved entries of test_symbols[].
+ *
+ * Every entry whose id is -1, whose kind equals the kind of @type_id and
+ * whose name equals the type's name gets its id set to @type_id.
+ *
+ * Returns 0 on success, -1 if the type or its name cannot be fetched.
+ */
+static int
+__resolve_symbol(struct btf *btf, int type_id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, type_id);
+	struct symbol *sym;
+	const char *name;
+
+	if (!t) {
+		PRINT_FAIL("Failed to get type for ID %d\n", type_id);
+		return -1;
+	}
+
+	for (sym = test_symbols;
+	     sym < test_symbols + ARRAY_SIZE(test_symbols); sym++) {
+		/* Skip entries already resolved or of a different kind. */
+		if (sym->id != -1 || BTF_INFO_KIND(t->info) != sym->type)
+			continue;
+
+		name = btf__name_by_offset(btf, t->name_off);
+		if (!name) {
+			PRINT_FAIL("Failed to get name for BTF ID %d\n", type_id);
+			return -1;
+		}
+
+		if (strcmp(name, sym->name) == 0)
+			sym->id = type_id;
+	}
+
+	return 0;
+}
+
+/*
+ * Load BTF from btf_data.o and resolve the IDs of all test_symbols[] entries
+ * by walking every type ID from 1 to the number of types.
+ *
+ * Returns 0 on success, -1 if BTF cannot be loaded or if looking up any type
+ * fails. The lookup failure used to be dropped: the loop stopped early but
+ * the function still reported success.
+ */
+static int resolve_symbols(void)
+{
+	struct btf *btf;
+	int type_id;
+	int err = 0;
+	__u32 nr;
+
+	btf = btf__parse_elf("btf_data.o", NULL);
+	if (CHECK(libbpf_get_error(btf), "resolve",
+		  "Failed to load BTF from btf_data.o\n"))
+		return -1;
+
+	nr = btf__get_nr_types(btf);
+
+	for (type_id = 1; type_id <= nr; type_id++) {
+		err = __resolve_symbol(btf, type_id);
+		if (err)
+			break;
+	}
+
+	btf__free(btf);
+	return err ? -1 : 0;
+}
+
+/*
+ * Resolve the expected symbol IDs from btf_data.o, then verify that both
+ * test_list_local and test_list_global hold exactly those IDs, entry by
+ * entry. Checking stops at the first mismatch.
+ *
+ * Returns 0 on success, -1 if symbol resolution failed, or the non-zero
+ * CHECK() result of the first mismatching entry.
+ */
+int test_resolve_btfids(void)
+{
+	__u32 *lists[] = { test_list_local, test_list_global };
+	unsigned int l, s;
+
+	if (resolve_symbols())
+		return -1;
+
+	/* Check BTF_ID_LIST(test_list_local) and
+	 * BTF_ID_LIST_GLOBAL(test_list_global) IDs
+	 */
+	for (l = 0; l < ARRAY_SIZE(lists); l++) {
+		const __u32 *ids = lists[l];
+
+		for (s = 0; s < ARRAY_SIZE(test_symbols); s++) {
+			if (CHECK(ids[s] != test_symbols[s].id,
+				  "id_check",
+				  "wrong ID for %s (%d != %d)\n",
+				  test_symbols[s].name,
+				  ids[s], test_symbols[s].id))
+				return 1;
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 713167449c98..8b571890c57e 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -35,7 +35,7 @@ static struct sec_name_test tests[] = {
{-EINVAL, 0},
},
{"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
- {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+ {"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} },
{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 504abb7bfb95..7043e6ded0e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[1]); /* close write */
/* notify parent signal handler is installed */
- write(pipe_c2p[1], buf, 1);
+ CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* make sure parent enabled bpf program to send_signal */
- read(pipe_p2c[0], buf, 1);
+ CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
/* wait a little for signal handler */
sleep(1);
- if (sigusr1_received)
- write(pipe_c2p[1], "2", 1);
- else
- write(pipe_c2p[1], "0", 1);
+ buf[0] = sigusr1_received ? '2' : '0';
+ CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* wait for parent notification and exit */
- read(pipe_p2c[0], buf, 1);
+ CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
/* wait until child signal handler installed */
- read(pipe_c2p[0], buf, 1);
+ CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- write(pipe_p2c[1], buf, 1);
+ CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
@@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
- write(pipe_p2c[1], buf, 1);
+ CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
disable_pmu:
close(pmu_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
new file mode 100644
index 000000000000..c571584c00f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -0,0 +1,1329 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP).
+ *
+ * Tests exercise:
+ * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook,
+ * - redirecting socket lookup to a socket selected by BPF program,
+ * - failing a socket lookup on BPF program's request,
+ * - error scenarios for selecting a socket from BPF program,
+ * - accessing BPF program context,
+ * - attaching and running multiple BPF programs.
+ *
+ * Tests run in a dedicated network namespace.
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "testing_helpers.h"
+#include "test_sk_lookup.skel.h"
+
+/* External (address, port) pairs the client sends packets to. */
+#define EXT_IP4 "127.0.0.1"
+#define EXT_IP6 "fd00::1"
+#define EXT_PORT 7007
+
+/* Internal (address, port) pairs the server listens/receives at. */
+#define INT_IP4 "127.0.0.2"
+#define INT_IP4_V6 "::ffff:127.0.0.2"
+#define INT_IP6 "fd00::2"
+#define INT_PORT 8008
+
+#define IO_TIMEOUT_SEC 3
+
+/*
+ * Index of a server socket: used as the slot in the server fd array and as
+ * the key under which the socket is stored in the lookup map.
+ */
+enum server {
+ SERVER_A = 0,
+ SERVER_B = 1,
+ MAX_SERVERS,
+};
+
+/* NOTE(review): users of PROG1/PROG2 are outside this view - presumably
+ * program indices for the multi-program tests; confirm.
+ */
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+/* Textual IP address (IPv4 or IPv6) plus port in host byte order. */
+struct inet_addr {
+ const char *ip;
+ unsigned short port;
+};
+
+/* Description of a single sub-test case. */
+struct test {
+ const char *desc; /* sub-test name passed to test__start_subtest() */
+ struct bpf_program *lookup_prog; /* program attached to the netns */
+ struct bpf_program *reuseport_prog; /* optional; NULL means no SO_REUSEPORT */
+ struct bpf_map *sock_map; /* map the server sockets are inserted into */
+ int sotype; /* SOCK_STREAM or SOCK_DGRAM */
+ struct inet_addr connect_to; /* address the client connects/sends to */
+ struct inet_addr listen_at; /* address the server socket(s) bind to */
+ enum server accept_on; /* server expected to receive the traffic */
+ bool reuseport_has_conns; /* Add a connected socket to reuseport group */
+};
+
+static __u32 duration; /* for CHECK macro */
+
+/* Treat any address string that contains a ':' as IPv6. */
+static bool is_ipv6(const char *ip)
+{
+	return strchr(ip, ':') != NULL;
+}
+
+/*
+ * Attach @reuseport_prog to @sock_fd with SO_ATTACH_REUSEPORT_EBPF.
+ *
+ * Returns 0 on success and -1 on failure. If the program has no valid FD,
+ * errno is set to the negated value returned by bpf_program__fd().
+ */
+static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
+{
+	int fd = bpf_program__fd(reuseport_prog);
+
+	if (fd < 0) {
+		errno = -fd;
+		return -1;
+	}
+
+	return setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+			  &fd, sizeof(fd)) ? -1 : 0;
+}
+
+/*
+ * Size of the concrete socket address held in @addr: sizeof(struct
+ * sockaddr_in) for AF_INET, sizeof(struct sockaddr_in6) for AF_INET6 and
+ * 0 for any other family.
+ */
+static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
+{
+	switch (addr->ss_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Create a socket of type @sotype for the family implied by @ip and give it
+ * send and receive timeouts of IO_TIMEOUT_SEC seconds. The socket address
+ * built from @ip and @port is stored in @addr for the caller to bind or
+ * connect with.
+ *
+ * Returns the new socket FD, or -1 on failure.
+ */
+static int make_socket(int sotype, const char *ip, int port,
+		       struct sockaddr_storage *addr)
+{
+	struct timeval io_timeout = { .tv_sec = IO_TIMEOUT_SEC };
+	int sock, rc;
+
+	rc = make_sockaddr(is_ipv6(ip) ? AF_INET6 : AF_INET, ip, port, addr,
+			   NULL);
+	if (CHECK(rc, "make_address", "failed\n"))
+		return -1;
+
+	sock = socket(addr->ss_family, sotype, 0);
+	if (CHECK(sock < 0, "socket", "failed\n")) {
+		log_err("failed to make socket");
+		return -1;
+	}
+
+	rc = setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, &io_timeout,
+			sizeof(io_timeout));
+	if (CHECK(rc, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
+		log_err("failed to set SNDTIMEO");
+		goto err_close;
+	}
+
+	rc = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &io_timeout,
+			sizeof(io_timeout));
+	if (CHECK(rc, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
+		log_err("failed to set RCVTIMEO");
+		goto err_close;
+	}
+
+	return sock;
+
+err_close:
+	close(sock);
+	return -1;
+}
+
+/*
+ * Create a server socket of type @sotype bound to @ip:@port.
+ *
+ * UDP sockets get original-destination-address reporting enabled, TCP
+ * sockets get SO_REUSEADDR and are put into listening state. When
+ * @reuseport_prog is non-NULL, SO_REUSEPORT is enabled before bind() and the
+ * program is attached to the socket afterwards.
+ *
+ * Returns the socket FD, or -1 on failure (the socket is closed).
+ */
+static int make_server(int sotype, const char *ip, int port,
+ struct bpf_program *reuseport_prog)
+{
+ struct sockaddr_storage addr = {0};
+ const int one = 1;
+ int err, fd = -1;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ /* Enabled for every UDP socket, including UDPv6 ones, so that
+ * IPv4-mapped IPv6 addresses work.
+ */
+ if (sotype == SOCK_DGRAM) {
+ err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IP_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+ err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IPV6_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
+ log_err("failed to enable SO_REUSEADDR");
+ goto fail;
+ }
+ }
+
+ /* SO_REUSEPORT has to be set before bind() */
+ if (reuseport_prog) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
+ log_err("failed to enable SO_REUSEPORT");
+ goto fail;
+ }
+ }
+
+ err = bind(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "bind", "failed\n")) {
+ log_err("failed to bind listen socket");
+ goto fail;
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = listen(fd, SOMAXCONN);
+ if (CHECK(err, "make_server", "listen")) {
+ log_err("failed to listen on port %d", port);
+ goto fail;
+ }
+ }
+
+ /* Late attach reuseport prog so we can have one init path */
+ if (reuseport_prog) {
+ err = attach_reuseport(fd, reuseport_prog);
+ if (CHECK(err, "attach_reuseport", "failed\n")) {
+ log_err("failed to attach reuseport prog");
+ goto fail;
+ }
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+/*
+ * Create a socket of type @sotype and connect it to @ip:@port.
+ *
+ * Returns the connected socket FD, or -1 on failure (the socket is closed).
+ */
+static int make_client(int sotype, const char *ip, int port)
+{
+	struct sockaddr_storage peer = {0};
+	int sock, rc;
+
+	sock = make_socket(sotype, ip, port, &peer);
+	if (sock < 0)
+		return -1;
+
+	rc = connect(sock, (void *)&peer, inetaddr_len(&peer));
+	if (!CHECK(rc, "make_client", "connect"))
+		return sock;
+
+	log_err("failed to connect client socket");
+	close(sock);
+	return -1;
+}
+
+/*
+ * Send the single byte "a" on @fd. errno is cleared before the call.
+ *
+ * Returns 0 if the byte was sent, -1 otherwise.
+ */
+static int send_byte(int fd)
+{
+	ssize_t sent;
+
+	errno = 0;
+	sent = send(fd, "a", 1, 0);
+	if (!CHECK(sent <= 0, "send_byte", "send"))
+		return 0;
+
+	log_err("failed/partial send");
+	return -1;
+}
+
+/*
+ * Receive one byte from @fd and discard it.
+ *
+ * Returns 0 if a byte was received, -1 otherwise.
+ */
+static int recv_byte(int fd)
+{
+	ssize_t got;
+	char byte[1];
+
+	got = recv(fd, byte, sizeof(byte), 0);
+	if (!CHECK(got <= 0, "recv_byte", "recv"))
+		return 0;
+
+	log_err("failed/partial recv");
+	return -1;
+}
+
+/*
+ * Accept one connection on @server_fd, read one byte from it and echo that
+ * byte back. The accepted connection is closed before returning.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int tcp_recv_send(int server_fd)
+{
+	int conn_fd, ret = -1;
+	char byte[1];
+	ssize_t len;
+
+	conn_fd = accept(server_fd, NULL, NULL);
+	if (CHECK(conn_fd < 0, "accept", "failed\n")) {
+		log_err("failed to accept");
+		return -1;
+	}
+
+	len = recv(conn_fd, byte, sizeof(byte), 0);
+	if (CHECK(len <= 0, "recv", "failed\n")) {
+		log_err("failed/partial recv");
+		goto out;
+	}
+
+	len = send(conn_fd, byte, len, 0);
+	if (CHECK(len <= 0, "send", "failed\n")) {
+		log_err("failed/partial send");
+		goto out;
+	}
+
+	ret = 0;
+out:
+	close(conn_fd);
+	return ret;
+}
+
+/*
+ * Convert, in place, the IPv4 socket address stored in @ss into the
+ * equivalent IPv4-mapped IPv6 address (::ffff:a.b.c.d) with the same port.
+ *
+ * struct sockaddr_in6 is larger than struct sockaddr_in and the two overlap
+ * differently, so every sockaddr_in6 field is written explicitly. Otherwise
+ * the flow info would keep the old IPv4 address bytes, and the leading
+ * address bytes and scope id would keep whatever the buffer held before.
+ */
+static void v4_to_v6(struct sockaddr_storage *ss)
+{
+	struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
+	struct sockaddr_in v4 = *(struct sockaddr_in *)ss;
+
+	v6->sin6_family = AF_INET6;
+	v6->sin6_port = v4.sin_port;
+	v6->sin6_flowinfo = 0;
+	/* ::ffff:0:0/96 prefix: 80 zero bits followed by 16 one bits */
+	memset(&v6->sin6_addr.s6_addr[0], 0, 10);
+	v6->sin6_addr.s6_addr[10] = 0xff;
+	v6->sin6_addr.s6_addr[11] = 0xff;
+	memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+	v6->sin6_scope_id = 0;
+}
+
+/*
+ * Receive one datagram on @server_fd and echo it back to the sender from a
+ * fresh socket bound to the datagram's original destination address, which
+ * is taken from the IP_ORIGDSTADDR / IPV6_ORIGDSTADDR control message.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int udp_recv_send(int server_fd)
+{
+ char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
+ struct sockaddr_storage _src_addr = { 0 };
+ struct sockaddr_storage *src_addr = &_src_addr;
+ struct sockaddr_storage *dst_addr = NULL;
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ struct cmsghdr *cm;
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ /* msg_name receives the sender address and is reused as the reply
+ * destination by sendmsg() below.
+ */
+ msg.msg_name = src_addr;
+ msg.msg_namelen = sizeof(*src_addr);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ errno = 0;
+ n = recvmsg(server_fd, &msg, 0);
+ if (CHECK(n <= 0, "recvmsg", "failed\n")) {
+ log_err("failed to receive");
+ return -1;
+ }
+ if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
+ return -1;
+
+ /* Find the original destination address; dst_addr points into cmsg_buf. */
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_ORIGDSTADDR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_ORIGDSTADDR)) {
+ dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
+ break;
+ }
+ log_err("warning: ignored cmsg at level %d type %d",
+ cm->cmsg_level, cm->cmsg_type);
+ }
+ if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
+ return -1;
+
+ /* Server socket bound to IPv4-mapped IPv6 address */
+ if (src_addr->ss_family == AF_INET6 &&
+ dst_addr->ss_family == AF_INET) {
+ v4_to_v6(dst_addr);
+ }
+
+ /* Reply from original destination address. */
+ fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to create tx socket");
+ return -1;
+ }
+
+ ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
+ if (CHECK(ret, "bind", "failed\n")) {
+ log_err("failed to bind tx socket");
+ goto out;
+ }
+
+ /* Send the received payload back without any control data. */
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ n = sendmsg(fd, &msg, 0);
+ if (CHECK(n <= 0, "sendmsg", "failed\n")) {
+ log_err("failed to send echo reply");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ close(fd);
+ return ret;
+}
+
+/*
+ * One TCP echo round trip: the client sends a byte, the server accepts the
+ * connection and echoes it, the client reads the reply. Stops at the first
+ * failing step.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int tcp_echo_test(int client_fd, int server_fd)
+{
+	if (send_byte(client_fd) ||
+	    tcp_recv_send(server_fd) ||
+	    recv_byte(client_fd))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * One UDP echo round trip: the client sends a byte, the server echoes the
+ * datagram, the client reads the reply. Stops at the first failing step.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int udp_echo_test(int client_fd, int server_fd)
+{
+	if (send_byte(client_fd) ||
+	    udp_recv_send(server_fd) ||
+	    recv_byte(client_fd))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Attach @prog to the network namespace of the calling process.
+ *
+ * Returns the new link, or NULL on failure. The namespace FD used for the
+ * attachment is closed before returning in either case.
+ */
+static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ int net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return NULL;
+ }
+
+ link = bpf_program__attach_netns(prog, net_fd);
+ if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ /* Make the error encoded in the pointer visible to log_err(). */
+ errno = -PTR_ERR(link);
+ log_err("failed to attach program '%s' to netns",
+ bpf_program__name(prog));
+ link = NULL;
+ }
+
+ close(net_fd);
+ return link;
+}
+
+/*
+ * Insert socket @sock_fd into @map under key @index. The update uses
+ * BPF_NOEXIST, so it fails if the key is already present.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
+{
+	uint64_t sock_val = (uint64_t)sock_fd;
+	int fd, rc;
+
+	fd = bpf_map__fd(map);
+	if (CHECK(fd < 0, "bpf_map__fd", "failed\n")) {
+		errno = -fd;
+		log_err("failed to get map FD");
+		return -1;
+	}
+
+	rc = bpf_map_update_elem(fd, &index, &sock_val, BPF_NOEXIST);
+	if (!CHECK(rc, "bpf_map_update_elem", "failed\n"))
+		return 0;
+
+	log_err("failed to update redir_map @ %d", index);
+	return -1;
+}
+
+/*
+ * Attach three programs (lookup_pass twice, lookup_drop once) to the current
+ * netns and check what bpf_prog_query() reports for BPF_SK_LOOKUP: attach
+ * flags of 0, a count of 3, and program IDs matching those of the links, in
+ * attach order. Then detach the first link and check that its link info
+ * still carries the program ID while netns_ino has become 0.
+ */
+static void query_lookup_prog(struct test_sk_lookup *skel)
+{
+ struct bpf_link *link[3] = {};
+ struct bpf_link_info info;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+ __u32 prog_id;
+ int net_fd;
+ int err;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return;
+ }
+
+ link[0] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[0])
+ goto close;
+ link[1] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[1])
+ goto detach;
+ link[2] = attach_lookup_prog(skel->progs.lookup_drop);
+ if (!link[2])
+ goto detach;
+
+ err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ if (CHECK(err, "bpf_prog_query", "failed\n")) {
+ log_err("failed to query lookup prog");
+ goto detach;
+ }
+
+ errno = 0;
+ if (CHECK(attach_flags != 0, "bpf_prog_query",
+ "wrong attach_flags on query: %u", attach_flags))
+ goto detach;
+ if (CHECK(prog_cnt != 3, "bpf_prog_query",
+ "wrong program count on query: %u", prog_cnt))
+ goto detach;
+ /* NOTE(review): link_info_prog_id() is defined elsewhere; presumably
+ * it fills 'info' for the link and returns its program ID - confirm.
+ */
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[1], &info);
+ CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
+ "invalid program #1 id on query: %u != %u\n",
+ prog_ids[1], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[2], &info);
+ CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
+ "invalid program #2 id on query: %u != %u\n",
+ prog_ids[2], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+ err = bpf_link__detach(link[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto detach;
+
+ /* prog id is still there, but netns_ino is zeroed out */
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino != 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+ /* Links are destroyed in reverse attach order; unset slots are skipped. */
+detach:
+ if (link[2])
+ bpf_link__destroy(link[2]);
+ if (link[1])
+ bpf_link__destroy(link[1]);
+ if (link[0])
+ bpf_link__destroy(link[0]);
+close:
+ close(net_fd);
+}
+
+/*
+ * Run one redirect test case.
+ *
+ * Attaches t->lookup_prog to the current netns, creates the server
+ * socket(s) at t->listen_at and stores them in t->sock_map. One server is
+ * created without a reuseport program, MAX_SERVERS with one. A client then
+ * connects to t->connect_to and an echo round trip is run against the
+ * server selected by t->accept_on. Failures are reported through CHECK().
+ */
+static void run_lookup_prog(const struct test *t)
+{
+	int server_fds[MAX_SERVERS];
+	int client_fd, reuse_conn_fd = -1;
+	struct bpf_link *lookup_link;
+	int i, err;
+
+	/*
+	 * Mark every slot unused. An initializer of "{ -1 }" would set only
+	 * the first element and leave the others 0, so the cleanup loop
+	 * below would close FD 0 for servers that were never created.
+	 */
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++)
+		server_fds[i] = -1;
+
+	lookup_link = attach_lookup_prog(t->lookup_prog);
+	if (!lookup_link)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		server_fds[i] = make_server(t->sotype, t->listen_at.ip,
+					    t->listen_at.port,
+					    t->reuseport_prog);
+		if (server_fds[i] < 0)
+			goto close;
+
+		err = update_lookup_map(t->sock_map, i, server_fds[i]);
+		if (err)
+			goto close;
+
+		/* want just one server for non-reuseport test */
+		if (!t->reuseport_prog)
+			break;
+	}
+
+	/* Regular UDP socket lookup with reuseport behaves
+	 * differently when reuseport group contains connected
+	 * sockets. Check that adding a connected UDP socket to the
+	 * reuseport group does not affect how reuseport works with
+	 * BPF socket lookup.
+	 */
+	if (t->reuseport_has_conns) {
+		struct sockaddr_storage addr = {};
+		socklen_t len = sizeof(addr);
+
+		/* Add an extra socket to reuseport group */
+		reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
+					    t->listen_at.port,
+					    t->reuseport_prog);
+		if (reuse_conn_fd < 0)
+			goto close;
+
+		/* Connect the extra socket to itself */
+		err = getsockname(reuse_conn_fd, (void *)&addr, &len);
+		if (CHECK(err, "getsockname", "errno %d\n", errno))
+			goto close;
+		err = connect(reuse_conn_fd, (void *)&addr, len);
+		if (CHECK(err, "connect", "errno %d\n", errno))
+			goto close;
+	}
+
+	client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
+	if (client_fd < 0)
+		goto close;
+
+	/* The echo helpers report failures via CHECK(); result not needed. */
+	if (t->sotype == SOCK_STREAM)
+		tcp_echo_test(client_fd, server_fds[t->accept_on]);
+	else
+		udp_echo_test(client_fd, server_fds[t->accept_on]);
+
+	close(client_fd);
+close:
+	if (reuse_conn_fd != -1)
+		close(reuse_conn_fd);
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		if (server_fds[i] != -1)
+			close(server_fds[i]);
+	}
+	bpf_link__destroy(lookup_link);
+}
+
+/*
+ * Table-driven redirect tests. Every entry is run through run_lookup_prog()
+ * as its own sub-test, named by .desc. The table covers TCP and UDP over
+ * IPv4, IPv6 and IPv4 to an IPv4-mapped IPv6 listener, with and without a
+ * reuseport program. Entries that do not set .accept_on default to
+ * SERVER_A (0).
+ */
+static void test_redirect_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "TCP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv4 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
+ .desc = "UDP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ .connect_to = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
+ .desc = "UDP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ };
+ const struct test *t;
+
+ /* Run only the entries selected as sub-tests by the test runner. */
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ run_lookup_prog(t);
+ }
+}
+
+/*
+ * Run one "drop on lookup" test case: with t->lookup_prog attached and a
+ * server listening at t->listen_at, a client talking to t->connect_to must
+ * get ECONNREFUSED.
+ *
+ * For TCP the error is expected from connect() itself. For UDP a byte is
+ * sent first and the error is then read back with recv().
+ */
+static void drop_on_lookup(const struct test *t)
+{
+ struct sockaddr_storage dst = {};
+ int client_fd, server_fd, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fd < 0)
+ goto detach;
+
+ client_fd = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client_fd < 0)
+ goto close_srv;
+
+ /* For UDP the connect() result is overwritten by the block below. */
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client_fd);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client_fd, NULL, 0, 0);
+ err = n == -1;
+ }
+ /* Pass only if the call failed and errno is ECONNREFUSED. */
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client_fd);
+close_srv:
+ close(server_fd);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+/*
+ * Table-driven "drop on lookup" tests: each entry attaches the lookup_drop
+ * program and is run through drop_on_lookup() as its own sub-test.
+ */
+static void test_drop_on_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ /* NOTE(review): unlike the three cases above, this one
+ * listens on INT_PORT while the client targets EXT_PORT,
+ * so no socket is bound to the destination at all.
+ * Looks like it should be EXT_PORT - confirm.
+ */
+ .desc = "UDP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_lookup(t);
+ }
+}
+
+/* Run one "drop on reuseport" case.
+ *
+ * server1 listens at t->listen_at with t->reuseport_prog and is stored in
+ * t->sock_map under SERVER_A; server2 listens at t->connect_to without a
+ * reuseport program. With t->lookup_prog attached, a client aimed at
+ * t->connect_to is expected to observe ECONNREFUSED.
+ */
+static void drop_on_reuseport(const struct test *t)
+{
+	struct sockaddr_storage dst = { 0 };
+	int client, server1, server2, err;
+	struct bpf_link *lookup_link;
+	ssize_t n;
+
+	lookup_link = attach_lookup_prog(t->lookup_prog);
+	if (!lookup_link)
+		return;
+
+	server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+			      t->reuseport_prog);
+	if (server1 < 0)
+		goto detach;
+
+	/* server1 is open from here on, so failures must close it
+	 * (previously this path jumped to detach and leaked the socket).
+	 */
+	err = update_lookup_map(t->sock_map, SERVER_A, server1);
+	if (err)
+		goto close_srv1;
+
+	/* second server on destination address we should never reach */
+	server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
+			      NULL /* reuseport prog */);
+	if (server2 < 0)
+		goto close_srv1;
+
+	client = make_socket(t->sotype, t->connect_to.ip,
+			     t->connect_to.port, &dst);
+	if (client < 0)
+		goto close_srv2;
+
+	/* For SOCK_DGRAM err is reassigned below; connect()'s own result is
+	 * only what gets checked for SOCK_STREAM.
+	 */
+	err = connect(client, (void *)&dst, inetaddr_len(&dst));
+	if (t->sotype == SOCK_DGRAM) {
+		err = send_byte(client);
+		if (err)
+			goto close_all;
+
+		/* Read out asynchronous error */
+		n = recv(client, NULL, 0, 0);
+		err = n == -1;
+	}
+	if (CHECK(!err || errno != ECONNREFUSED, "connect",
+		  "unexpected success or error\n"))
+		log_err("expected ECONNREFUSED on connect");
+
+close_all:
+	close(client);
+close_srv2:
+	close(server2);
+close_srv1:
+	close(server1);
+detach:
+	bpf_link__destroy(lookup_link);
+}
+
+/* Run drop_on_reuseport() for TCP and UDP over IPv4 and IPv6, one subtest
+ * per table entry. All cases use select_sock_a as the lookup program and
+ * reuseport_drop as the reuseport program.
+ */
+static void test_drop_on_reuseport(struct test_sk_lookup *skel)
+{
+	const struct test tests[] = {
+		{
+			.desc		= "TCP IPv4 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "TCP IPv6 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+		},
+		{
+			.desc		= "UDP IPv4 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			/* Was a verbatim copy of the TCP IPv6 entry, which
+			 * left UDP over IPv6 untested.
+			 */
+			.desc		= "UDP IPv6 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+		},
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		if (test__start_subtest(t->desc))
+			drop_on_reuseport(t);
+	}
+}
+
+/* Attach lookup_prog, start MAX_SERVERS TCP servers on listen_ip (port 0)
+ * and store each in redir_map under its index, then connect a client to
+ * connect_ip:EXT_PORT and require that the connection can be accepted on
+ * server_fds[SERVER_B].
+ */
+static void run_sk_assign(struct test_sk_lookup *skel,
+			  struct bpf_program *lookup_prog,
+			  const char *listen_ip, const char *connect_ip)
+{
+	int client_fd, peer_fd, server_fds[MAX_SERVERS];
+	struct bpf_link *lookup_link;
+	int i, err;
+
+	/* Mark every slot as unused. A "{ -1 }" initializer only sets the
+	 * first element and zeroes the rest, which made the cleanup loop
+	 * close fd 0 for servers that were never created.
+	 */
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++)
+		server_fds[i] = -1;
+
+	lookup_link = attach_lookup_prog(lookup_prog);
+	if (!lookup_link)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+		if (server_fds[i] < 0)
+			goto close_servers;
+
+		err = update_lookup_map(skel->maps.redir_map, i,
+					server_fds[i]);
+		if (err)
+			goto close_servers;
+	}
+
+	client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
+	if (client_fd < 0)
+		goto close_servers;
+
+	peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
+	if (CHECK(peer_fd < 0, "accept", "failed\n"))
+		goto close_client;
+
+	close(peer_fd);
+close_client:
+	close(client_fd);
+close_servers:
+	/* Only descriptors that were actually opened are closed. */
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		if (server_fds[i] >= 0)
+			close(server_fds[i]);
+	}
+	bpf_link__destroy(lookup_link);
+}
+
+/* IPv4 variant of run_sk_assign(): servers listen on INT_IP4 and the client
+ * connects to EXT_IP4.
+ */
+static void run_sk_assign_v4(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4);
+}
+
+/* IPv6 variant of run_sk_assign(): servers listen on INT_IP6 and the client
+ * connects to EXT_IP6.
+ */
+static void run_sk_assign_v6(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6);
+}
+
+/* Exercise the sk_assign_esocknosupport program with an already-connected
+ * socket of type sotype stored in redir_map under SERVER_A, then open a
+ * second client towards the same server at EXT_IP4:EXT_PORT.
+ *
+ * NOTE(review): apart from the fd checks, no result is checked here; the
+ * return values of send_byte()/recv_byte() are ignored, so failures are
+ * presumably reported inside the helpers themselves - confirm.
+ */
+static void run_sk_assign_connected(struct test_sk_lookup *skel,
+ int sotype)
+{
+ int err, client_fd, connected_fd, server_fd;
+ struct bpf_link *lookup_link;
+
+ server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL);
+ if (server_fd < 0)
+ return;
+
+ /* Created before the lookup program is attached. */
+ connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (connected_fd < 0)
+ goto out_close_server;
+
+ /* Put a connected socket in redirect map */
+ err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd);
+ if (err)
+ goto out_close_connected;
+
+ lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport);
+ if (!lookup_link)
+ goto out_close_connected;
+
+ /* Try to redirect TCP SYN / UDP packet to a connected socket */
+ client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (client_fd < 0)
+ goto out_unlink_prog;
+ if (sotype == SOCK_DGRAM) {
+ send_byte(client_fd);
+ recv_byte(server_fd);
+ }
+
+ close(client_fd);
+out_unlink_prog:
+ bpf_link__destroy(lookup_link);
+out_close_connected:
+ close(connected_fd);
+out_close_server:
+ close(server_fd);
+}
+
+/* Subtests for the sk_assign helper. The first table drives the
+ * run_sk_assign_v4()/run_sk_assign_v6() cases, the second the
+ * connected-socket cases; subtests are started in the same order as listed.
+ */
+static void test_sk_assign_helper(struct test_sk_lookup *skel)
+{
+	const struct {
+		const char *name;
+		struct bpf_program *prog;
+		void (*run)(struct test_sk_lookup *skel,
+			    struct bpf_program *lookup_prog);
+	} listening[] = {
+		{ "sk_assign returns EEXIST",
+		  skel->progs.sk_assign_eexist, run_sk_assign_v4 },
+		{ "sk_assign honors F_REPLACE",
+		  skel->progs.sk_assign_replace_flag, run_sk_assign_v4 },
+		{ "sk_assign accepts NULL socket",
+		  skel->progs.sk_assign_null, run_sk_assign_v4 },
+		{ "access ctx->sk",
+		  skel->progs.access_ctx_sk, run_sk_assign_v4 },
+		{ "narrow access to ctx v4",
+		  skel->progs.ctx_narrow_access, run_sk_assign_v4 },
+		{ "narrow access to ctx v6",
+		  skel->progs.ctx_narrow_access, run_sk_assign_v6 },
+	};
+	const struct {
+		const char *name;
+		int sotype;
+	} connected[] = {
+		{ "sk_assign rejects TCP established", SOCK_STREAM },
+		{ "sk_assign rejects UDP connected", SOCK_DGRAM },
+	};
+	size_t k;
+
+	for (k = 0; k < ARRAY_SIZE(listening); k++) {
+		if (!test__start_subtest(listening[k].name))
+			continue;
+		listening[k].run(skel, listening[k].prog);
+	}
+
+	for (k = 0; k < ARRAY_SIZE(connected); k++) {
+		if (!test__start_subtest(connected[k].name))
+			continue;
+		run_sk_assign_connected(skel, connected[k].sotype);
+	}
+}
+
+/* One case for run_multi_prog_lookup(). */
+struct test_multi_prog {
+ /* subtest name passed to test__start_subtest() */
+ const char *desc;
+ /* the two lookup programs; prog1 is attached before prog2 */
+ struct bpf_program *prog1;
+ struct bpf_program *prog2;
+ /* map that receives the server socket under SERVER_A */
+ struct bpf_map *redir_map;
+ /* map holding one "done" flag per program, keyed by PROG1/PROG2 */
+ struct bpf_map *run_map;
+ /* errno tolerated from connect(); 0 means connect() must succeed */
+ int expect_errno;
+ /* address and port the server listens on */
+ struct inet_addr listen_at;
+};
+
+/* Run one multi-program case.
+ *
+ * Clears the PROG1/PROG2 flags in t->run_map, attaches t->prog1 then
+ * t->prog2, starts a TCP server at t->listen_at (stored in t->redir_map
+ * under SERVER_A) and connects a client to EXT_IP4:EXT_PORT.
+ *
+ * connect() must succeed when t->expect_errno is 0 and must fail with
+ * exactly t->expect_errno otherwise. Afterwards both run_map flags must be
+ * non-zero.
+ */
+static void run_multi_prog_lookup(const struct test_multi_prog *t)
+{
+	struct sockaddr_storage dst = {};
+	int map_fd, server_fd, client_fd;
+	struct bpf_link *link1, *link2;
+	int prog_idx, done, err;
+	int connect_errno;
+
+	map_fd = bpf_map__fd(t->run_map);
+
+	done = 0;
+	prog_idx = PROG1;
+	err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+		return;
+	prog_idx = PROG2;
+	err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+		return;
+
+	link1 = attach_lookup_prog(t->prog1);
+	if (!link1)
+		return;
+	link2 = attach_lookup_prog(t->prog2);
+	if (!link2)
+		goto out_unlink1;
+
+	server_fd = make_server(SOCK_STREAM, t->listen_at.ip,
+				t->listen_at.port, NULL);
+	if (server_fd < 0)
+		goto out_unlink2;
+
+	err = update_lookup_map(t->redir_map, SERVER_A, server_fd);
+	if (err)
+		goto out_close_server;
+
+	client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
+	if (client_fd < 0)
+		goto out_close_server;
+
+	err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+	/* Capture errno right away so later calls cannot disturb it. */
+	connect_errno = errno;
+	if (CHECK(err && !t->expect_errno, "connect",
+		  "unexpected error %d\n", connect_errno))
+		goto out_close_client;
+	/* A case that expects a failure must not pass on success. */
+	if (CHECK(!err && t->expect_errno, "connect",
+		  "unexpected success, expected error %d\n",
+		  t->expect_errno))
+		goto out_close_client;
+	if (CHECK(err && t->expect_errno && connect_errno != t->expect_errno,
+		  "connect", "unexpected error %d\n", connect_errno))
+		goto out_close_client;
+
+	done = 0;
+	prog_idx = PROG1;
+	err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+	CHECK(err, "bpf_map_lookup_elem", "failed\n");
+	CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n");
+
+	done = 0;
+	prog_idx = PROG2;
+	err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+	CHECK(err, "bpf_map_lookup_elem", "failed\n");
+	CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n");
+
+out_close_client:
+	close(client_fd);
+out_close_server:
+	close(server_fd);
+out_unlink2:
+	bpf_link__destroy(link2);
+out_unlink1:
+	bpf_link__destroy(link1);
+}
+
+/* Run run_multi_prog_lookup() for each pass/drop/redir pairing of two lookup
+ * programs, one subtest per table entry.
+ *
+ * Entries that set .expect_errno tolerate connect() failing with that errno;
+ * entries that leave it at 0 require connect() to succeed. The redir cases
+ * listen at INT_IP4:INT_PORT, the others at EXT_IP4:EXT_PORT.
+ */
+static void test_multi_prog_lookup(struct test_sk_lookup *skel)
+{
+ struct test_multi_prog tests[] = {
+ {
+ .desc = "multi prog - pass, pass",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, drop",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, drop",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - drop, pass",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, redir",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, pass",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, redir",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, drop",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, redir",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ };
+ struct test_multi_prog *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ /* The maps are the same for every case, so they are filled in
+ * here rather than repeated in each table entry.
+ */
+ t->redir_map = skel->maps.redir_map;
+ t->run_map = skel->maps.run_map;
+ if (test__start_subtest(t->desc))
+ run_multi_prog_lookup(t);
+ }
+}
+
+/* Run every sk_lookup test group against the loaded skeleton. Only the
+ * "query lookup prog" subtest is gated here; the test_*() groups called
+ * below start their own subtests.
+ */
+static void run_tests(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("query lookup prog"))
+ query_lookup_prog(skel);
+ test_redirect_lookup(skel);
+ test_drop_on_lookup(skel);
+ test_drop_on_reuseport(skel);
+ test_sk_assign_helper(skel);
+ test_multi_prog_lookup(skel);
+}
+
+/* Unshare the network namespace and run the setup commands that assign the
+ * IPv6 test addresses to lo and bring it up.
+ *
+ * Returns 0 on success, -1 if unshare() or any setup command fails.
+ */
+static int switch_netns(void)
+{
+	static const char * const setup_script[] = {
+		"ip -6 addr add dev lo " EXT_IP6 "/128",
+		"ip -6 addr add dev lo " INT_IP6 "/128",
+		"ip link set dev lo up",
+		NULL,
+	};
+	int idx, err;
+
+	err = unshare(CLONE_NEWNET);
+	if (CHECK(err, "unshare", "failed\n")) {
+		log_err("unshare(CLONE_NEWNET)");
+		return -1;
+	}
+
+	/* The script is NULL-terminated; stop at the first failing command. */
+	for (idx = 0; setup_script[idx] != NULL; idx++) {
+		const char *command = setup_script[idx];
+
+		err = system(command);
+		if (CHECK(err, "system", "failed\n")) {
+			log_err("system(%s)", command);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Test entry point: switch to a fresh network namespace, load the skeleton
+ * and run all test groups against it.
+ */
+void test_sk_lookup(void)
+{
+	struct test_sk_lookup *skel;
+
+	if (switch_netns())
+		return;
+
+	skel = test_sk_lookup__open_and_load();
+	if (!CHECK(!skel, "skel open_and_load", "failed\n")) {
+		run_tests(skel);
+		test_sk_lookup__destroy(skel);
+	}
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 7021b92af313..25de86af2d03 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,6 +11,7 @@ void test_skb_ctx(void)
.cb[3] = 4,
.cb[4] = 5,
.priority = 6,
+ .ifindex = 1,
.tstamp = 7,
.wire_len = 100,
.gso_segs = 8,
@@ -92,6 +93,10 @@ void test_skb_ctx(void)
"ctx_out_priority",
"skb->priority == %d, expected %d\n",
skb.priority, 7);
+ CHECK_ATTR(skb.ifindex != 1,
+ "ctx_out_ifindex",
+ "skb->ifindex == %d, expected %d\n",
+ skb.ifindex, 1);
CHECK_ATTR(skb.tstamp != 8,
"ctx_out_tstamp",
"skb->tstamp == %lld, expected %d\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fa153cf67b1b..fe87b77af459 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -41,7 +41,7 @@ void test_skeleton(void)
CHECK(bss->in4 != 0, "in4", "got %lld != exp %lld\n", bss->in4, 0LL);
CHECK(bss->out4 != 0, "out4", "got %lld != exp %lld\n", bss->out4, 0LL);
- CHECK(rodata->in6 != 0, "in6", "got %d != exp %d\n", rodata->in6, 0);
+ CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
/* validate we can pre-setup global variables, even in .bss */
@@ -49,7 +49,7 @@ void test_skeleton(void)
data->in2 = 11;
bss->in3 = 12;
bss->in4 = 13;
- rodata->in6 = 14;
+ rodata->in.in6 = 14;
err = test_skeleton__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
@@ -60,7 +60,7 @@ void test_skeleton(void)
CHECK(data->in2 != 11, "in2", "got %lld != exp %lld\n", data->in2, 11LL);
CHECK(bss->in3 != 12, "in3", "got %d != exp %d\n", bss->in3, 12);
CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
- CHECK(rodata->in6 != 14, "in6", "got %d != exp %d\n", rodata->in6, 14);
+ CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
/* now set new values and attach to get them into outX variables */
data->in1 = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 8547ecbdc61f..ec281b0363b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -193,11 +193,10 @@ static void run_test(int cgroup_fd)
if (CHECK_FAIL(server_fd < 0))
goto close_bpf_object;
+ pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
goto close_server_fd;
-
- pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f002e3090d92..11a769e18f5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void)
{
__u64 sample_freq = 5000; /* fallback to 5000 on error */
FILE *f;
+ __u32 duration = 0;
f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
if (f == NULL)
return sample_freq;
- fscanf(f, "%llu", &sample_freq);
+ CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate",
+ "return default value: 5000,err %d\n", -errno);
fclose(f);
return sample_freq;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 9013a0c01eed..d207e968e6b1 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
goto close_bpf_object;
}
- client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (client_fd < 0) {
err = -1;
goto close_bpf_object;
@@ -161,7 +161,7 @@ void test_tcp_rtt(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
new file mode 100644
index 000000000000..39b0decb1bb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+#include "trace_printk.skel.h"
+
+#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG "testing,testing"
+
+/* Load and attach the trace_printk skeleton, give the tracepoint a moment
+ * to fire, then check that the program ran, that it recorded a positive
+ * bpf_trace_printk return value in bss->trace_printk_ret, and that
+ * SEARCHMSG shows up in TRACEBUF.
+ */
+void test_trace_printk(void)
+{
+	int err, iter, duration = 0, found = 0;
+	struct trace_printk__bss *bss;
+	struct trace_printk *skel;
+	char *buf = NULL;
+	FILE *fp = NULL;
+	/* getline() is conventionally called with *n == 0 for a NULL buffer */
+	size_t buflen = 0;
+
+	skel = trace_printk__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	err = trace_printk__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	bss = skel->bss;
+
+	err = trace_printk__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	fp = fopen(TRACEBUF, "r");
+	if (CHECK(fp == NULL, "could not open trace buffer",
+		  "error %d opening %s", errno, TRACEBUF))
+		goto cleanup;
+
+	/* We do not want to wait forever if this test fails... */
+	fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+	/* wait for tracepoint to trigger */
+	usleep(1);
+	trace_printk__detach(skel);
+
+	if (CHECK(bss->trace_printk_ran == 0,
+		  "bpf_trace_printk never ran",
+		  "ran == %d", bss->trace_printk_ran))
+		goto cleanup;
+
+	if (CHECK(bss->trace_printk_ret <= 0,
+		  "bpf_trace_printk returned <= 0 value",
+		  "got %d", bss->trace_printk_ret))
+		goto cleanup;
+
+	/* verify our search string is in the trace buffer; the attempt
+	 * count is bounded exactly as before (at most 1001 reads)
+	 */
+	for (iter = 0; iter <= 1000; iter++) {
+		/* Reset errno so a stale EAGAIN cannot be mistaken for
+		 * the outcome of this read.
+		 */
+		errno = 0;
+		if (getline(&buf, &buflen, fp) < 0) {
+			/* Nothing was read: buf holds no new line, so it
+			 * must not be searched (or counted) again.
+			 */
+			if (errno == EAGAIN)
+				continue;
+			break;
+		}
+		if (strstr(buf, SEARCHMSG) != NULL)
+			found++;
+		if (found == bss->trace_printk_ran)
+			break;
+	}
+
+	if (CHECK(!found, "message from bpf_trace_printk not found",
+		  "no instance of %s in %s", SEARCHMSG, TRACEBUF))
+		goto cleanup;
+
+cleanup:
+	trace_printk__destroy(skel);
+	free(buf);
+	if (fp)
+		fclose(fp);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
new file mode 100644
index 000000000000..2aba09d4d01b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "udp_limit.skel.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+static int duration;
+
+/* Join the "/udp_limit" cgroup, attach the skeleton's sock and sock_release
+ * programs to it, and check that a second UDP socket cannot be created
+ * while the first is open, that one can be created again after a close,
+ * and that the counters in the skeleton's bss match.
+ */
+void test_udp_limit(void)
+{
+ struct udp_limit *skel;
+ int fd1 = -1, fd2 = -1;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/udp_limit");
+ if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ return;
+
+ skel = udp_limit__open_and_load();
+ if (CHECK(!skel, "skel-load", "errno %d", errno))
+ goto close_cgroup_fd;
+
+ /* The links are stored in the skeleton; no separate destroy call for
+ * them appears in this function.
+ */
+ skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
+ if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
+ "cg-attach", "sock %ld sock_release %ld",
+ PTR_ERR(skel->links.sock),
+ PTR_ERR(skel->links.sock_release)))
+ goto close_skeleton;
+
+ /* BPF program enforces a single UDP socket per cgroup,
+ * verify that.
+ */
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (CHECK(fd1 < 0, "fd1", "errno %d", errno))
+ goto close_skeleton;
+
+ /* This one is expected to fail: success (fd2 >= 0) is the error case. */
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (CHECK(fd2 >= 0, "fd2", "errno %d", errno))
+ goto close_skeleton;
+
+ /* We can reopen again after close. */
+ close(fd1);
+ fd1 = -1;
+
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno))
+ goto close_skeleton;
+
+ /* Make sure the program was invoked the expected
+ * number of times:
+ * - open fd1 - BPF_CGROUP_INET_SOCK_CREATE
+ * - attempt to open fd2 - BPF_CGROUP_INET_SOCK_CREATE
+ * - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE
+ * - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE
+ */
+ if (CHECK(skel->bss->invocations != 4, "bss-invocations",
+ "invocations=%d", skel->bss->invocations))
+ goto close_skeleton;
+
+ /* We should still have a single socket in use */
+ if (CHECK(skel->bss->in_use != 1, "bss-in_use",
+ "in_use=%d", skel->bss->in_use))
+ goto close_skeleton;
+
+close_skeleton:
+ /* fd1/fd2 stay at -1 unless a socket is currently open */
+ if (fd1 >= 0)
+ close(fd1);
+ if (fd2 >= 0)
+ close(fd2);
+ udp_limit__destroy(skel);
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..c75525eab02c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+/* Report a "check" failure through CHECK() when got differs from exp; both
+ * values are printed as long. got and exp are each expanded twice.
+ */
+#define CHECK_VAL(got, exp) \
+ CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+ (long)(got), (long)(exp))
+
+/* Load and attach the test_varlen skeleton, place two strings in its input
+ * buffers, toggle bss->capture around a short sleep, then check that each of
+ * the four payloads reports the expected lengths and holds the two strings
+ * back to back.
+ */
+void test_varlen(void)
+{
+ int duration = 0, err;
+ struct test_varlen* skel;
+ struct test_varlen__bss *bss;
+ struct test_varlen__data *data;
+ const char str1[] = "Hello, ";
+ const char str2[] = "World!";
+ /* str1 and str2 concatenated, each with its terminating NUL */
+ const char exp_str[] = "Hello, \0World!\0";
+ /* sizeof() on the arrays, so both sizes include the terminating NUL */
+ const int size1 = sizeof(str1);
+ const int size2 = sizeof(str2);
+
+ skel = test_varlen__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+ data = skel->data;
+
+ err = test_varlen__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* NOTE(review): presumably lets the BPF side ignore other processes -
+ * confirm against the BPF program.
+ */
+ bss->test_pid = getpid();
+
+ /* trigger everything */
+ memcpy(bss->buf_in1, str1, size1);
+ memcpy(bss->buf_in2, str2, size2);
+ bss->capture = true;
+ usleep(1);
+ bss->capture = false;
+
+ /* payload1 lives in bss, payloads 2-4 in data */
+ CHECK_VAL(bss->payload1_len1, size1);
+ CHECK_VAL(bss->payload1_len2, size2);
+ CHECK_VAL(bss->total1, size1 + size2);
+ CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+
+ CHECK_VAL(data->payload2_len1, size1);
+ CHECK_VAL(data->payload2_len2, size2);
+ CHECK_VAL(data->total2, size1 + size2);
+ CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+
+ CHECK_VAL(data->payload3_len1, size1);
+ CHECK_VAL(data->payload3_len2, size2);
+ CHECK_VAL(data->total3, size1 + size2);
+ CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+
+ CHECK_VAL(data->payload4_len1, size1);
+ CHECK_VAL(data->payload4_len2, size2);
+ CHECK_VAL(data->total4, size1 + size2);
+ CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+cleanup:
+ test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
new file mode 100644
index 000000000000..0176573fe4e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_with_cpumap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+/* Check which program/cpumap combinations are accepted:
+ * - attaching xdp_redir_prog to lo in SKB mode must fail;
+ * - xdp_dummy_cm can be stored in a cpumap entry, and reading the entry
+ *   back yields that program's id;
+ * - attaching xdp_dummy_cm to lo must fail;
+ * - storing xdp_dummy_prog in a cpumap entry must fail.
+ */
+void test_xdp_with_cpumap_helpers(void)
+{
+ struct test_xdp_with_cpumap_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
+ __u32 duration = 0, idx = 0;
+ __u32 len = sizeof(info);
+ int err, prog_fd, map_fd;
+
+ skel = test_xdp_with_cpumap_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_xdp_with_cpumap_helpers__open_and_load");
+ return;
+ }
+
+ /* can not attach program with cpumaps that allow programs
+ * as xdp generic
+ */
+ prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
+ "should have failed\n");
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ /* info.id is compared with the id read back from the map below */
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_close;
+
+ val.bpf_prog.fd = prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
+ err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
+ CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
+ "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+ /* can not attach BPF_XDP_CPUMAP program to a device */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
+ "should have failed\n");
+
+ /* val was overwritten by the lookup above, so qsize is set again */
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
+ "should have failed\n");
+
+out_close:
+ test_xdp_with_cpumap_helpers__destroy(skel);
+}
+
+/* Test entry point: runs the cpumap checks as the "cpumap_with_progs"
+ * subtest.
+ */
+void test_xdp_cpumap_attach(void)
+{
+ if (test__start_subtest("cpumap_with_progs"))
+ test_xdp_with_cpumap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
new file mode 100644
index 000000000000..6f814999b395
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+#include "test_xdp_link.skel.h"
+
+#define IFINDEX_LO 1
+
+/* Exercise the interaction between fd-based XDP attachment and BPF links
+ * on the loopback device, using two instances of the same skeleton:
+ * - a link cannot replace an fd-based attachment, and vice versa;
+ * - an active link cannot be force-updated or force-detached;
+ * - a second link cannot replace an active one;
+ * - the program under a link can be updated, and link info reports the
+ *   expected type, program id and ifindex;
+ * - after bpf_link__detach() the reported ifindex is zero.
+ */
+void test_xdp_link(void)
+{
+	__u32 duration = 0, id1, id2, id0 = 0;
+	/* fds and return codes are signed; they were declared __u32 before */
+	int prog_fd1, prog_fd2, err;
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
+	struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+	struct bpf_link_info link_info;
+	struct bpf_prog_info prog_info;
+	struct bpf_link *link;
+	__u32 link_info_len = sizeof(link_info);
+	__u32 prog_info_len = sizeof(prog_info);
+
+	skel1 = test_xdp_link__open_and_load();
+	if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+		goto cleanup;
+	prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
+
+	skel2 = test_xdp_link__open_and_load();
+	if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+		goto cleanup;
+	prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+	if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+		goto cleanup;
+	id1 = prog_info.id;
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+	if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+		goto cleanup;
+	id2 = prog_info.id;
+
+	/* set initial prog attachment */
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+		goto cleanup;
+
+	/* validate prog ID */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	CHECK(err || id0 != id1, "id1_check",
+	      "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+
+	/* BPF link is not allowed to replace prog attachment */
+	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+		bpf_link__destroy(link);
+		/* best-effort detach prog */
+		opts.old_fd = prog_fd1;
+		bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+		goto cleanup;
+	}
+
+	/* detach BPF program */
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(err, "prog_detach", "failed %d\n", err))
+		goto cleanup;
+
+	/* now BPF link should attach successfully */
+	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	/* stored in the skeleton, which is destroyed at cleanup */
+	skel1->links.xdp_handler = link;
+
+	/* validate prog ID */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id1, "id1_check",
+		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+		goto cleanup;
+
+	/* BPF prog attach is not allowed to replace BPF link */
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* Can't force-update when BPF link is active */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+	if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* Can't force-detach when BPF link is active */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* BPF link is not allowed to replace another BPF link */
+	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+		bpf_link__destroy(link);
+		goto cleanup;
+	}
+
+	bpf_link__destroy(skel1->links.xdp_handler);
+	skel1->links.xdp_handler = NULL;
+
+	/* new link attach should succeed */
+	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	skel2->links.xdp_handler = link;
+
+	/* the message now reports id2, the id actually compared against
+	 * (it printed id1 before)
+	 */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id2, "id2_check",
+		  "loaded prog id %u != id2 %u, err %d", id0, id2, err))
+		goto cleanup;
+
+	/* updating program under active BPF link works as expected */
+	err = bpf_link__update_program(link, skel1->progs.xdp_handler);
+	if (CHECK(err, "link_upd", "failed: %d\n", err))
+		goto cleanup;
+
+	memset(&link_info, 0, sizeof(link_info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	if (CHECK(err, "link_info", "failed: %d\n", err))
+		goto cleanup;
+
+	CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
+	      "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
+	CHECK(link_info.prog_id != id1, "link_prog_id",
+	      "got %u != exp %u\n", link_info.prog_id, id1);
+	CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
+	      "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+
+	err = bpf_link__detach(link);
+	if (CHECK(err, "link_detach", "failed %d\n", err))
+		goto cleanup;
+
+	memset(&link_info, 0, sizeof(link_info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	if (CHECK(err, "link_info", "failed: %d\n", err))
+		goto cleanup;
+	CHECK(link_info.prog_id != id1, "link_prog_id",
+	      "got %u != exp %u\n", link_info.prog_id, id1);
+	/* ifindex should be zeroed out */
+	CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
+	      "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+
+cleanup:
+	test_xdp_link__destroy(skel1);
+	test_xdp_link__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..c196280df90d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
+#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
+#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+#undef bpf_iter__tcp
+#undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
+#undef bpf_iter__bpf_map_elem
+#undef bpf_iter__bpf_sk_storage_map
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+ struct bpf_iter_meta *meta;
+ struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+ struct bpf_iter_meta *meta;
+ struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ __u32 fd;
+ struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+ struct bpf_iter_meta *meta;
+ struct sock_common *sk_common;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+ struct tcp_sock tcp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+ struct bpf_iter_meta *meta;
+ struct udp_sock *udp_sk;
+ uid_t uid __attribute__((aligned(8)));
+ int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+ struct udp_sock udp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map_elem {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ void *value;
+};
+
+struct bpf_iter__bpf_sk_storage_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ struct sock *sk;
+ void *value;
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
new file mode 100644
index 000000000000..6286023fd62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap1 SEC(".maps");
+
+__u32 key_sum = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) /* per-element callback: dumps and sums key/value, then rewrites the value */
+{
+ __u32 *key = ctx->key;
+ __u64 *val = ctx->value;
+
+ if (key == (void *)0 || val == (void *)0) /* nothing to do when either pointer is NULL */
+ return 0;
+
+ bpf_seq_write(ctx->meta->seq, key, sizeof(__u32)); /* emit the raw 4-byte key ... */
+ bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); /* ... followed by the raw 8-byte value */
+ key_sum += *key; /* running totals kept in globals */
+ val_sum += *val;
+ *val = *key; /* overwrite the element's value with its key */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
new file mode 100644
index 000000000000..07ddbfdbcab7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u32);
+} hashmap3 SEC(".maps");
+
+/* will set before prog run */
+bool in_test_mode = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) /* per-element callback: sums fields in test mode, prints elements otherwise */
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u32 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+ struct key_t *key = ctx->key;
+ __u64 *val = ctx->value;
+
+ if (in_test_mode) { /* test mode only accumulates into globals and prints nothing */
+ /* test mode is used by selftests to
+ * test functionality of bpf_hash_map iter.
+ *
+ * the above hashmap1 will have correct size
+ * and will be accepted, hashmap2 and hashmap3
+ * should be rejected due to smaller key/value
+ * size.
+ */
+ if (key == (void *)0 || val == (void *)0)
+ return 0;
+
+ key_sum_a += key->a;
+ key_sum_b += key->b;
+ key_sum_c += key->c;
+ val_sum += *val;
+ return 0;
+ }
+
+ /* non-test mode, the map is prepared with the
+ * below bpftool command sequence:
+ * bpftool map create /sys/fs/bpf/m1 type hash \
+ * key 12 value 8 entries 3 name map1
+ * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \
+ * value 0 0 0 1 0 0 0 1
+ * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \
+ * value 0 0 0 1 0 0 0 2
+ * The bpftool iter command line:
+ * bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \
+ * map id 77
+ * The below output will be:
+ * map dump starts
+ * 77: (1000000 0 2000000) (200000001000000)
+ * 77: (1000000 0 1000000) (100000001000000)
+ * map dump ends
+ */
+ if (seq_num == 0) /* first invocation of the dump: print the banner */
+ BPF_SEQ_PRINTF(seq, "map dump starts\n");
+
+ if (key == (void *)0 || val == (void *)0) { /* a NULL key or value is treated as the end of the dump */
+ BPF_SEQ_PRINTF(seq, "map dump ends\n");
+ return 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id, /* map id, then key fields a/b/c, then value */
+ key->a, key->b, key->c, *val);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b57bd6fef208..08651b23edba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,27 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
new file mode 100644
index 000000000000..85fa710fad90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u32);
+} arraymap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+__u32 key_sum = 0, val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx) /* per-element callback: sums the key and every per-CPU value */
+{
+ __u32 *key = ctx->key;
+ void *pptr = ctx->value; /* walked below as num_cpus consecutive slots */
+ __u32 step;
+ int i;
+
+ if (key == (void *)0 || pptr == (void *)0) /* nothing to do when either pointer is NULL */
+ return 0;
+
+ key_sum += *key;
+
+ step = 8; /* byte stride between per-CPU slots; presumably the 4-byte value rounded up to 8 - TODO confirm */
+ for (i = 0; i < num_cpus; i++) { /* num_cpus is a read-only global set before the program runs */
+ val_sum += *(__u32 *)pptr;
+ pptr += step;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
new file mode 100644
index 000000000000..feaaa2b89c57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u32);
+} hashmap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u32 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx) /* per-element callback: sums key fields and every per-CPU value */
+{
+ struct key_t *key = ctx->key;
+ void *pptr = ctx->value; /* walked below as num_cpus consecutive slots */
+ __u32 step;
+ int i;
+
+ if (key == (void *)0 || pptr == (void *)0) /* nothing to do when either pointer is NULL */
+ return 0;
+
+ key_sum_a += key->a;
+ key_sum_b += key->b;
+ key_sum_c += key->c;
+
+ step = 8; /* byte stride between per-CPU slots; presumably the 4-byte value rounded up to 8 - TODO confirm */
+ for (i = 0; i < num_cpus; i++) { /* num_cpus is a read-only global set before the program runs */
+ val_sum += *(__u32 *)pptr;
+ pptr += step;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
new file mode 100644
index 000000000000..6b70ccaba301
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
+__u32 val_sum = 0;
+__u32 ipv6_sk_count = 0;
+
+SEC("iter/bpf_sk_storage_map")
+int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) /* per-element callback: counts AF_INET6 sockets and sums stored values */
+{
+ struct sock *sk = ctx->sk;
+ __u32 *val = ctx->value;
+
+ if (sk == (void *)0 || val == (void *)0) /* nothing to do when either pointer is NULL */
+ return 0;
+
+ if (sk->sk_family == AF_INET6)
+ ipv6_sk_count++;
+
+ val_sum += *val; /* values are summed for every socket, not only IPv6 ones */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index c8e9ca74c87b..d58d9f1642b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,35 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__ipv6_route
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
- struct bpf_iter_meta *meta;
- struct fib6_info *rt;
-} __attribute__((preserve_access_index));
-
char _license[] SEC("license") = "GPL";
extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
-#define RTF_GATEWAY 0x0002
-#define IFNAMSIZ 16
-#define fib_nh_gw_family nh_common.nhc_gw_family
-#define fib_nh_gw6 nh_common.nhc_gw.ipv6
-#define fib_nh_dev nh_common.nhc_dev
-
SEC("iter/ipv6_route")
int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 75ecf956a2df..95989f4c99b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,30 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__netlink
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-#define sk_rmem_alloc sk_backlog.rmem_alloc
-#define sk_refcnt __sk_common.skc_refcnt
-
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
- struct bpf_iter_meta *meta;
- struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket)
{
return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
@@ -54,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
if (!nlk->groups) {
group = 0;
} else {
- /* FIXME: temporary use bpf_probe_read here, needs
+ /* FIXME: temporary use bpf_probe_read_kernel here, needs
* verifier support to do direct access.
*/
- bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+ bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
}
BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
nlk->portid, (u32)group,
@@ -74,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
* with current verifier.
*/
inode = SOCK_INODE(sk);
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
}
BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index ee754021f98e..4983087852a0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -1,27 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 0f0ec3db20ba..8b787baa2654 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,29 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task_file
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
- __u32 fd;
- struct file *file;
-} __attribute__((preserve_access_index));
-
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 000000000000..50e59a2e142e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH 64
+unsigned long entries[MAX_STACK_TRACE_DEPTH] = {};
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx) /* per-task callback: prints the pid and the stack entries fetched for the task */
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ long i, retlen;
+
+ if (task == (void *)0) /* no task supplied: nothing to print */
+ return 0;
+
+ retlen = bpf_get_task_stack(task, entries, /* fills the global entries[] buffer; retlen is used below as a byte count */
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+ if (retlen < 0) /* a negative return is treated as failure and the task is skipped */
+ return 0;
+
+ BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+ retlen / SIZE_OF_ULONG);
+ for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { /* fixed trip count; only slots below retlen bytes are printed */
+ if (retlen > i * SIZE_OF_ULONG)
+ BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+ }
+ BPF_SEQ_PRINTF(seq, "\n"); /* blank line after each task's output */
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..54380c5e1069
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h) /* returns 1 when h->pprev is NULL, 0 otherwise */
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer) /* returns 1 when timer->entry has a non-NULL pprev, 0 otherwise */
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x) /* converts x ticks at CONFIG_HZ into ticks at USER_HZ */
+{
+ /* The implementation here tailored to a particular
+ * setting of USER_HZ.
+ */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; /* nanoseconds per CONFIG_HZ tick, rounded to nearest */
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; /* nanoseconds per USER_HZ tick */
+
+ if ((tick_nsec % user_hz_nsec) == 0) { /* tick length is a whole multiple of the USER_HZ tick: use integer ratios */
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec; /* general case: scale through nanoseconds */
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta) /* like jiffies_to_clock_t(), but zero or negative deltas yield 0 */
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk) /* returns the inode number tied to sk's socket, or 0 when sk has no socket */
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; /* the socket is the 'socket' member of a socket_alloc; take its vfs_inode sibling */
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); /* i_ino is copied via the helper; its return value is not checked */
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) /* true when icsk_ack.pingpong has reached TCP_PINGPONG_THRESH */
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) /* true when snd_ssthresh is at or above TCP_INFINITE_SSTHRESH */
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, /* prints one line for a full IPv4 TCP socket; always returns 0 */
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->inet_conn; /* views of the same socket at successively more generic layers */
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = inet->inet_daddr; /* addresses are printed as stored; only the ports are byte-swapped */
+ src = inet->inet_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS || /* pick the timer code and expiry shown in the tr/tm->when columns */
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64(); /* no timer: expiry equals "now", so the printed delta is 0 */
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog; /* listening sockets report the accept backlog instead */
+ } else {
+ rx_queue = tp->rcv_nxt - tp->copied_seq;
+ if (rx_queue < 0) /* never print a negative queue length */
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, dest, destp); /* local addr:port, then remote addr:port (dest, not destp twice) */
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->write_seq - tp->snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen /* last column: fastopen queue limit when listening, else ssthresh or -1 */
+ : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, /* prints one line for an IPv4 time-wait socket; always returns 0 */
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64(); /* ticks left on the time-wait timer; may be negative */
+ dest = tw->tw_daddr;
+ src = tw->tw_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0, /* most columns are constant 0 here; the uid argument is not printed */
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, /* prints one line for an IPv4 request socket; always returns 0 */
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64(); /* ticks until the request timer expires */
+
+ if (ttd < 0) /* an already-expired timer is reported as 0 */
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, irsk->ir_loc_addr,
+ irsk->ir_num, irsk->ir_rmt_addr, /* local port printed as stored; only the remote port is byte-swapped */
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), /* state column is the constant TCP_SYN_RECV, timer code is 1 */
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx) /* per-socket callback: prints a header once, then one line per AF_INET socket */
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0) /* no socket supplied: nothing to print */
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0) /* column header is printed before any family filtering */
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "rem_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET) /* only IPv4 sockets are dumped */
+ return 0;
+
+ tp = bpf_skc_to_tcp_sock(sk_common); /* the first cast helper that returns non-NULL selects the dump routine */
+ if (tp)
+ return dump_tcp_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0; /* none of the casts applied: print nothing */
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..b4fbddfa4e10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h) /* returns 1 when h->pprev is NULL, 0 otherwise */
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer) /* returns 1 when timer->entry has a non-NULL pprev, 0 otherwise */
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x) /* converts x ticks at CONFIG_HZ into ticks at USER_HZ */
+{
+ /* The implementation here tailored to a particular
+ * setting of USER_HZ.
+ */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; /* nanoseconds per CONFIG_HZ tick, rounded to nearest */
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; /* nanoseconds per USER_HZ tick */
+
+ if ((tick_nsec % user_hz_nsec) == 0) { /* tick length is a whole multiple of the USER_HZ tick: use integer ratios */
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec; /* general case: scale through nanoseconds */
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta) /* like jiffies_to_clock_t(), but zero or negative deltas yield 0 */
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk) /* returns the inode number tied to sk's socket, or 0 when sk has no socket */
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; /* the socket is the 'socket' member of a socket_alloc; take its vfs_inode sibling */
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); /* i_ino is copied via the helper; its return value is not checked */
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) /* true when icsk_ack.pingpong has reached TCP_PINGPONG_THRESH */
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) /* true when snd_ssthresh is at or above TCP_INFINITE_SSTHRESH */
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, /* prints one line for a full IPv6 TCP socket; always returns 0 */
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct in6_addr *dest, *src;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->tcp.inet_conn; /* views of the same socket at successively more generic layers */
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = &sp->sk_v6_daddr; /* addresses are printed as stored; only the ports are byte-swapped */
+ src = &sp->sk_v6_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS || /* pick the timer code and expiry shown in the tr/tm->when columns */
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64(); /* no timer: expiry equals "now", so the printed delta is 0 */
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog; /* listening sockets report the accept backlog instead */
+ } else {
+ rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+ if (rx_queue < 0) /* never print a negative queue length */
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", /* each address is four 32-bit words, then the port */
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->tcp.snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen /* last column: fastopen queue limit when listening, else ssthresh or -1 */
+ : (tcp_in_initial_slowstart(&tp->tcp) ? -1
+ : tp->tcp.snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, /* prints one line for an IPv6 time-wait socket; always returns 0 */
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64(); /* ticks left on the time-wait timer; may be negative */
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", /* each address is four 32-bit words, then the port */
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0, /* most columns are constant 0 here; the uid argument is not printed */
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, /* prints one line for an IPv6 request socket; always returns 0 */
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ struct in6_addr *src, *dest;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64(); /* ticks until the request timer expires */
+ src = &irsk->ir_v6_loc_addr;
+ dest = &irsk->ir_v6_rmt_addr;
+
+ if (ttd < 0) /* an already-expired timer is reported as 0 */
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", /* each address is four 32-bit words, then the port */
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ irsk->ir_num, /* local port printed as stored; only the remote port is byte-swapped */
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), /* state column is the constant TCP_SYN_RECV, timer code is 1 */
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx) /* per-socket callback: prints a header once, then one line per AF_INET6 socket */
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp6_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0) /* no socket supplied: nothing to print */
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0) /* column header is printed before any family filtering */
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET6) /* only IPv6 sockets are dumped */
+ return 0;
+
+ tp = bpf_skc_to_tcp6_sock(sk_common); /* the first cast helper that returns non-NULL selects the dump routine */
+ if (tp)
+ return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0; /* none of the casts applied: print nothing */
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 13c2c90c835f..2a4647f20c46 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,25 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index 0aa71b333cf3..ee49493dc125 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,25 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
__u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
new file mode 100644
index 000000000000..e3a7575e81d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t { /* three-int key type used by hashmap1 */
+ int a;
+ int b;
+ int c;
+};
+
+struct { /* hash map keyed by struct key_t with __u64 values, at most 3 entries */
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+__u32 key_sum = 0; /* accumulator written by dump_bpf_hash_map */
+
+SEC("iter/bpf_map_elem") /* map-element iterator program */
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ void *key = ctx->key;
+
+ if (key == (void *)0) /* NULL key: nothing to read */
+ return 0;
+
+ /* out of bound access w.r.t. hashmap1 */
+ key_sum += *(__u32 *)(key + sizeof(struct key_t)); /* reads a __u32 that starts right after the last byte of a struct key_t */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
new file mode 100644
index 000000000000..1c7304f56b1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0; /* accumulator written by dump_bpf_hash_map */
+
+SEC("iter/bpf_map_elem") /* map-element iterator program */
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ void *value = ctx->value;
+
+ if (value == (void *)0) /* NULL value: nothing to read */
+ return 0;
+
+ /* negative offset, verifier failure. */
+ value_sum += *(__u32 *)(value - 4); /* reads the 4 bytes located immediately before the value pointer */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index dee1339e6905..d5e3df66ad9a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,27 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
int count = 0;
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..f258583afbbd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk) /* inode number behind sk's socket, or 0 when sk->sk_socket is NULL */
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; /* step from the embedded socket member to the vfs_inode member of the same socket_alloc */
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); /* the helper's return value is not checked */
+ return ino;
+}
+
+SEC("iter/udp") /* UDP socket iterator program; prints one row per socket whose family is not AF_INET6 */
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __be32 dest, src;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0) /* NULL udp_sk: nothing to print for this call */
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0) /* the column header is emitted only when seq_num is 0 */
+ BPF_SEQ_PRINTF(seq,
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode ref pointer drops\n");
+
+ /* filter out udp6 sockets */
+ inet = &udp_sk->inet;
+ if (inet->sk.sk_family == AF_INET6)
+ return 0;
+
+ inet = &udp_sk->inet; /* repeats the assignment made before the family check; the value is unchanged */
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ srcp = bpf_ntohs(inet->inet_sport); /* ports are converted to host byte order for printing */
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; /* rx_queue column: sk_rmem_alloc minus forward_deficit */
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ", /* sl, local address:port, remote address:port */
+ ctx->bucket, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0, /* tr, tm->when, retrnsmt and timeout are always printed as 0 */
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..65f93bb03f0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER \
+ " sl " \
+ "local_address " \
+ "remote_address " \
+ "st tx_queue rx_queue tr tm->when retrnsmt" \
+ " uid timeout inode ref pointer drops\n" /* column header line that dump_udp6 prints when seq_num is 0 */
+
+static long sock_i_ino(const struct sock *sk) /* inode number behind sk's socket, or 0 when sk->sk_socket is NULL */
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; /* step from the embedded socket member to the vfs_inode member of the same socket_alloc */
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); /* the helper's return value is not checked */
+ return ino;
+}
+
+SEC("iter/udp") /* UDP socket iterator program; prints one row per socket that casts to udp6_sock */
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ const struct in6_addr *dest, *src;
+ struct udp6_sock *udp6_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0) /* NULL udp_sk: nothing to print for this call */
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0) /* the column header is emitted only when seq_num is 0 */
+ BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
+ udp6_sk = bpf_skc_to_udp6_sock(udp_sk); /* used only as a filter; the fields below are read through udp_sk */
+ if (udp6_sk == (void *)0)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = bpf_ntohs(inet->inet_sport); /* ports are converted to host byte order for printing */
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; /* rx_queue column: sk_rmem_alloc minus forward_deficit */
+ dest = &inet->sk.sk_v6_daddr;
+ src = &inet->sk.sk_v6_rcv_saddr;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", /* each address is printed as four 32-bit words */
+ ctx->bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0, /* tr, tm->when, retrnsmt and timeout are always printed as 0 */
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
new file mode 100644
index 000000000000..01378911252b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
+#define AF_INET 2 /* address-family values; the iterator programs compare AF_INET6 against the socket family */
+#define AF_INET6 10
+
+#define ICSK_TIME_RETRANS 1 /* NOTE(review): ICSK_TIME_* presumably mirror the kernel's icsk timer ids; confirm */
+#define ICSK_TIME_PROBE0 3
+#define ICSK_TIME_LOSS_PROBE 5
+#define ICSK_TIME_REO_TIMEOUT 6
+
+#define IFNAMSIZ 16
+
+#define RTF_GATEWAY 0x0002
+
+#define TCP_INFINITE_SSTHRESH 0x7fffffff
+#define TCP_PINGPONG_THRESH 3
+
+#define fib_nh_dev nh_common.nhc_dev /* from here on: each macro expands a short member name into the nested member path it abbreviates */
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+
+#define inet_daddr sk.__sk_common.skc_daddr /* inet_sock shorthands, reached through its sk member */
+#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
+#define inet_dport sk.__sk_common.skc_dport
+
+#define ir_loc_addr req.__req_common.skc_rcv_saddr /* ir_*: reached through req.__req_common */
+#define ir_num req.__req_common.skc_num
+#define ir_rmt_addr req.__req_common.skc_daddr
+#define ir_rmt_port req.__req_common.skc_dport
+#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+
+#define sk_family __sk_common.skc_family /* sk_*: members of struct sock, most of them inside __sk_common */
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+#define sk_refcnt __sk_common.skc_refcnt
+#define sk_state __sk_common.skc_state
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+
+#define s6_addr32 in6_u.u6_addr32 /* 32-bit word view of an in6_addr */
+
+#define tw_daddr __tw_common.skc_daddr /* tw_*: reached through __tw_common */
+#define tw_rcv_saddr __tw_common.skc_rcv_saddr
+#define tw_dport __tw_common.skc_dport
+#define tw_refcnt __tw_common.skc_refcnt
+#define tw_v6_daddr __tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/btf_data.c b/tools/testing/selftests/bpf/progs/btf_data.c
new file mode 100644
index 000000000000..baa525275bde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_data.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+struct S { /* S and S1 are structs with identical members; U and U1 are unions with identical members */
+ int a;
+ int b;
+ int c;
+};
+
+union U {
+ int a;
+ int b;
+ int c;
+};
+
+struct S1 {
+ int a;
+ int b;
+ int c;
+};
+
+union U1 {
+ int a;
+ int b;
+ int c;
+};
+
+typedef int T; /* all six typedefs alias int */
+typedef int S; /* S, U, S1 and U1 reuse the tag names above; C keeps tags and typedef names in separate name spaces */
+typedef int U;
+typedef int T1;
+typedef int S1;
+typedef int U1;
+
+struct root_struct { /* references every typedef and every tagged type declared above exactly once */
+ S m_1; /* m_1..m_6 use the typedef names, so each is an int */
+ T m_2;
+ U m_3;
+ S1 m_4;
+ T1 m_5;
+ U1 m_6;
+ struct S m_7; /* m_7..m_10 use the struct/union tags */
+ struct S1 m_8;
+ union U m_9;
+ union U1 m_10;
+};
+
+int func(struct root_struct *root) /* ignores root and returns 0; NOTE(review): presumably exists only so root_struct is referenced - confirm */
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
new file mode 100644
index 000000000000..a0778fe7857a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_CG_STORAGE_MULTI_H
+#define __PROGS_CG_STORAGE_MULTI_H
+
+#include <asm/types.h>
+
+struct cgroup_value { /* value type of the cgroup_storage maps in the cg_storage_multi_* programs */
+ __u32 egress_pkts; /* incremented by the egress programs */
+ __u32 ingress_pkts; /* incremented by the ingress programs */
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
new file mode 100644
index 000000000000..44ad46b33539
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct { /* cgroup storage map: struct bpf_cgroup_storage_key -> struct cgroup_value */
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0; /* number of program runs; incremented with an atomic add in egress() */
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb) /* adds 1 to egress_pkts in the local cgroup storage and to invocations; always returns 1 */
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1; /* NOTE(review): for cgroup_skb programs 1 presumably means "let the packet through" - confirm */
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
new file mode 100644
index 000000000000..a25373002055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct { /* cgroup storage map: struct bpf_cgroup_storage_key -> struct cgroup_value */
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0; /* number of runs across all three programs in this file; incremented with atomic adds */
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb) /* adds 1 to egress_pkts in the local cgroup storage and to invocations; always returns 1 */
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress/2") /* same body as egress1, placed in a different section */
+int egress2(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/ingress") /* ingress counterpart of the egress programs: bumps ingress_pkts instead of egress_pkts */
+int ingress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
new file mode 100644
index 000000000000..a149f33bc533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct { /* cgroup storage map keyed by a plain __u64 (cg_storage_multi_isolated.c uses struct bpf_cgroup_storage_key instead) */
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0; /* number of runs across all three programs in this file; incremented with atomic adds */
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb) /* adds 1 to egress_pkts in the local cgroup storage and to invocations; always returns 1 */
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress/2") /* same body as egress1, placed in a different section */
+int egress2(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/ingress") /* ingress counterpart of the egress programs: bumps ingress_pkts instead of egress_pkts */
+int ingress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 1ab2c5eba86c..b1b2773c0b9d 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int set_keepalive(struct bpf_sock_addr *ctx) /* exercises keepalive-related bpf_setsockopt calls; returns 0 when all succeed, 1 at the first failure */
+{
+ int zero = 0, one = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one))) /* switch SO_KEEPALIVE on */
+ return 1;
+ if (ctx->type == SOCK_STREAM) { /* the SOL_TCP options are set only for stream sockets, each to 1 */
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+ return 1;
+ }
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero))) /* switch SO_KEEPALIVE back off before returning */
+ return 1;
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (bind_to_device(ctx))
return 0;
+ if (set_keepalive(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
new file mode 100644
index 000000000000..473665cac67e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -0,0 +1,686 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define LOOP_BOUND 0xf /* constant upper bound used in check_array's loop condition */
+#define MAX_ENTRIES 8 /* max_entries given to most maps in this file */
+#define HALF_ENTRIES (MAX_ENTRIES >> 1) /* number of keys check_hash inserts */
+
+_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
+
+enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; /* map type currently under test; written by VERIFY_TYPE */
+__u32 g_line = 0; /* __LINE__ of the most recently evaluated VERIFY */
+
+#define VERIFY_TYPE(type, func) ({ \
+ g_map_type = type; \
+ if (!func()) \
+ return 0; \
+}) /* records the type, calls func(), and makes the enclosing function return 0 when func() returns 0 */
+
+
+#define VERIFY(expr) ({ \
+ g_line = __LINE__; \
+ if (!(expr)) \
+ return 0; \
+}) /* records the line, then makes the enclosing function return 0 when expr is false */
+
+struct bpf_map_memory { /* local declaration listing only the member this file reads */
+ __u32 pages;
+} __attribute__((preserve_access_index)); /* clang BPF attribute: member accesses are recorded for relocation instead of relying on this local layout */
+
+struct bpf_map { /* local declaration of the generic map header fields that the checks below compare */
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 id;
+ struct bpf_map_memory memory;
+} __attribute__((preserve_access_index));
+
+static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, /* returns 1 when map's header matches the expected values, 0 at the first mismatch */
+ __u32 value_size, __u32 max_entries)
+{
+ VERIFY(map->map_type == g_map_type); /* expected type comes from the global set by VERIFY_TYPE */
+ VERIFY(map->key_size == key_size);
+ VERIFY(map->value_size == value_size);
+ VERIFY(map->max_entries == max_entries);
+ VERIFY(map->id > 0);
+ VERIFY(map->memory.pages > 0);
+
+ return 1;
+}
+
+static inline int check_bpf_map_ptr(struct bpf_map *indirect, /* returns 1 when both pointers expose identical header fields, 0 at the first difference */
+ struct bpf_map *direct)
+{
+ VERIFY(indirect->map_type == direct->map_type);
+ VERIFY(indirect->key_size == direct->key_size);
+ VERIFY(indirect->value_size == direct->value_size);
+ VERIFY(indirect->max_entries == direct->max_entries);
+ VERIFY(indirect->id == direct->id);
+ VERIFY(indirect->memory.pages == direct->memory.pages);
+
+ return 1;
+}
+
+static inline int check(struct bpf_map *indirect, struct bpf_map *direct, /* runs check_bpf_map_ptr, then check_bpf_map_fields on indirect; 1 only when both pass */
+ __u32 key_size, __u32 value_size, __u32 max_entries)
+{
+ VERIFY(check_bpf_map_ptr(indirect, direct));
+ VERIFY(check_bpf_map_fields(indirect, key_size, value_size,
+ max_entries));
+ return 1;
+}
+
+static inline int check_default(struct bpf_map *indirect, /* check() with the common expectation: __u32 key, __u32 value, MAX_ENTRIES entries */
+ struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
+typedef struct { /* local atomic_t with just the counter member, used for bpf_htab.count */
+ int counter;
+} atomic_t;
+
+struct bpf_htab { /* local declaration of the hash-table map fields read by check_hash; begins with the generic map header */
+ struct bpf_map map;
+ atomic_t count;
+ __u32 n_buckets;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct { /* hash map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_hash SEC(".maps");
+
+static inline int check_hash(void) /* returns 1 when m_hash passes the header checks and its bpf_htab fields behave as expected, else 0 */
+{
+ struct bpf_htab *hash = (struct bpf_htab *)&m_hash; /* the same map address viewed as bpf_htab and as bpf_map */
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ int i;
+
+ VERIFY(check_default(&hash->map, map));
+
+ VERIFY(hash->n_buckets == MAX_ENTRIES);
+ VERIFY(hash->elem_size == 64); /* NOTE(review): 64 is the expected in-kernel element size for this key/value pair - confirm against the htab element layout */
+
+ VERIFY(hash->count.counter == 0); /* nothing inserted yet */
+ for (i = 0; i < HALF_ENTRIES; ++i) { /* insert keys 0..HALF_ENTRIES-1, each with value 1 */
+ const __u32 key = i;
+ const __u32 val = 1;
+
+ if (bpf_map_update_elem(hash, &key, &val, 0)) /* a failed update fails the check without touching g_line */
+ return 0;
+ }
+ VERIFY(hash->count.counter == HALF_ENTRIES); /* the element count must match the number of inserts */
+
+ return 1;
+}
+
+struct bpf_array { /* local declaration of the array map fields read in this file; begins with the generic map header */
+ struct bpf_map map;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct { /* array map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_array SEC(".maps");
+
+static inline int check_array(void) /* returns 1 when m_array passes the header checks and every index below max_entries can be looked up, else 0 */
+{
+ struct bpf_array *array = (struct bpf_array *)&m_array;
+ struct bpf_map *map = (struct bpf_map *)&m_array;
+ int i, n_lookups = 0, n_keys = 0;
+
+ VERIFY(check_default(&array->map, map));
+
+ VERIFY(array->elem_size == 8); /* NOTE(review): 8 is the expected in-kernel element size for a __u32 value - confirm */
+
+ for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) { /* bound is read from the map at run time; LOOP_BOUND adds a constant cap */
+ const __u32 key = i;
+ __u32 *val = bpf_map_lookup_elem(array, &key);
+
+ ++n_lookups;
+ if (val)
+ ++n_keys;
+ }
+
+ VERIFY(n_lookups == MAX_ENTRIES); /* the loop ran exactly MAX_ENTRIES times */
+ VERIFY(n_keys == MAX_ENTRIES); /* and every lookup returned a non-NULL value */
+
+ return 1;
+}
+
+struct { /* program-array map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_prog_array SEC(".maps");
+
+static inline int check_prog_array(void) /* returns 1 when m_prog_array, viewed as bpf_array, passes check_default, else 0 */
+{
+ struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array;
+ struct bpf_map *map = (struct bpf_map *)&m_prog_array;
+
+ VERIFY(check_default(&prog_array->map, map));
+
+ return 1;
+}
+
+struct { /* perf-event-array map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_perf_event_array SEC(".maps");
+
+static inline int check_perf_event_array(void) /* returns 1 when m_perf_event_array, viewed as bpf_array, passes check_default, else 0 */
+{
+ struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array;
+ struct bpf_map *map = (struct bpf_map *)&m_perf_event_array;
+
+ VERIFY(check_default(&perf_event_array->map, map));
+
+ return 1;
+}
+
+struct { /* per-CPU hash map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_hash SEC(".maps");
+
+static inline int check_percpu_hash(void) /* returns 1 when m_percpu_hash, viewed as bpf_htab, passes check_default, else 0 */
+{
+ struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_hash;
+
+ VERIFY(check_default(&percpu_hash->map, map));
+
+ return 1;
+}
+
+struct { /* per-CPU array map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_array SEC(".maps");
+
+static inline int check_percpu_array(void) /* returns 1 when m_percpu_array, viewed as bpf_array, passes check_default, else 0 */
+{
+ struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_array;
+
+ VERIFY(check_default(&percpu_array->map, map));
+
+ return 1;
+}
+
+struct bpf_stack_map { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* stack-trace map under test: __u32 -> __u64, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u64);
+} m_stack_trace SEC(".maps");
+
+static inline int check_stack_trace(void) /* returns 1 when m_stack_trace passes check() with a __u64 value size, else 0 */
+{
+ struct bpf_stack_map *stack_trace =
+ (struct bpf_stack_map *)&m_stack_trace;
+ struct bpf_map *map = (struct bpf_map *)&m_stack_trace;
+
+ VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct { /* cgroup-array map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cgroup_array SEC(".maps");
+
+static inline int check_cgroup_array(void) /* returns 1 when m_cgroup_array, viewed as bpf_array, passes check_default, else 0 */
+{
+ struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_array;
+
+ VERIFY(check_default(&cgroup_array->map, map));
+
+ return 1;
+}
+
+struct { /* LRU hash map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_hash SEC(".maps");
+
+static inline int check_lru_hash(void) /* returns 1 when m_lru_hash, viewed as bpf_htab, passes check_default, else 0 */
+{
+ struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_hash;
+
+ VERIFY(check_default(&lru_hash->map, map));
+
+ return 1;
+}
+
+struct { /* LRU per-CPU hash map under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_percpu_hash SEC(".maps");
+
+static inline int check_lru_percpu_hash(void) /* returns 1 when m_lru_percpu_hash, viewed as bpf_htab, passes check_default, else 0 */
+{
+ struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash;
+
+ VERIFY(check_default(&lru_percpu_hash->map, map));
+
+ return 1;
+}
+
+struct lpm_trie { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct lpm_key { /* key type of m_lpm_trie: the trie key header followed by a __u32 of data */
+ struct bpf_lpm_trie_key trie_key;
+ __u32 data;
+};
+
+struct { /* LPM trie map under test: struct lpm_key -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, struct lpm_key);
+ __type(value, __u32);
+} m_lpm_trie SEC(".maps");
+
+static inline int check_lpm_trie(void) /* returns 1 when m_lpm_trie passes check() with sizeof(struct lpm_key) as key size, else 0 */
+{
+ struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie;
+ struct bpf_map *map = (struct bpf_map *)&m_lpm_trie;
+
+ VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct inner_map { /* single-entry array map; used as the inner map of both map-in-map definitions in this file */
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map SEC(".maps");
+
+struct { /* array-of-maps under test; the inner map type is spelled out inline */
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ });
+} m_array_of_maps SEC(".maps") = {
+ .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 }, /* first slot points at inner_map, the remaining initializers are 0 */
+};
+
+static inline int check_array_of_maps(void) /* returns 1 when m_array_of_maps, viewed as bpf_array, passes check_default, else 0 */
+{
+ struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+
+ VERIFY(check_default(&array_of_maps->map, map));
+
+ return 1;
+}
+
+struct { /* hash-of-maps under test; the inner map type is struct inner_map */
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct inner_map);
+} m_hash_of_maps SEC(".maps") = {
+ .values = {
+ [2] = &inner_map, /* only slot 2 is given an initializer */
+ },
+};
+
+static inline int check_hash_of_maps(void) /* returns 1 when m_hash_of_maps, viewed as bpf_htab, passes check_default, else 0 */
+{
+ struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+
+ VERIFY(check_default(&hash_of_maps->map, map));
+
+ return 1;
+}
+
+struct bpf_dtab { /* local declaration: only the leading generic map header is needed; shared by the devmap and devmap-hash checks */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* devmap under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap SEC(".maps");
+
+static inline int check_devmap(void) /* returns 1 when m_devmap passes check_default, else 0 */
+{
+ struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap;
+
+ VERIFY(check_default(&devmap->map, map));
+
+ return 1;
+}
+
+struct bpf_stab { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* sockmap under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockmap SEC(".maps");
+
+static inline int check_sockmap(void) /* returns 1 when m_sockmap passes check_default, else 0 */
+{
+ struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap;
+ struct bpf_map *map = (struct bpf_map *)&m_sockmap;
+
+ VERIFY(check_default(&sockmap->map, map));
+
+ return 1;
+}
+
+struct bpf_cpu_map { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* cpumap under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cpumap SEC(".maps");
+
+static inline int check_cpumap(void) /* returns 1 when m_cpumap passes check_default, else 0 */
+{
+ struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap;
+ struct bpf_map *map = (struct bpf_map *)&m_cpumap;
+
+ VERIFY(check_default(&cpumap->map, map));
+
+ return 1;
+}
+
+struct xsk_map { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* xskmap under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_xskmap SEC(".maps");
+
+static inline int check_xskmap(void) /* returns 1 when m_xskmap passes check_default, else 0 */
+{
+ struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap;
+ struct bpf_map *map = (struct bpf_map *)&m_xskmap;
+
+ VERIFY(check_default(&xskmap->map, map));
+
+ return 1;
+}
+
+struct bpf_shtab { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* sockhash under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockhash SEC(".maps");
+
+static inline int check_sockhash(void) /* returns 1 when m_sockhash passes check_default, else 0 */
+{
+ struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash;
+ struct bpf_map *map = (struct bpf_map *)&m_sockhash;
+
+ VERIFY(check_default(&sockhash->map, map));
+
+ return 1;
+}
+
+struct bpf_cgroup_storage_map { /* local declaration: only the leading generic map header is needed; shared with the per-CPU cgroup storage check */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* cgroup storage map under test; no max_entries is declared */
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_cgroup_storage SEC(".maps");
+
+static inline int check_cgroup_storage(void) /* returns 1 when m_cgroup_storage passes check() with max_entries expected to be 0, else 0 */
+{
+ struct bpf_cgroup_storage_map *cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage;
+
+ VERIFY(check(&cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct reuseport_array { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* reuseport socket array under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_reuseport_sockarray SEC(".maps");
+
+static inline int check_reuseport_sockarray(void) /* returns 1 when m_reuseport_sockarray passes check_default, else 0 */
+{
+ struct reuseport_array *reuseport_sockarray =
+ (struct reuseport_array *)&m_reuseport_sockarray;
+ struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray;
+
+ VERIFY(check_default(&reuseport_sockarray->map, map));
+
+ return 1;
+}
+
+struct { /* per-CPU cgroup storage map under test; no max_entries is declared */
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_percpu_cgroup_storage SEC(".maps");
+
+static inline int check_percpu_cgroup_storage(void) /* returns 1 when m_percpu_cgroup_storage passes check() with max_entries expected to be 0, else 0 */
+{
+ struct bpf_cgroup_storage_map *percpu_cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage;
+
+ VERIFY(check(&percpu_cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct bpf_queue_stack { /* local declaration: only the leading generic map header is needed; shared by the queue and stack checks */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* queue map under test: no key type, __u32 values, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_queue SEC(".maps");
+
+static inline int check_queue(void) /* returns 1 when m_queue passes check() with key size expected to be 0, else 0 */
+{
+ struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue;
+ struct bpf_map *map = (struct bpf_map *)&m_queue;
+
+ VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct { /* stack map under test: no key type, __u32 values, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_stack SEC(".maps");
+
+static inline int check_stack(void) /* returns 1 when m_stack passes check() with key size expected to be 0, else 0 */
+{
+ struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack;
+ struct bpf_map *map = (struct bpf_map *)&m_stack;
+
+ VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct bpf_sk_storage_map { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* socket storage map under test: __u32 -> __u32; no max_entries is declared */
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sk_storage SEC(".maps");
+
+static inline int check_sk_storage(void) /* returns 1 when m_sk_storage passes check() with max_entries expected to be 0, else 0 */
+{
+ struct bpf_sk_storage_map *sk_storage =
+ (struct bpf_sk_storage_map *)&m_sk_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
+
+ VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct { /* devmap-hash under test: __u32 -> __u32, MAX_ENTRIES entries */
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap_hash SEC(".maps");
+
+static inline int check_devmap_hash(void) /* returns 1 when m_devmap_hash, viewed as bpf_dtab, passes check_default, else 0 */
+{
+ struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap_hash;
+
+ VERIFY(check_default(&devmap_hash->map, map));
+
+ return 1;
+}
+
+struct bpf_ringbuf_map { /* local declaration: only the leading generic map header is needed */
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct { /* ring buffer map under test: no key or value type, max_entries of 1 << 12 */
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} m_ringbuf SEC(".maps");
+
+static inline int check_ringbuf(void) /* returns 1 when m_ringbuf passes check() with key and value sizes expected to be 0, else 0 */
+{
+ struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
+ struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+
+ VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress") /* entry point: runs every per-map-type check in order */
+int cg_skb(void *ctx) /* returns 1 when all checks pass; returns 0 at the first failing check, leaving g_map_type set to that check's type */
+{
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap);
+ VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ check_reuseport_sockarray);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ check_percpu_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack);
+ VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf);
+
+ return 1;
+}
+
+__u32 _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
new file mode 100644
index 000000000000..25467d13c356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackdata_map SEC(".maps");
+
+long stackid_kernel = 1;
+long stackid_user = 1;
+long stack_kernel = 1;
+long stack_user = 1;
+
+SEC("perf_event")
+int oncpu(void *ctx)
+{
+ stack_trace_t *trace;
+ __u32 key = 0;
+ long val;
+
+ val = bpf_get_stackid(ctx, &stackmap, 0);
+ if (val >= 0) /* 0 is a valid stack id; only negative values are errors */
+ stackid_kernel = 2;
+ val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+ if (val >= 0) /* same rule for the user-space stack id */
+ stackid_user = 2;
+
+ trace = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!trace)
+ return 0;
+
+ val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0);
+ if (val > 0)
+ stack_kernel = 2;
+
+ val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK);
+ if (val > 0)
+ stack_user = 2;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c
new file mode 100644
index 000000000000..62c8cdec6d5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoload.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
+struct fake_kernel_struct {
+ int whatever;
+} __attribute__((preserve_access_index));
+
+SEC("fentry/unexisting-kprobe-will-fail-if-loaded")
+int prog3(const void *ctx)
+{
+ struct fake_kernel_struct *fake = (void *)ctx;
+ fake->whatever = 123;
+ prog3_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c
new file mode 100644
index 000000000000..20861ec2f674
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_retro.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct {
+ int tgid;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} exp_tgid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} results SEC(".maps");
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
+{
+ struct task_struct *task = (void *)bpf_get_current_task();
+ int tgid = BPF_CORE_READ(task, tgid);
+ int zero = 0;
+ int real_tgid = bpf_get_current_pid_tgid() >> 32;
+ int *exp_tgid = bpf_map_lookup_elem(&exp_tgid_map, &zero);
+
+ /* only pass through sys_enters from test process */
+ if (!exp_tgid || *exp_tgid != real_tgid)
+ return 0;
+
+ bpf_map_update_elem(&results, &zero, &tgid, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c
new file mode 100644
index 000000000000..ddb687c5d125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_endian.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+__u16 in16 = 0;
+__u32 in32 = 0;
+__u64 in64 = 0;
+
+__u16 out16 = 0;
+__u32 out32 = 0;
+__u64 out64 = 0;
+
+__u16 const16 = 0;
+__u32 const32 = 0;
+__u64 const64 = 0;
+
+SEC("raw_tp/sys_enter")
+int sys_enter(const void *ctx)
+{
+ out16 = __builtin_bswap16(in16);
+ out32 = __builtin_bswap32(in32);
+ out64 = __builtin_bswap64(in64);
+ const16 = ___bpf_swab16(IN16);
+ const32 = ___bpf_swab32(IN32);
+ const64 = ___bpf_swab64(IN64);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 29817a703984..b6a6eb279e54 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -57,8 +57,9 @@ struct {
SEC("raw_tracepoint/sys_enter")
int bpf_prog1(void *ctx)
{
- int max_len, max_buildid_len, usize, ksize, total_size;
+ int max_len, max_buildid_len, total_size;
struct stack_trace_t *data;
+ long usize, ksize;
void *raw_data;
__u32 key = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c
new file mode 100644
index 000000000000..6c9cbb5a3bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u64 out__bpf_link_fops = -1;
+__u64 out__bpf_link_fops1 = -1;
+__u64 out__btf_size = -1;
+__u64 out__per_cpu_start = -1;
+
+extern const void bpf_link_fops __ksym;
+extern const void __start_BTF __ksym;
+extern const void __stop_BTF __ksym;
+extern const void __per_cpu_start __ksym;
+/* non-existing symbol, weak, default to zero */
+extern const void bpf_link_fops1 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ out__bpf_link_fops = (__u64)&bpf_link_fops;
+ out__btf_size = (__u64)(&__stop_BTF - &__start_BTF);
+ out__per_cpu_start = (__u64)&__per_cpu_start;
+
+ out__bpf_link_fops1 = (__u64)&bpf_link_fops1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ad59c4c9aba8..8207a2dc2f9d 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -12,8 +12,8 @@ struct {
__uint(value_size, sizeof(int));
} perf_buf_map SEC(".maps");
-SEC("kprobe/sys_nanosleep")
-int BPF_KPROBE(handle_sys_nanosleep_entry)
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
{
int cpu = bpf_get_smp_processor_id();
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
new file mode 100644
index 000000000000..bbf8296f4d66
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define IP4(a, b, c, d) \
+ bpf_htonl((((__u32)(a) & 0xffU) << 24) | \
+ (((__u32)(b) & 0xffU) << 16) | \
+ (((__u32)(c) & 0xffU) << 8) | \
+ (((__u32)(d) & 0xffU) << 0))
+#define IP6(aaaa, bbbb, cccc, dddd) \
+ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
+
+#define MAX_SOCKS 32
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __type(key, __u32);
+ __type(value, __u64);
+} redir_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} run_map SEC(".maps");
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+enum {
+ SERVER_A = 0,
+ SERVER_B,
+};
+
+/* Addressable key/value constants for convenience */
+static const int KEY_PROG1 = PROG1;
+static const int KEY_PROG2 = PROG2;
+static const int PROG_DONE = 1;
+
+static const __u32 KEY_SERVER_A = SERVER_A;
+static const __u32 KEY_SERVER_B = SERVER_B;
+
+static const __u16 DST_PORT = 7007; /* Host byte order */
+static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
+static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
+
+SEC("sk_lookup/lookup_pass")
+int lookup_pass(struct bpf_sk_lookup *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_lookup/lookup_drop")
+int lookup_drop(struct bpf_sk_lookup *ctx)
+{
+ return SK_DROP;
+}
+
+SEC("sk_reuseport/reuse_pass")
+int reuseport_pass(struct sk_reuseport_md *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_reuseport/reuse_drop")
+int reuseport_drop(struct sk_reuseport_md *ctx)
+{
+ return SK_DROP;
+}
+
+/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
+SEC("sk_lookup/redir_port")
+int redir_port(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip4")
+int redir_ip4(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip4 != DST_IP4)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip6")
+int redir_ip6(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET6)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip6[0] != DST_IP6[0] ||
+ ctx->local_ip6[1] != DST_IP6[1] ||
+ ctx->local_ip6[2] != DST_IP6[2] ||
+ ctx->local_ip6[3] != DST_IP6[3])
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a")
+int select_sock_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a_no_reuseport")
+int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_reuseport/select_sock_b")
+int select_sock_b(struct sk_reuseport_md *ctx)
+{
+ __u32 key = KEY_SERVER_B;
+ int err;
+
+ err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
+SEC("sk_lookup/sk_assign_eexist")
+int sk_assign_eexist(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -EEXIST) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -EEXIST);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
+SEC("sk_lookup/sk_assign_replace_flag")
+int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(sk=NULL) is accepted. */
+SEC("sk_lookup/sk_assign_null")
+int sk_assign_null(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ if (ctx->sk != sk)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err != -EEXIST)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that selected sk is accessible through context. */
+SEC("sk_lookup/access_ctx_sk")
+int access_ctx_sk(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk1 = NULL, *sk2 = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ /* Try accessing unassigned (NULL) ctx->sk field */
+ if (ctx->sk && ctx->sk->family != AF_INET)
+ goto out;
+
+ /* Assign a value to ctx->sk */
+ sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk1)
+ goto out;
+ err = bpf_sk_assign(ctx, sk1, 0);
+ if (err)
+ goto out;
+ if (ctx->sk != sk1)
+ goto out;
+
+ /* Access ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ /* Reset selection */
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk)
+ goto out;
+
+ /* Assign another socket */
+ sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk2)
+ goto out;
+ err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk != sk2)
+ goto out;
+
+ /* Access reassigned ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk1)
+ bpf_sk_release(sk1);
+ if (sk2)
+ bpf_sk_release(sk2);
+ return ret;
+}
+
+/* Check narrow loads from ctx fields that support them.
+ *
+ * Narrow loads of size >= target field size from a non-zero offset
+ * are not covered because they give bogus results, that is the
+ * verifier ignores the offset.
+ */
+SEC("sk_lookup/ctx_narrow_access")
+int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, family;
+ __u16 *half;
+ __u8 *byte;
+ bool v4;
+
+ v4 = (ctx->family == AF_INET);
+
+ /* Narrow loads from family field */
+ byte = (__u8 *)&ctx->family;
+ half = (__u16 *)&ctx->family;
+ if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
+ byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ if (half[0] != (v4 ? AF_INET : AF_INET6))
+ return SK_DROP;
+
+ byte = (__u8 *)&ctx->protocol;
+ if (byte[0] != IPPROTO_TCP ||
+ byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->protocol;
+ if (half[0] != IPPROTO_TCP)
+ return SK_DROP;
+
+ /* Narrow loads from remote_port field. Expect non-0 value. */
+ byte = (__u8 *)&ctx->remote_port;
+ if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_port;
+ if (half[0] == 0)
+ return SK_DROP;
+
+ /* Narrow loads from local_port field. Expect DST_PORT. */
+ byte = (__u8 *)&ctx->local_port;
+ if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
+ byte[1] != ((DST_PORT >> 8) & 0xff) ||
+ byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_port;
+ if (half[0] != DST_PORT)
+ return SK_DROP;
+
+ /* Narrow loads from IPv4 fields */
+ if (v4) {
+ /* Expect non-0.0.0.0 in remote_ip4 */
+ byte = (__u8 *)&ctx->remote_ip4;
+ if (byte[0] == 0 && byte[1] == 0 &&
+ byte[2] == 0 && byte[3] == 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip4;
+ if (half[0] == 0 && half[1] == 0)
+ return SK_DROP;
+
+ /* Expect DST_IP4 in local_ip4 */
+ byte = (__u8 *)&ctx->local_ip4;
+ if (byte[0] != ((DST_IP4 >> 0) & 0xff) ||
+ byte[1] != ((DST_IP4 >> 8) & 0xff) ||
+ byte[2] != ((DST_IP4 >> 16) & 0xff) ||
+ byte[3] != ((DST_IP4 >> 24) & 0xff))
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip4;
+ if (half[0] != ((DST_IP4 >> 0) & 0xffff) ||
+ half[1] != ((DST_IP4 >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect 0.0.0.0 IPs when family != AF_INET */
+ byte = (__u8 *)&ctx->remote_ip4;
+ if (byte[0] != 0 || byte[1] != 0 ||
+ byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip4;
+ if (half[0] != 0 || half[1] != 0)
+ return SK_DROP;
+
+ byte = (__u8 *)&ctx->local_ip4;
+ if (byte[0] != 0 || byte[1] != 0 ||
+ byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip4;
+ if (half[0] != 0 || half[1] != 0)
+ return SK_DROP;
+ }
+
+ /* Narrow loads from IPv6 fields */
+ if (!v4) {
+ /* Expect non-:: IP in remote_ip6 */
+ byte = (__u8 *)&ctx->remote_ip6;
+ if (byte[0] == 0 && byte[1] == 0 &&
+ byte[2] == 0 && byte[3] == 0 &&
+ byte[4] == 0 && byte[5] == 0 &&
+ byte[6] == 0 && byte[7] == 0 &&
+ byte[8] == 0 && byte[9] == 0 &&
+ byte[10] == 0 && byte[11] == 0 &&
+ byte[12] == 0 && byte[13] == 0 &&
+ byte[14] == 0 && byte[15] == 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip6;
+ if (half[0] == 0 && half[1] == 0 &&
+ half[2] == 0 && half[3] == 0 &&
+ half[4] == 0 && half[5] == 0 &&
+ half[6] == 0 && half[7] == 0)
+ return SK_DROP;
+
+ /* Expect DST_IP6 in local_ip6 */
+ byte = (__u8 *)&ctx->local_ip6;
+ if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) ||
+ byte[1] != ((DST_IP6[0] >> 8) & 0xff) ||
+ byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
+ byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
+ byte[4] != ((DST_IP6[1] >> 0) & 0xff) ||
+ byte[5] != ((DST_IP6[1] >> 8) & 0xff) ||
+ byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
+ byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
+ byte[8] != ((DST_IP6[2] >> 0) & 0xff) ||
+ byte[9] != ((DST_IP6[2] >> 8) & 0xff) ||
+ byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
+ byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
+ byte[12] != ((DST_IP6[3] >> 0) & 0xff) ||
+ byte[13] != ((DST_IP6[3] >> 8) & 0xff) ||
+ byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
+ byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip6;
+ if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) ||
+ half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
+ half[2] != ((DST_IP6[1] >> 0) & 0xffff) ||
+ half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
+ half[4] != ((DST_IP6[2] >> 0) & 0xffff) ||
+ half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
+ half[6] != ((DST_IP6[3] >> 0) & 0xffff) ||
+ half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect :: IPs when family != AF_INET6 */
+ byte = (__u8 *)&ctx->remote_ip6;
+ if (byte[0] != 0 || byte[1] != 0 ||
+ byte[2] != 0 || byte[3] != 0 ||
+ byte[4] != 0 || byte[5] != 0 ||
+ byte[6] != 0 || byte[7] != 0 ||
+ byte[8] != 0 || byte[9] != 0 ||
+ byte[10] != 0 || byte[11] != 0 ||
+ byte[12] != 0 || byte[13] != 0 ||
+ byte[14] != 0 || byte[15] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip6;
+ if (half[0] != 0 || half[1] != 0 ||
+ half[2] != 0 || half[3] != 0 ||
+ half[4] != 0 || half[5] != 0 ||
+ half[6] != 0 || half[7] != 0)
+ return SK_DROP;
+
+ byte = (__u8 *)&ctx->local_ip6;
+ if (byte[0] != 0 || byte[1] != 0 ||
+ byte[2] != 0 || byte[3] != 0 ||
+ byte[4] != 0 || byte[5] != 0 ||
+ byte[6] != 0 || byte[7] != 0 ||
+ byte[8] != 0 || byte[9] != 0 ||
+ byte[10] != 0 || byte[11] != 0 ||
+ byte[12] != 0 || byte[13] != 0 ||
+ byte[14] != 0 || byte[15] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip6;
+ if (half[0] != 0 || half[1] != 0 ||
+ half[2] != 0 || half[3] != 0 ||
+ half[4] != 0 || half[5] != 0 ||
+ half[6] != 0 || half[7] != 0)
+ return SK_DROP;
+ }
+
+ /* Success, redirect to KEY_SERVER_B */
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (sk) {
+ bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ }
+ return SK_PASS;
+}
+
+/* Check that sk_assign rejects SERVER_A socket with -ESOCKTNOSUPPORT */
+SEC("sk_lookup/sk_assign_esocknosupport")
+int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -ESOCKTNOSUPPORT) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -ESOCKTNOSUPPORT);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, pass to regular lookup */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+SEC("sk_lookup/multi_prog_pass1")
+int multi_prog_pass1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_pass2")
+int multi_prog_pass2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_drop1")
+int multi_prog_drop1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+SEC("sk_lookup/multi_prog_drop2")
+int multi_prog_drop2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ if (err)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir1")
+int multi_prog_redir1(struct bpf_sk_lookup *ctx)
+{
+ /* Helper's verdict is unused; SK_PASS is returned regardless. */
+ (void)select_server_a(ctx);
+
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir2")
+int multi_prog_redir2(struct bpf_sk_lookup *ctx)
+{
+ /* Helper's verdict is unused; SK_PASS is returned regardless. */
+ (void)select_server_a(ctx);
+
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 77ae86f44db5..374ccef704e1 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -20,7 +20,9 @@ long long in4 __attribute__((aligned(64))) = 0;
struct s in5 = {};
/* .rodata section */
-const volatile int in6 = 0;
+const volatile struct {
+ const int in6;
+} in = {};
/* .data section */
int out1 = -1;
@@ -46,7 +48,7 @@ int handler(const void *ctx)
out3 = in3;
out4 = in4;
out5 = in5;
- out6 = in6;
+ out6 = in.in6;
bpf_syscall = CONFIG_BPF_SYSCALL;
kern_ver = LINUX_KERNEL_VERSION;
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..cd4b72c55dfe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+long payload1_len1 = 0;
+long payload1_len2 = 0;
+long total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload3_len1 = -1;
+int payload3_len2 = -1;
+int total3= -1;
+char payload3[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload4_len1 = -1;
+int payload4_len2 = -1;
+int total4= -1;
+char payload4[MAX_LEN + MAX_LEN] = { 1 };
+
+SEC("raw_tp/sys_enter")
+int handler64_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload1;
+ u64 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload1_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload1_len2 = len;
+ }
+
+ total1 = payload - (void *)payload1;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handler64_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload3;
+ long len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len2 = len;
+ }
+ total3 = payload - (void *)payload3;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handler32_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload2;
+ u32 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len2 = len;
+ }
+
+ total2 = payload - (void *)payload2;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_exit")
+int handler32_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload4;
+ int len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len2 = len;
+ }
+ total4 = payload - (void *)payload4;
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_exit_getpid")
+int handler_exit(void *regs)
+{
+ long bla;
+
+ if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+ return 1;
+ else
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 5611b564d3b1..29fa09d6a6c6 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
return 0;
}
-SEC("kprobe/hrtimer_nanosleep")
-int BPF_KPROBE(handle__kprobe,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("kprobe/hrtimer_start_range_ns")
+int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
kprobe_called = true;
return 0;
}
-SEC("fentry/hrtimer_nanosleep")
-int BPF_PROG(handle__fentry,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("fentry/hrtimer_start_range_ns")
+int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
fentry_called = true;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
new file mode 100644
index 000000000000..eb93ea95d1d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("xdp/handler")
+int xdp_handler(struct xdp_md *xdp)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
new file mode 100644
index 000000000000..59ee4f182ff8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&cpu_map, 1, 0);
+}
+
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ if (ctx->ingress_ifindex == IFINDEX_LO)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
new file mode 100644
index 000000000000..8ca7f399b670
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020, Oracle and/or its affiliates.
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int trace_printk_ret = 0;
+int trace_printk_ran = 0;
+
+SEC("tp/raw_syscalls/sys_enter")
+int sys_enter(void *ctx)
+{
+ static const char fmt[] = "testing,testing %d\n";
+
+ trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
+ ++trace_printk_ran);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
new file mode 100644
index 000000000000..165e3c2dd9a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/socket.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int invocations = 0, in_use = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_map SEC(".maps");
+
+SEC("cgroup/sock_create")
+int sock(struct bpf_sock *ctx)
+{
+ int *sk_storage;
+ __u32 key;
+
+ if (ctx->type != SOCK_DGRAM)
+ return 1;
+
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_storage)
+ return 0;
+ *sk_storage = 0xdeadbeef;
+
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (in_use > 0) {
+ /* BPF_CGROUP_INET_SOCK_RELEASE is _not_ called
+ * when we return an error from the BPF
+ * program!
+ */
+ return 0;
+ }
+
+ __sync_fetch_and_add(&in_use, 1);
+ return 1;
+}
+
+SEC("cgroup/sock_release")
+int sock_release(struct bpf_sock *ctx)
+{
+ int *sk_storage;
+ __u32 key;
+
+ if (ctx->type != SOCK_DGRAM)
+ return 1;
+
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0);
+ if (!sk_storage || *sk_storage != 0xdeadbeef)
+ return 0;
+
+ __sync_fetch_and_add(&invocations, 1);
+ __sync_fetch_and_add(&in_use, -1);
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index a53ed58528d6..bfff82be3fc1 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -34,7 +34,7 @@ serverPort = int(sys.argv[1])
# create active socket
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
try:
- sock.connect(('localhost', serverPort))
+ sock.connect(('::1', serverPort))
except socket.error as e:
sys.exit(1)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index 0ca60d193bed..42ab8882f00f 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -38,7 +38,7 @@ serverSocket = None
# create passive socket
serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try: serverSocket.bind(('localhost', 0))
+try: serverSocket.bind(('::1', 0))
except socket.error as msg:
print('bind fails: ' + str(msg))
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 655729004391..d946252a25bb 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -74,22 +74,7 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to setup cgroup environment\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
/* Attach the bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index d850fb9076b5..804dddd97d4c 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -33,21 +33,10 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load DEV_CGROUP program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (cgroup_fd < 0) {
printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
+ goto out;
}
/* Attach bpf program */
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 9df0d2ac45f8..4f6444bcd53f 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then
exit $ksft_skip
fi
-SRC_TREE=../../../../
+if [ "$building_out_of_srctree" ]; then
+ # We are in linux-build/kselftest/bpf
+ OUTPUT=../../
+else
+ # We are in linux/tools/testing/selftests/bpf
+ OUTPUT=../../../../
+fi
test_run()
{
@@ -19,8 +25,8 @@ test_run()
echo "[ JIT enabled:$1 hardened:$2 ]"
dmesg -C
- if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
- insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+ if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
+ insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
if [ $? -ne 0 ]; then
rc=1
fi
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 785eabf2a593..5620919fde9e 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -INT $!
+kill -TERM $!
if [[ $(< $TMP_FILE) != "foobar" ]]; then
exit 1
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index c1da5404454a..a7b9a69f4fd5 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -58,22 +58,9 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load bpf program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (cgroup_fd < 0)
goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
/* Attach bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
@@ -82,9 +69,9 @@ int main(int argc, char **argv)
}
if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+ assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
else
- assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
+ assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
&prog_cnt)) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 54fa5fa688ce..b1e4dadacd9b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -12,6 +12,9 @@
#include <string.h>
#include <execinfo.h> /* backtrace */
+#define EXIT_NO_TEST 2
+#define EXIT_ERR_SETUP_INFRA 3
+
/* defined in test_progs.h */
struct test_env env = {};
@@ -111,13 +114,31 @@ static void reset_affinity() {
if (err < 0) {
stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
- exit(-1);
+ exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
- exit(-1);
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+}
+
+/*
+ * Open /proc/self/ns/net and keep the descriptor in env.saved_netns_fd.
+ * Exits with EXIT_ERR_SETUP_INFRA if the open fails.
+ */
+static void save_netns(void)
+{
+	int netns_fd = open("/proc/self/ns/net", O_RDONLY);
+
+	env.saved_netns_fd = netns_fd;
+	if (netns_fd != -1)
+		return;
+
+	perror("open(/proc/self/ns/net)");
+	exit(EXIT_ERR_SETUP_INFRA);
+}
+
+/*
+ * Re-enter the network namespace whose descriptor save_netns() stored in
+ * env.saved_netns_fd. On failure, restores stdio first so the diagnostic
+ * is visible, then exits with EXIT_ERR_SETUP_INFRA.
+ */
+static void restore_netns(void)
+{
+	if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
+		stdio_restore();
+		/* Label matches the flag actually passed to setns() above. */
+		perror("setns(CLONE_NEWNET)");
+		exit(EXIT_ERR_SETUP_INFRA);
}
}
@@ -138,8 +159,6 @@ void test__end_subtest()
test->test_num, test->subtest_num,
test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
- reset_affinity();
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -366,6 +385,8 @@ enum ARG_KEYS {
ARG_TEST_NAME_BLACKLIST = 'b',
ARG_VERIFIER_STATS = 's',
ARG_VERBOSE = 'v',
+ ARG_GET_TEST_CNT = 'c',
+ ARG_LIST_TEST_NAMES = 'l',
};
static const struct argp_option opts[] = {
@@ -379,6 +400,10 @@ static const struct argp_option opts[] = {
"Output verifier statistics", },
{ "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
"Verbose output (use -vv or -vvv for progressively verbose output)" },
+ { "count", ARG_GET_TEST_CNT, NULL, 0,
+ "Get number of selected top-level tests " },
+ { "list", ARG_LIST_TEST_NAMES, NULL, 0,
+ "List test names that would run (without running them) " },
{},
};
@@ -511,6 +536,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
}
break;
+ case ARG_GET_TEST_CNT:
+ env->get_test_cnt = true;
+ break;
+ case ARG_LIST_TEST_NAMES:
+ env->list_test_names = true;
+ break;
case ARGP_KEY_ARG:
argp_usage(state);
break;
@@ -643,6 +674,7 @@ int main(int argc, char **argv)
return -1;
}
+ save_netns();
stdio_hijack();
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
@@ -654,6 +686,17 @@ int main(int argc, char **argv)
test->test_num, test->test_name))
continue;
+ if (env.get_test_cnt) {
+ env.succ_cnt++;
+ continue;
+ }
+
+ if (env.list_test_names) {
+ fprintf(env.stdout, "%s\n", test->test_name);
+ env.succ_cnt++;
+ continue;
+ }
+
test->run_test();
/* ensure last sub-test is finalized properly */
if (test->subtest_name)
@@ -673,19 +716,34 @@ int main(int argc, char **argv)
test->error_cnt ? "FAIL" : "OK");
reset_affinity();
+ restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
stdio_restore();
+
+ if (env.get_test_cnt) {
+ printf("%d\n", env.succ_cnt);
+ goto out;
+ }
+
+ if (env.list_test_names)
+ goto out;
+
fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+out:
free_str_set(&env.test_selector.blacklist);
free_str_set(&env.test_selector.whitelist);
free(env.test_selector.num_set);
free_str_set(&env.subtest_selector.blacklist);
free_str_set(&env.subtest_selector.whitelist);
free(env.subtest_selector.num_set);
+ close(env.saved_netns_fd);
+
+ if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
+ return EXIT_NO_TEST;
return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4503c926aca..6e09bf738473 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -66,6 +66,8 @@ struct test_env {
enum verbosity verbosity;
bool jit_enabled;
+ bool get_test_cnt;
+ bool list_test_names;
struct prog_test_def *test;
FILE *stdout;
@@ -78,6 +80,8 @@ struct test_env {
int sub_succ_cnt; /* successful sub-tests */
int fail_cnt; /* total failed tests + sub-tests */
int skip_cnt; /* skipped tests */
+
+ int saved_netns_fd;
};
extern struct test_env env;
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 356351c0ac28..4a64306728ab 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -160,16 +160,10 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CGROUP_PATH);
+ cgfd = cgroup_setup_and_join(CGROUP_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CGROUP_PATH))
- goto err;
-
if (send_packet(argv[1]))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 52bf14955797..9613f7538840 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -464,16 +464,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 0358814c67dc..b8c72c1d9cf7 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -1638,16 +1638,10 @@ int main(int argc, char **argv)
exit(err);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index f0fc103261a4..6c9f269c396d 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -421,19 +421,11 @@ int main(int argc, char **argv)
struct bpf_object *obj;
struct bpf_map *map;
- err = setup_cgroup_environment();
- CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
- err, errno);
-
- atexit(cleanup_cgroup_environment);
-
/* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
"cgroup_fd:%d errno:%d", cgroup_fd, errno);
-
- err = join_cgroup(TEST_CGROUP);
- CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
+ atexit(cleanup_cgroup_environment);
err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 15653b0e26eb..154a8fd2a48d 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -191,16 +191,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_test(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 78789b27e573..9b6fb00dc7a0 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1963,23 +1963,9 @@ int main(int argc, char **argv)
}
if (!cg_fd) {
- if (setup_cgroup_environment()) {
- fprintf(stderr, "ERROR: cgroup env failed\n");
- return -EINVAL;
- }
-
- cg_fd = create_and_get_cgroup(CG_PATH);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, strerror(errno));
+ cg_fd = cgroup_setup_and_join(CG_PATH);
+ if (cg_fd < 0)
return cg_fd;
- }
-
- if (join_cgroup(CG_PATH)) {
- fprintf(stderr, "ERROR: failed to join cgroup\n");
- return -EINVAL;
- }
cg_created = 1;
}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index d196e2a4a6e0..a20a919244c0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1619,16 +1619,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 3ae127620463..74a9e49988b6 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -102,16 +102,10 @@ int main(int argc, char **argv)
__u32 key = 0;
int rv;
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index f9765ddf0761..73da7fe8c152 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -86,16 +86,10 @@ int main(int argc, char **argv)
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
@@ -130,17 +124,24 @@ int main(int argc, char **argv)
sprintf(test_script,
"iptables -A INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
sprintf(test_script,
"nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
TESTPORT);
- system(test_script);
+ if (system(test_script))
+ printf("execute command: %s, err %d\n", test_script, -errno);
sprintf(test_script,
"iptables -D INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g);
if (rv != 0) {
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c4b17e08d431..dd80f0c84afb 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -10,52 +10,72 @@
# | xdp forwarding |
# ------------------
-cleanup()
+ret=0
+
+# Build the test topology: namespaces ns1 and ns2, veth1 (ifindex 111)
+# paired with veth11 inside ns1, veth2 (ifindex 222) paired with veth22
+# inside ns2. All four ends are brought up and the namespace ends get
+# 10.1.1.11/24 and 10.1.1.22/24. Takes no arguments.
+setup()
{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_redirect [PASS]";
- else
- echo "selftests: test_xdp_redirect [FAILED]";
- fi
- set +e
+	ip netns add ns1
+	ip netns add ns2
+
+	ip link add veth1 index 111 type veth peer name veth11 netns ns1
+	ip link add veth2 index 222 type veth peer name veth22 netns ns2
+
+	ip link set veth1 up
+	ip link set veth2 up
+	ip -n ns1 link set dev veth11 up
+	ip -n ns2 link set dev veth22 up
+
+	ip -n ns1 addr add 10.1.1.11/24 dev veth11
+	ip -n ns2 addr add 10.1.1.22/24 dev veth22
+}
+
+# Delete the veth1/veth2 links and the ns1/ns2 namespaces created by
+# setup(); stderr of each command is discarded.
+cleanup()
+{
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
ip netns del ns1 2> /dev/null
ip netns del ns2 2> /dev/null
}
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 0
-fi
-set -e
-
-ip netns add ns1
-ip netns add ns2
+# Run one redirect round for a given XDP attach mode.
+#   $1: ip-link XDP mode keyword (the script passes xdpgeneric and xdpdrv)
+# Prints a [SKIP], [PASS] or [FAILED] line and sets the global ret=1 on
+# failure. The test topology is torn down before returning on the
+# SKIP/PASS/FAILED paths.
+test_xdp_redirect()
+{
+	local xdpmode=$1
+	local ret1=0
+	local ret2=0
-
-trap cleanup 0 2 3 6 9
+
+	setup
-
-ip link add veth1 index 111 type veth peer name veth11
-ip link add veth2 index 222 type veth peer name veth22
+
+	# The script runs under "set -e": a bare failing command would abort
+	# it before $? could be inspected, so the probe lives in the "if".
+	if ! ip link set dev veth1 $xdpmode off &> /dev/null; then
+		echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
+		# Undo setup() so the next mode can create the topology again.
+		cleanup
+		return 0
+	fi
-
-ip link set veth11 netns ns1
-ip link set veth22 netns ns2
+
+	ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+	ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+	ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
+	ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
-
-ip link set veth1 up
-ip link set veth2 up
+
+	# "|| retN=$?" records a ping failure without tripping "set -e".
+	ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null || ret1=$?
+	ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null || ret2=$?
-
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+
+	if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then
+		echo "selftests: test_xdp_redirect $xdpmode [PASS]"
+	else
+		ret=1
+		echo "selftests: test_xdp_redirect $xdpmode [FAILED]"
+	fi
-
-ip netns exec ns1 ip link set dev veth11 up
-ip netns exec ns2 ip link set dev veth22 up
+
+	cleanup
+}
-ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
-ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+set -e
+trap cleanup 2 3 6 9
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+test_xdp_redirect xdpgeneric
+test_xdp_redirect xdpdrv
-exit 0
+exit $ret
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 0af6337a8962..800d503e5cb4 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -64,3 +64,17 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
+
+/*
+ * Zero @info, fill it via bpf_obj_get_info_by_fd() on the fd of @link and
+ * return info->prog_id. If the query fails, prints the negated errno and
+ * returns 0.
+ */
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
+{
+	__u32 len = sizeof(*info);
+
+	memset(info, 0, sizeof(*info));
+
+	if (bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &len)) {
+		printf("failed to get link info: %d\n", -errno);
+		return 0;
+	}
+
+	return info->prog_id;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 923b51762759..d4f8e749611b 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,5 +1,8 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (C) 2020 Facebook, Inc. */
#include <stdbool.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
int parse_num_list(const char *s, bool **set, int *set_len);
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
new file mode 100644
index 000000000000..2ad5f974451c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -0,0 +1,492 @@
+{
+ "valid 1,2,4,8-byte reads from bpf_sk_lookup",
+ .insns = {
+ /* 1-byte read from family field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 3),
+ /* 2-byte read from family field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ /* 4-byte read from family field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+
+ /* 1-byte read from protocol field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 3),
+ /* 2-byte read from protocol field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ /* 4-byte read from protocol field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+
+ /* 1-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
+ /* 2-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ /* 4-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+
+ /* 1-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
+ /* 2-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ /* 4-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+
+ /* 1-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 3),
+ /* 2-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ /* 4-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+
+ /* 1-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 3),
+ /* 2-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ /* 4-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+
+ /* 1-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 15),
+ /* 2-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ /* 4-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+
+ /* 1-byte read from local_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 3),
+ /* 2-byte read from local_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ /* 4-byte read from local_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+
+ /* 8-byte read from sk field */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 8-byte reads from 4-byte fields in bpf_sk_lookup */
+{
+ "invalid 8-byte read from bpf_sk_lookup family field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup protocol field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
+{
+ "invalid 4-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* out of bounds and unaligned reads from bpf_sk_lookup */
+{
+ "invalid 4-byte read past end of bpf_sk_lookup",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* in-bound and out-of-bound writes to bpf_sk_lookup */
+{
+ "invalid 8-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write past end of bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
new file mode 100644
index 000000000000..b52209db8250
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -0,0 +1,62 @@
+{
+ "bpf_map_ptr: read with negative offset rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "R1 is bpf_array invalid negative access: off=-8",
+},
+{
+ "bpf_map_ptr: write rejected",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "only read from bpf_array is supported",
+},
+{
+ "bpf_map_ptr: read non-existent field rejected",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+},
+{
+ "bpf_map_ptr: read ops field accepted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
index cd26ee6b7b1d..1f2b8c4cb26d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
@@ -56,7 +56,7 @@
.fixup_map_in_map = { 16 },
.fixup_map_array_48b = { 13 },
.result = REJECT,
- .errstr = "R0 invalid mem access 'map_ptr'",
+ .errstr = "only read from bpf_array is supported",
},
{
"cond: two branches returning different map pointers for lookup (tail, tail)",
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 97ee658e1242..ed4e76b24649 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -836,3 +836,41 @@
.errstr = "R0 invalid mem access 'inv'",
.errstr_unpriv = "R0 pointer -= pointer prohibited",
},
+{
+ "32bit pkt_ptr -= scalar",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+ BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "32bit scalar -= pkt_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+ BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index b3ead29c6089..2cf6f10ab7c4 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -47,7 +47,7 @@ void child(int cpu)
_exit(0);
}
-bool run_test(int cpu)
+int run_test(int cpu)
{
int status;
pid_t pid = fork();
@@ -55,7 +55,7 @@ bool run_test(int cpu)
if (pid < 0) {
ksft_print_msg("fork() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (pid == 0)
child(cpu);
@@ -63,67 +63,68 @@ bool run_test(int cpu)
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFSTOPPED(status)) {
ksft_print_msg("child did not stop: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WSTOPSIG(status) != SIGSTOP) {
ksft_print_msg("child did not stop with SIGSTOP: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
if (errno == EIO) {
- ksft_exit_skip(
+ ksft_print_msg(
"ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n",
strerror(errno));
+ return KSFT_SKIP;
}
ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: $s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WIFEXITED(status)) {
ksft_print_msg("child did not single-step: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFSTOPPED(status)) {
ksft_print_msg("child did not stop: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WSTOPSIG(status) != SIGTRAP) {
ksft_print_msg("child did not stop with SIGTRAP: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFEXITED(status)) {
ksft_print_msg("child did not exit after PTRACE_CONT: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
- return true;
+ return KSFT_PASS;
}
void suspend(void)
@@ -183,32 +184,38 @@ int main(int argc, char **argv)
}
}
+ err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+ if (err < 0)
+ ksft_exit_fail_msg("sched_getaffinity() failed\n");
+
for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
if (!CPU_ISSET(cpu, &available_cpus))
continue;
tests++;
}
- ksft_set_plan(tests);
if (do_suspend)
suspend();
- err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
- if (err < 0)
- ksft_exit_fail_msg("sched_getaffinity() failed\n");
-
+ ksft_set_plan(tests);
for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
- bool test_success;
+ int test_success;
if (!CPU_ISSET(cpu, &available_cpus))
continue;
test_success = run_test(cpu);
- if (test_success) {
+ switch (test_success) {
+ case KSFT_PASS:
ksft_test_result_pass("CPU %d\n", cpu);
- } else {
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("CPU %d\n", cpu);
+ break;
+ case KSFT_FAIL:
ksft_test_result_fail("CPU %d\n", cpu);
succeeded = false;
+ break;
}
}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index aa6de65b0838..84cfcabea838 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,3 +2,4 @@
test_memcontrol
test_core
test_freezer
+test_kmem \ No newline at end of file
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 967f268fde74..f027d933595b 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -6,11 +6,13 @@ all:
TEST_FILES := with_stress.sh
TEST_PROGS := test_stress.sh
TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
include ../lib.mk
$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8a637ca7d73a..05853b0b8831 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control,
/* Handle the case of comparing against empty string */
if (!expected)
- size = 32;
+ return -1;
else
size = strlen(expected) + 1;
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
new file mode 100644
index 000000000000..0941aa16157e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+
+/*
+ * Memory cgroup charging and vmstat data aggregation is performed using
+ * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum
+ * discrepancy between charge and vmstat entries is number of cpus multiplied
+ * by 32 pages multiplied by 2.
+ */
+#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())
+
+
+/*
+ * Populate the dentry cache with negative dentries by stat()-ing a series
+ * of long, unique path names directly under "/".
+ *
+ * @cgroup is unused. @arg carries the number of lookups, passed by value
+ * inside the pointer. Always returns 0.
+ */
+static int alloc_dcache(const char *cgroup, void *arg)
+{
+	const unsigned long count = (unsigned long)arg;
+	unsigned long n = 0;
+	char path[128];
+	struct stat sb;
+
+	while (n < count) {
+		snprintf(path, sizeof(path),
+			 "/something-non-existent-with-a-long-name-%64lu-%d",
+			 n, getpid());
+		/* The outcome is irrelevant; only the lookup itself matters. */
+		stat(path, &sb);
+		n++;
+	}
+
+	return 0;
+}
+
+/*
+ * This test allocates 100000 negative dentries with long names.
+ * Then it checks that "slab" in memory.stat is larger than 1M.
+ * Then it sets memory.high to 1M and checks that at least 1/2
+ * of slab memory has been reclaimed.
+ *
+ * @root: path of the cgroup under which the test cgroup is created.
+ *
+ * Returns KSFT_PASS on success, KSFT_FAIL otherwise (including any
+ * failure to set the test up).
+ */
+static int test_kmem_basic(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = NULL;
+	long slab0, slab1, current;
+
+	cg = cg_name(root, "kmem_basic_test");
+	if (!cg)
+		goto cleanup;
+
+	if (cg_create(cg))
+		goto cleanup;
+
+	if (cg_run(cg, alloc_dcache, (void *)100000))
+		goto cleanup;
+
+	slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
+	if (slab0 < (1 << 20))
+		goto cleanup;
+
+	/*
+	 * The reclaim checks below are meaningless if the limit could not
+	 * be applied, so treat a failed write as a test failure.
+	 */
+	if (cg_write(cg, "memory.high", "1M"))
+		goto cleanup;
+
+	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
+	if (slab1 <= 0)
+		goto cleanup;
+
+	current = cg_read_long(cg, "memory.current");
+	if (current <= 0)
+		goto cleanup;
+
+	/* Both slab usage and total usage must have dropped below half. */
+	if (slab1 < slab0 / 2 && current < slab0 / 2)
+		ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+
+	return ret;
+}
+
+/*
+ * Thread body: run alloc_dcache() with a count of 100. @arg is ignored
+ * and the thread always returns NULL.
+ */
+static void *alloc_kmem_fn(void *arg)
+{
+	(void)alloc_dcache(NULL, (void *)100);
+
+	return NULL;
+}
+
+/*
+ * Run alloc_kmem_fn() in 2 * get_nprocs() threads and wait for them.
+ * Neither @cgroup nor @arg is used.
+ *
+ * Returns 0 when every thread was created and joined, -1 if the thread
+ * array could not be allocated or a thread could not be created, and the
+ * pthread_join() error code if joining fails.
+ */
+static int alloc_kmem_smp(const char *cgroup, void *arg)
+{
+	int nr_threads = 2 * get_nprocs();
+	pthread_t *threads;
+	unsigned long t;
+	int err = -1;
+
+	threads = calloc(nr_threads, sizeof(pthread_t));
+	if (!threads)
+		return -1;
+
+	for (t = 0; t < nr_threads; t++) {
+		/* err is still -1 here, so a creation failure reports -1. */
+		if (pthread_create(&threads[t], NULL, &alloc_kmem_fn,
+				   (void *)t))
+			goto out;
+	}
+
+	t = 0;
+	while (t < nr_threads) {
+		err = pthread_join(threads[t], NULL);
+		if (err)
+			break;
+		t++;
+	}
+
+out:
+	free(threads);
+	return err;
+}
+
+/*
+ * Sequentially create @times child cgroups of @parent (named through
+ * cg_name_indexed(parent, "child", i)), run @fn with @arg in each of them
+ * via cg_run(), and destroy each child before moving on to the next one.
+ *
+ * Returns 0 on success and -1 as soon as naming, creating or running in
+ * a child fails.
+ */
+static int cg_run_in_subcgroups(const char *parent,
+				int (*fn)(const char *cgroup, void *arg),
+				void *arg, int times)
+{
+	char *child;
+	int i;
+
+	for (i = 0; i < times; i++) {
+		int err;
+
+		child = cg_name_indexed(parent, "child", i);
+		if (!child)
+			return -1;
+
+		err = cg_create(child);
+		if (!err) {
+			/* Forward the caller's argument rather than dropping it. */
+			err = cg_run(child, fn, arg);
+		}
+
+		/* The child is torn down whether or not the steps above worked. */
+		cg_destroy(child);
+		free(child);
+
+		if (err)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * The test creates and destroys a large number of cgroups. In each cgroup it
+ * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
+ * threads. Then it checks the sanity of numbers on the parent level:
+ * the total size of the cgroups should be roughly equal to
+ * anon + file + slab + kernel_stack.
+ *
+ * @root: path of the cgroup under which the test cgroup is created.
+ *
+ * Returns KSFT_PASS when memory.current and the sum differ by less than
+ * MAX_VMSTAT_ERROR, KSFT_FAIL otherwise (including any setup failure).
+ */
+static int test_kmem_memcg_deletion(const char *root)
+{
+	long current, slab, anon, file, kernel_stack, sum;
+	int ret = KSFT_FAIL;
+	char *parent;
+
+	parent = cg_name(root, "kmem_memcg_deletion_test");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
+		goto cleanup;
+
+	current = cg_read_long(parent, "memory.current");
+	slab = cg_read_key_long(parent, "memory.stat", "slab ");
+	anon = cg_read_key_long(parent, "memory.stat", "anon ");
+	file = cg_read_key_long(parent, "memory.stat", "file ");
+	kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
+	if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
+	    kernel_stack < 0)
+		goto cleanup;
+
+	sum = slab + anon + file + kernel_stack;
+	/* The operands are long: labs() avoids the int truncation of abs(). */
+	if (labs(sum - current) < MAX_VMSTAT_ERROR) {
+		ret = KSFT_PASS;
+	} else {
+		printf("memory.current = %ld\n", current);
+		printf("slab + anon + file + kernel_stack = %ld\n", sum);
+		printf("slab = %ld\n", slab);
+		printf("anon = %ld\n", anon);
+		printf("file = %ld\n", file);
+		printf("kernel_stack = %ld\n", kernel_stack);
+	}
+
+cleanup:
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * Read /proc/kpagecgroup from start to end. Reaching end-of-file without
+ * a read error (and without the kernel panicking) counts as a pass.
+ *
+ * @root is unused. Returns KSFT_PASS or KSFT_FAIL.
+ */
+static int test_kmem_proc_kpagecgroup(const char *root)
+{
+	unsigned long chunk[128];
+	ssize_t nread;
+	int status;
+	int fd;
+
+	fd = open("/proc/kpagecgroup", O_RDONLY);
+	if (fd < 0)
+		return KSFT_FAIL;
+
+	/* Drain the file; the data itself is discarded. */
+	for (;;) {
+		nread = read(fd, chunk, sizeof(chunk));
+		if (nread <= 0)
+			break;
+	}
+
+	/* 0 means a clean end-of-file, a negative value a read error. */
+	status = (nread == 0) ? KSFT_PASS : KSFT_FAIL;
+
+	close(fd);
+	return status;
+}
+
+/* Thread body: sleep for 100 seconds, then return NULL. @arg is unused. */
+static void *pthread_wait_fn(void *arg)
+{
+	(void)sleep(100);
+
+	return NULL;
+}
+
+/*
+ * Start 1000 threads running pthread_wait_fn() and, without joining them,
+ * read "kernel_stack" from memory.stat of @cgroup.
+ *
+ * @arg is unused. Returns 0 if the value is at least 4096 * 1000, -1 if
+ * it is smaller, or if allocating the thread array or creating a thread
+ * fails.
+ */
+static int spawn_1000_threads(const char *cgroup, void *arg)
+{
+	int nr_threads = 1000;
+	pthread_t *threads;
+	unsigned long t;
+	long kstack;
+	int err = -1;
+
+	threads = calloc(nr_threads, sizeof(pthread_t));
+	if (!threads)
+		return -1;
+
+	for (t = 0; t < nr_threads; t++) {
+		/* err is still -1 here, so a creation failure reports -1. */
+		if (pthread_create(&threads[t], NULL, &pthread_wait_fn,
+				   (void *)t))
+			goto out;
+	}
+
+	kstack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
+	if (kstack >= 4096 * 1000)
+		err = 0;
+
+out:
+	free(threads);
+	return err;
+}
+
+/*
+ * Run spawn_1000_threads() inside a freshly created cgroup: a process
+ * spawns 1000 threads and checks that memory.stat's kernel_stack is at
+ * least 1000 pages large.
+ *
+ * Returns KSFT_PASS if every step succeeds, KSFT_FAIL otherwise.
+ */
+static int test_kmem_kernel_stacks(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg;
+
+	cg = cg_name(root, "kmem_kernel_stacks_test");
+
+	/* Short-circuit evaluation stops at the first step that fails. */
+	if (cg && !cg_create(cg) && !cg_run(cg, spawn_1000_threads, NULL))
+		ret = KSFT_PASS;
+
+	cg_destroy(cg);
+	free(cg);
+
+	return ret;
+}
+
+/*
+ * This test sequentially creates 30 child cgroups, runs alloc_dcache()
+ * in each of them through cg_run_in_subcgroups(), and deletes them. Then
+ * it checks that the number of dying cgroups on the parent level is 0,
+ * polling cgroup.stat up to 5 times with a one second pause in between.
+ *
+ * Returns KSFT_PASS once nr_dying_descendants reads 0, KSFT_FAIL otherwise.
+ */
+static int test_kmem_dead_cgroups(const char *root)
+{
+	int ret = KSFT_FAIL;
+	int attempts = 5;
+	char *parent;
+	long dying;
+
+	parent = cg_name(root, "kmem_dead_cgroups_test");
+	if (!parent ||
+	    cg_create(parent) ||
+	    cg_write(parent, "cgroup.subtree_control", "+memory") ||
+	    cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
+		goto cleanup;
+
+	while (attempts--) {
+		dying = cg_read_key_long(parent, "cgroup.stat",
+					 "nr_dying_descendants ");
+		if (dying == 0) {
+			ret = KSFT_PASS;
+			break;
+		}
+		/*
+		 * Reclaiming cgroups might take some time,
+		 * let's wait a bit and repeat.
+		 */
+		sleep(1);
+	}
+
+cleanup:
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * This test creates a sub-tree with 1000 memory cgroups.
+ * Then it checks that the memory.current on the parent level
+ * is greater than 0 and approximately matches the percpu value
+ * from memory.stat.
+ *
+ * @root: path of the cgroup under which the test cgroup is created.
+ *
+ * Returns KSFT_PASS when both values are positive and differ by less than
+ * MAX_VMSTAT_ERROR, KSFT_FAIL otherwise (including any setup failure).
+ */
+static int test_percpu_basic(const char *root)
+{
+	const int nr_children = 1000;
+	int ret = KSFT_FAIL;
+	char *parent, *child;
+	long current, percpu;
+	int i;
+
+	parent = cg_name(root, "percpu_basic_test");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	for (i = 0; i < nr_children; i++) {
+		child = cg_name_indexed(parent, "child", i);
+		/* Fail through the cleanup path so nothing is left behind. */
+		if (!child)
+			goto cleanup_children;
+
+		if (cg_create(child)) {
+			free(child);
+			goto cleanup_children;
+		}
+
+		free(child);
+	}
+
+	current = cg_read_long(parent, "memory.current");
+	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+
+	/* The operands are long: labs() avoids the int truncation of abs(). */
+	if (current > 0 && percpu > 0 &&
+	    labs(current - percpu) < MAX_VMSTAT_ERROR)
+		ret = KSFT_PASS;
+	else
+		printf("memory.current %ld\npercpu %ld\n",
+		       current, percpu);
+
+cleanup_children:
+	for (i = 0; i < nr_children; i++) {
+		child = cg_name_indexed(parent, "child", i);
+		/* Without a name there is nothing to destroy or free. */
+		if (!child)
+			continue;
+		cg_destroy(child);
+		free(child);
+	}
+
+cleanup:
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+#define T(x) { x, #x } /* initializer: a test function plus its name as a string */
+struct kmem_test {
+ int (*fn)(const char *root); /* test body; main() switches on its KSFT_* result */
+ const char *name; /* label printed together with the test result */
+} tests[] = { /* main() runs these entries in array order */
+ T(test_kmem_basic),
+ T(test_kmem_memcg_deletion),
+ T(test_kmem_proc_kpagecgroup),
+ T(test_kmem_kernel_stacks),
+ T(test_kmem_dead_cgroups),
+ T(test_percpu_basic),
+};
+#undef T
+
+/*
+ * Entry point: skip unless a cgroup v2 hierarchy with a usable memory
+ * controller is found, then run every entry of tests[] in order and report
+ * each result.
+ *
+ * Returns EXIT_FAILURE if any test neither passed nor was skipped,
+ * EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char **argv)
+{
+	const struct kmem_test *t;
+	char root[PATH_MAX];
+	int ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	/*
+	 * Check that memory controller is available:
+	 * memory is listed in cgroup.controllers
+	 */
+	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+		ksft_exit_skip("memory controller isn't available\n");
+
+	/* Only write to cgroup.subtree_control when "memory" is not there. */
+	if (cg_read_strstr(root, "cgroup.subtree_control", "memory") &&
+	    cg_write(root, "cgroup.subtree_control", "+memory"))
+		ksft_exit_skip("Failed to set memory controller\n");
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		int status = t->fn(root);
+
+		if (status == KSFT_PASS) {
+			ksft_test_result_pass("%s\n", t->name);
+		} else if (status == KSFT_SKIP) {
+			ksft_test_result_skip("%s\n", t->name);
+		} else {
+			/* Anything else, KSFT_FAIL included, is a failure. */
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", t->name);
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index f14c269a5a18..b7e6dec36173 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -131,9 +131,9 @@ int main(int argc, char *argv[])
uid_t uid = getuid();
- test_clone3_supported();
ksft_print_header();
ksft_set_plan(17);
+ test_clone3_supported();
/* Just a simple clone3() should return 0.*/
test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 9e1af8aa7698..db5fc9c5edcf 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -119,9 +119,8 @@ static void test_clone3_clear_sighand(void)
int main(int argc, char **argv)
{
ksft_print_header();
- test_clone3_supported();
-
ksft_set_plan(1);
+ test_clone3_supported();
test_clone3_clear_sighand();
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 25beb22f35b5..5831c1082d6d 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -157,8 +157,8 @@ int main(int argc, char *argv[])
pid_t set_tid[MAX_PID_NS_LEVEL * 2];
ksft_print_header();
- test_clone3_supported();
ksft_set_plan(29);
+ test_clone3_supported();
if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
ksft_exit_fail_msg("pipe() failed\n");
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 0d347d48c112..517297a14ecf 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -121,6 +121,7 @@ h1_destroy()
h2_create()
{
host_create $h2 2
+ tc qdisc add dev $h2 clsact
# Some of the tests in this suite use multicast traffic. As this traffic
# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
@@ -141,6 +142,7 @@ h2_create()
h2_destroy()
{
ethtool -s $h2 autoneg on
+ tc qdisc del dev $h2 clsact
host_destroy $h2
}
@@ -336,6 +338,17 @@ get_qdisc_npackets()
qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
}
+send_packets() # run $MZ on $h2.<vlan> towards $h3_mac; extra arguments are appended as-is
+{
+ local vlan=$1; shift # selects the $h2.<vlan> device and the ipaddr arguments
+ local proto=$1; shift # value passed to -t
+ local pkts=$1; shift # value passed to -c
+
+ $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+ -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+ -t $proto -q -c $pkts "$@"
+}
+
# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
# success. After 10 failed attempts it bails out and returns 1. It dumps the
# backlog size to stdout.
@@ -364,9 +377,7 @@ build_backlog()
return 1
fi
- $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
- -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
- -t $proto -q -c $pkts "$@"
+ send_packets $vlan $proto $pkts "$@"
done
}
@@ -531,3 +542,115 @@ do_mc_backlog_test()
log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
+
+do_drop_test() # args: <vlan> <limit> <trigger> <subtest> <fetch_counter>
+{
+ local vlan=$1; shift
+ local limit=$1; shift # backlog size the build_backlog targets are derived from
+ local trigger=$1; shift # only used in the log_test message
+ local subtest=$1; shift # selects qevent_rule_install_<subtest> / qevent_rule_uninstall_<subtest>
+ local fetch_counter=$1; shift # command that prints the current counter value
+ local backlog # not referenced below
+ local base
+ local now
+ local pct # not referenced below
+
+ RET=0
+
+ start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+
+ # Create a bit of a backlog and observe no mirroring due to drops.
+ qevent_rule_install_$subtest
+ base=$($fetch_counter) # counter baseline for the checks that follow
+
+ build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
+
+ busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed without buffer pressure"
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc and then by the driver, so this is the best we
+ # can do to get to the real limit of the system.
+ build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
+
+ base=$($fetch_counter) # new baseline once the queue is full
+ send_packets $vlan udp 11
+
+ now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
+ check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
+
+ # When no extra traffic is injected, there should be no mirroring.
+ busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed"
+
+ # When the rule is uninstalled, there should be no mirroring.
+ qevent_rule_uninstall_$subtest
+ send_packets $vlan udp 11
+ busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed after uninstall"
+
+ log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+
+ stop_traffic
+ sleep 1
+}
+
+qevent_rule_install_mirror() # add a matchall filter (handle 102) to block 10 that mirrors to $swp2
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action mirred egress mirror dev $swp2 hw_stats disabled
+}
+
+qevent_rule_uninstall_mirror() # delete the handle-102 matchall filter from block 10
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_mirror() # stats of the handle-101 rule on $h2 ingress
+{
+ tc_rule_handle_stats_get "dev $h2 ingress" 101
+}
+
+do_drop_mirror_test() # args: <vlan> <limit> <qevent_name>; runs do_drop_test with subtest "mirror"
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local qevent_name=$1; shift # forwarded to do_drop_test as its third argument
+
+ tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+ flower skip_sw ip_proto udp \
+ action drop
+
+ do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
+ qevent_counter_fetch_mirror
+
+ tc filter del dev $h2 ingress pref 1 handle 101 flower # remove the handle-101 filter added above
+}
+
+qevent_rule_install_trap() # add a matchall filter (handle 102) with a trap action to block 10
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action trap hw_stats disabled
+}
+
+qevent_rule_uninstall_trap() # delete the handle-102 matchall filter from block 10
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_trap() # args: <trap_name>; output of devlink_trap_rx_packets_get for that trap
+{
+ local trap_name=$1; shift
+
+ devlink_trap_rx_packets_get "$trap_name"
+}
+
+do_drop_trap_test() # args: <vlan> <limit> <trap_name>; runs do_drop_test with subtest "trap"
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local trap_name=$1; shift # used both as do_drop_test's third argument and as the trap to query
+
+ do_drop_test "$vlan" "$limit" "$trap_name" trap \
+ "qevent_counter_fetch_trap $trap_name"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1c36c576613b..3f007c5f8361 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -7,6 +7,8 @@ ALL_TESTS="
ecn_nodrop_test
red_test
mc_backlog_test
+ red_mirror_test
+ red_trap_test
"
: ${QDISC:=ets}
source sch_red_core.sh
@@ -83,6 +85,26 @@ mc_backlog_test()
uninstall_qdisc
}
+red_mirror_test()
+{
+ install_qdisc qevent early_drop block 10
+
+ do_drop_mirror_test 10 $BACKLOG1 early_drop
+ do_drop_mirror_test 11 $BACKLOG2 early_drop
+
+ uninstall_qdisc
+}
+
+red_trap_test()
+{
+ install_qdisc qevent early_drop block 10
+
+ do_drop_trap_test 10 $BACKLOG1 early_drop
+ do_drop_trap_test 11 $BACKLOG2 early_drop
+
+ uninstall_qdisc
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index 558667ea11ec..ede9c38d3eff 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -7,6 +7,7 @@ ALL_TESTS="
ecn_nodrop_test
red_test
mc_backlog_test
+ red_mirror_test
"
source sch_red_core.sh
@@ -57,6 +58,13 @@ mc_backlog_test()
uninstall_qdisc
}
+red_mirror_test()
+{
+ install_qdisc qevent early_drop block 10
+ do_drop_mirror_test 10 $BACKLOG early_drop # pass the qevent name (3rd argument of do_drop_mirror_test), matching sch_red_ets.sh
+ uninstall_qdisc
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index fd583a171db7..d7cf33a3f18d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -28,7 +28,7 @@ cleanup()
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="router tc_flower mirror_gre tc_police"
for current_test in ${TESTS:-$ALL_TESTS}; do
source ${current_test}_scale.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 43ba1b438f6d..43f662401bc3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,7 +22,7 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="router tc_flower mirror_gre tc_police"
for current_test in ${TESTS:-$ALL_TESTS}; do
source ${current_test}_scale.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
new file mode 100755
index 000000000000..448b75c1545a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that policers shared by different tc filters are correctly reference
+# counted by observing policers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_police_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+tc_police_occ_get()
+{
+ devlink_resource_occ_get global_policers single_rate_policers
+}
+
+tc_police_occ_test()
+{
+ RET=0
+
+ local occ=$(tc_police_occ_get)
+
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok \
+ index 10
+ tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \
+ flower skip_sw action police index 10
+
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 2 handle 102 flower
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+ log_test "tc police occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
new file mode 100644
index 000000000000..4b96561c462f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TC_POLICE_NUM_NETIFS=2
+
+tc_police_h1_create()
+{
+ simple_if_init $h1
+}
+
+tc_police_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+tc_police_switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+tc_police_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+tc_police_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ TC_POLICE_BATCH_FILE="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $TC_POLICE_BATCH_FILE <<-EOF
+ filter add dev $swp1 ingress \
+ prot ip \
+ flower skip_sw \
+ action police rate 10mbit burst 100k \
+ conform-exceed drop/ok
+ EOF
+ done
+
+ tc -b $TC_POLICE_BATCH_FILE
+ check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_police_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ tc_police_rules_create $count $should_fail
+
+ offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l)
+ ((offload_count == count))
+ check_err_fail $should_fail $? "tc police offload count"
+}
+
+tc_police_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ __tc_police_test $count $should_fail
+}
+
+tc_police_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ tc_police_h1_create
+ tc_police_switch_create
+}
+
+tc_police_cleanup()
+{
+ pre_cleanup
+
+ tc_police_switch_destroy
+ tc_police_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 9241250c5921..553cb9fad508 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,6 +11,8 @@ ALL_TESTS="
matchall_mirror_behind_flower_ingress_test
matchall_sample_behind_flower_ingress_test
matchall_mirror_behind_flower_egress_test
+ police_limits_test
+ multi_police_test
"
NUM_NETIFS=2
@@ -287,6 +289,80 @@ matchall_mirror_behind_flower_egress_test()
matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
}
+police_limits_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 0.5kbit burst 1m conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too low rate"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 2.5tbit burst 1g conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too high rate"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 1m conform-exceed drop/ok
+ check_err $? "Failed to add police action with low rate"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.9tbit burst 1g conform-exceed drop/ok
+ check_err $? "Failed to add police action with high rate"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 512b conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too low burst size"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 2k conform-exceed drop/ok
+ check_err $? "Failed to add police action with low burst size"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "police rate and burst limits"
+}
+
+multi_police_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have multiple police
+ # actions in a single rule.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok
+ check_err $? "Failed to add rule with single police action"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/pipe \
+ action police rate 200mbit burst 200k conform-exceed drop/ok
+ check_fail $? "Incorrect success to add rule with two police actions"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "multi police"
+}
+
setup_prepare()
{
swp1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
new file mode 100644
index 000000000000..ba1d53b9f815
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -0,0 +1,786 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+VNI_GEN=$RANDOM
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
+NSIM_NETDEV=
+HAS_ETHTOOL=
+EXIT_STATUS=0
+num_cases=0
+num_errors=0
+
+clean_up_devs=( )
+
+function err_cnt {
+ echo "ERROR:" $@
+ EXIT_STATUS=1
+ ((num_errors++))
+ ((num_cases++))
+}
+
+function pass_cnt {
+ ((num_cases++))
+}
+
+function cleanup_tuns {
+ for dev in "${clean_up_devs[@]}"; do
+ [ -e /sys/class/net/$dev ] && ip link del dev $dev
+ done
+ clean_up_devs=( )
+}
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_tuns
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function new_vxlan {
+ local dev=$1
+ local dstport=$2
+ local lower=$3
+ local ipver=$4
+ local flags=$5
+
+ local group ipfl
+
+ [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type vxlan \
+ group $group \
+ dev $lower \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function new_geneve { # usage: new_geneve DEV DSTPORT [IPVER] [FLAGS] - add a geneve netdev, bring it up, then run check_tables
+ local dev=$1
+ local dstport=$2
+ local ipver=$3
+ local flags=$4
+
+ local remote ipfl # "remote" is what the ip command below consumes; keep it function-local
+
+ [ "$ipver" != '6' ] && remote=1.1.1.2 || remote=::2 # IPv6 peer address only when IPVER is 6
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type geneve \
+ remote $remote \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function del_dev {
+ local dev=$1
+
+ ip link del dev $dev
+ check_tables
+}
+
+# Helpers for netdevsim port/type encoding
+function mke {
+ local port=$1
+ local type=$2
+
+ echo $((port << 16 | type))
+}
+
+function pre {
+ local val=$1
+
+ echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))"
+}
+
+function pre_ethtool {
+ local val=$1
+ local port=$((val >> 16))
+ local type=$((val & 0xffff))
+
+ case $type in
+ 1)
+ type_name="vxlan"
+ ;;
+ 2)
+ type_name="geneve"
+ ;;
+ 4)
+ type_name="vxlan-gpe"
+ ;;
+ *)
+ type_name="bit X"
+ ;;
+ esac
+
+ echo "port $port, $type_name"
+}
+
+function check_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local -n expected=$2
+ local last=$3
+
+ read -a have < $path
+
+ if [ ${#expected[@]} -ne ${#have[@]} ]; then
+ echo "check_table: BAD NUMBER OF ITEMS"
+ return 0
+ fi
+
+ for i in "${!expected[@]}"; do
+ if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then
+ pp_expected=`pre_ethtool ${expected[i]}`
+ ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null
+ if [ $? -ne 0 -a $last -ne 0 ]; then
+ err_cnt "ethtool table $1 on port $port: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ return 0
+
+ fi
+ fi
+
+ if [ ${expected[i]} != ${have[i]} ]; then
+ if [ $last -ne 0 ]; then
+ err_cnt "table $1 on port $port: $pfx - $msg"
+ echo " check_table: wrong entry $i"
+ echo " expected: `pre ${expected[i]}`"
+ echo " have: `pre ${have[i]}`"
+ return 0
+ fi
+ return 1
+ fi
+ done
+
+ pass_cnt
+ return 0
+}
+
+function check_tables {
+ # Need retries in case we have workqueue making the changes
+ local retries=10
+
+ while ! check_table 0 exp0 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+ while ! check_table 1 exp1 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+}
+
+function print_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ read -a have < $path
+
+ tree $NSIM_DEV_DFS/
+
+ echo "Port $port table $1:"
+
+ for i in "${!have[@]}"; do
+ echo " `pre ${have[i]}`"
+ done
+
+}
+
+function print_tables {
+ print_table 0
+ print_table 1
+}
+
+function get_netdev_name {
+ local -n old=$1
+
+ new=$(ls /sys/class/net)
+
+ for netdev in $new; do
+ for check in $old; do
+ [ $netdev == $check ] && break
+ done
+
+ if [ $netdev != $check ]; then
+ echo $netdev
+ break
+ fi
+ done
+}
+
+###
+### Code start
+###
+
+# Probe ethtool support: set HAS_ETHTOOL when "ethtool -h" mentions show-tunnels (grep output fully silenced)
+ethtool -h | grep show-tunnels >/dev/null 2>&1 && HAS_ETHTOOL=y
+
+modprobe netdevsim
+
+# Basic test
+pfx="basic"
+
+for port in 0 1; do
+ old_netdevs=$(ls /sys/class/net)
+ if [ $port -eq 0 ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/new_device
+ else
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ echo 1 > $NSIM_DEV_SYS/new_port
+ fi
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+ msg="new NIC device created"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ msg="VxLAN v4 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlan0 4789 $NSIM_NETDEV
+ new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+ msg="VxLAN v4 devices go down"
+ exp0=( 0 0 0 0 )
+ ifconfig vxlan1 down
+ ifconfig vxlan0 down
+ check_tables
+
+ msg="VxLAN v6 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlanA 4789 $NSIM_NETDEV 6
+
+ for ifc in vxlan0 vxlan1; do
+ ifconfig $ifc up
+ done
+
+ new_vxlan vxlanB 4789 $NSIM_NETDEV 6
+
+ msg="another VxLAN v6 devices"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+ msg="Geneve device"
+ exp1=( `mke 6081 2` 0 0 0 )
+ new_geneve gnv0 6081
+
+ msg="NIC device goes down"
+ ifconfig $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+ msg="NIC device goes up again"
+ ifconfig $NSIM_NETDEV up
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ check_tables
+
+ cleanup_tuns
+
+ msg="tunnels destroyed"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ modprobe -r geneve
+ modprobe -r vxlan
+ modprobe -r udp_tunnel
+
+ check_tables
+done
+
+modprobe -r netdevsim
+
+# Module tests
+pfx="module tests"
+
+if modinfo netdevsim | grep udp_tunnel >/dev/null; then
+ err_cnt "netdevsim depends on udp_tunnel"
+else
+ pass_cnt
+fi
+
+modprobe netdevsim
+
+old_netdevs=$(ls /sys/class/net)
+port=0
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 0 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than our wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+modprobe -r vxlan
+modprobe -r udp_tunnel
+
+msg="remove tunnels"
+exp0=( 0 0 0 0 )
+check_tables
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than our wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+exp0=( 0 0 0 0 )
+
+modprobe -r netdevsim
+modprobe netdevsim
+
+# Overflow the table
+
+function overflow_table0 {
+ local pfx=$1
+
+ msg="create VxLANs 1/5"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="create VxLANs 2/5"
+ exp0=( `mke 10000 1` `mke 10001 1` 0 0 )
+ new_vxlan vxlan1 10001 $NSIM_NETDEV
+
+ msg="create VxLANs 3/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 )
+ new_vxlan vxlan2 10002 $NSIM_NETDEV
+
+ msg="create VxLANs 4/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` )
+ new_vxlan vxlan3 10003 $NSIM_NETDEV
+
+ msg="create VxLANs 5/5"
+ new_vxlan vxlan4 10004 $NSIM_NETDEV
+}
+
+function overflow_table1 {
+ local pfx=$1
+
+ msg="create GENEVE 1/5"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="create GENEVE 2/5"
+ exp1=( `mke 20000 2` `mke 20001 2` 0 0 )
+ new_geneve gnv1 20001
+
+ msg="create GENEVE 3/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 )
+ new_geneve gnv2 20002
+
+ msg="create GENEVE 4/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` )
+ new_geneve gnv3 20003
+
+ msg="create GENEVE 5/5"
+ new_geneve gnv4 20004
+}
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Sync all
+pfx="sync all"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Destroy full NIC
+pfx="destroy full"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "destroy NIC"
+ overflow_table1 "destroy NIC"
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# IPv4 only
+pfx="IPv4 only"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA1 10000 $NSIM_NETDEV 6
+
+ ip link set dev vxlanA0 down
+ ip link set dev vxlanA0 up
+ check_tables
+
+ msg="create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="down VxLANs v4"
+ exp0=( 0 0 0 0 )
+ ip link set dev vxlan0 down
+ check_tables
+
+ msg="up VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ip link set dev vxlan0 up
+ check_tables
+
+ msg="destroy VxLANs v4"
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+
+ msg="recreate VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ del_dev vxlanA0
+ del_dev vxlanA1
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Failures
+pfx="error injection"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+
+ msg="1 - create VxLANs v6"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="1 - create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="1 - remove VxLANs v4"
+ del_dev vxlan0
+
+ msg="1 - remove VxLANs v6"
+ exp0=( 0 0 0 0 )
+ del_dev vxlanA0
+
+ msg="2 - create GENEVE"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="2 - destroy GENEVE"
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ exp1=( `mke 20000 2` 0 0 0 )
+ del_dev gnv0
+
+ msg="2 - create second GENEVE"
+ exp1=( 0 `mke 20001 2` 0 0 )
+ new_geneve gnv0 20001
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# netdev flags
+pfx="netdev flags"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="turn off"
+ exp0=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+
+ msg="create VxLANs v4 - off"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="created off - turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# device initiated reset
+pfx="reset notification"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes down"
+ ifconfig $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes up again"
+ ifconfig $NSIM_NETDEV up
+ exp0=( `mke 10000 1` 0 0 0 )
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+modprobe -r netdevsim
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $num_cases checks"
+else
+ echo "FAILED $num_errors/$num_cases checks"
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 8a6b507e34a8..1d27f52c61e6 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -21,7 +21,6 @@
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest.h"
#include "../../kselftest_harness.h"
#define DEFAULT_THREADS 4
@@ -37,37 +36,26 @@
fd = -EBADF; \
}
-#define log_exit(format, ...) \
- ({ \
- fprintf(stderr, format "\n", ##__VA_ARGS__); \
- exit(EXIT_FAILURE); \
- })
-
-static void change_mountns(void)
+static void change_mountns(struct __test_metadata *_metadata)
{
int ret;
ret = unshare(CLONE_NEWNS);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n",
- strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unshare mount namespace",
+ strerror(errno));
+ }
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to mount / as private\n",
- strerror(errno));
-}
-
-static void rmdir_protect_errno(const char *dir)
-{
- int saved_errno = errno;
- (void)rmdir(dir);
- errno = saved_errno;
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to mount / as private",
+ strerror(errno));
+ }
}
-static int __do_binderfs_test(void)
+static int __do_binderfs_test(struct __test_metadata *_metadata)
{
- int fd, ret, saved_errno;
+ int fd, ret, saved_errno, result = 1;
size_t len;
ssize_t wret;
struct binderfs_device device = { 0 };
@@ -75,113 +63,107 @@ static int __do_binderfs_test(void)
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
- change_mountns();
+ change_mountns(_metadata);
- if (!mkdtemp(binderfs_mntpt))
- ksft_exit_fail_msg(
- "%s - Failed to create binderfs mountpoint\n",
+ EXPECT_NE(mkdtemp(binderfs_mntpt), NULL) {
+ TH_LOG("%s - Failed to create binderfs mountpoint",
strerror(errno));
+ goto out;
+ }
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
- if (ret < 0) {
- if (errno != ENODEV)
- ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
- strerror(errno));
-
- rmdir_protect_errno(binderfs_mntpt);
- return 1;
+ EXPECT_EQ(ret, 0) {
+ if (errno == ENODEV)
+ XFAIL(goto rmdir, "binderfs missing"); /* nothing is mounted, but the mkdtemp() directory must still be removed */
+ TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ goto rmdir;
+ }
- /* binderfs mount test passed */
- ksft_inc_pass_cnt();
+ /* success: binderfs mounted */
memcpy(device.name, "my-binder", strlen("my-binder"));
snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
fd = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- ksft_exit_fail_msg(
- "%s - Failed to open binder-control device\n",
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder-control device",
strerror(errno));
+ goto umount;
+ }
ret = ioctl(fd, BINDER_CTL_ADD, &device);
saved_errno = errno;
close(fd);
errno = saved_errno;
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to allocate new binder device\n",
+ EXPECT_GE(ret, 0) {
+ TH_LOG("%s - Failed to allocate new binder device",
strerror(errno));
+ goto umount;
}
- ksft_print_msg(
- "Allocated new binder device with major %d, minor %d, and name %s\n",
+ TH_LOG("Allocated new binder device with major %d, minor %d, and name %s",
device.major, device.minor, device.name);
- /* binder device allocation test passed */
- ksft_inc_pass_cnt();
+ /* success: binder device allocation */
snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt);
fd = open(device_path, O_CLOEXEC | O_RDONLY);
- if (fd < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
- strerror(errno));
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open my-binder device",
+ strerror(errno));
+ goto umount;
}
ret = ioctl(fd, BINDER_VERSION, &version);
saved_errno = errno;
close(fd);
errno = saved_errno;
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to open perform BINDER_VERSION request\n",
+ EXPECT_GE(ret, 0) {
+ TH_LOG("%s - Failed to open perform BINDER_VERSION request",
strerror(errno));
+ goto umount;
}
- ksft_print_msg("Detected binder version: %d\n",
- version.protocol_version);
+ TH_LOG("Detected binder version: %d", version.protocol_version);
- /* binder transaction with binderfs binder device passed */
- ksft_inc_pass_cnt();
+ /* success: binder transaction with binderfs binder device */
ret = unlink(device_path);
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("%s - Failed to delete binder device\n",
- strerror(errno));
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to delete binder device",
+ strerror(errno));
+ goto umount;
}
- /* binder device removal passed */
- ksft_inc_pass_cnt();
+ /* success: binder device removal */
snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
ret = unlink(device_path);
- if (!ret) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("Managed to delete binder-control device\n");
- } else if (errno != EPERM) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to delete binder-control device but exited with unexpected error code\n",
+ EXPECT_NE(ret, 0) {
+ TH_LOG("Managed to delete binder-control device");
+ goto umount;
+ }
+ EXPECT_EQ(errno, EPERM) {
+ TH_LOG("%s - Failed to delete binder-control device but exited with unexpected error code",
strerror(errno));
+ goto umount;
}
- /* binder-control device removal failed as expected */
- ksft_inc_xfail_cnt();
+ /* success: binder-control device removal failed as expected */
+ result = 0;
-on_error:
+umount:
ret = umount2(binderfs_mntpt, MNT_DETACH);
- rmdir_protect_errno(binderfs_mntpt);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
- strerror(errno));
-
- /* binderfs unmount test passed */
- ksft_inc_pass_cnt();
- return 0;
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unmount binderfs", strerror(errno));
+ }
+rmdir:
+ ret = rmdir(binderfs_mntpt);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to rmdir binderfs mount", strerror(errno));
+ }
+out:
+ return result;
}
static int wait_for_pid(pid_t pid)
@@ -291,7 +273,7 @@ static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf,
return 0;
}
-static void change_userns(int syncfds[2])
+static void change_userns(struct __test_metadata *_metadata, int syncfds[2])
{
int ret;
char buf;
@@ -299,25 +281,29 @@ static void change_userns(int syncfds[2])
close_prot_errno_disarm(syncfds[1]);
ret = unshare(CLONE_NEWUSER);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
- strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unshare user namespace",
+ strerror(errno));
+ }
ret = write_nointr(syncfds[0], "1", 1);
- if (ret != 1)
- ksft_exit_fail_msg("write_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("write_nointr() failed");
+ }
ret = read_nointr(syncfds[0], &buf, 1);
- if (ret != 1)
- ksft_exit_fail_msg("read_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("read_nointr() failed");
+ }
close_prot_errno_disarm(syncfds[0]);
- if (setid_userns_root())
- ksft_exit_fail_msg("setid_userns_root() failed");
+ ASSERT_EQ(setid_userns_root(), 0) {
+ TH_LOG("setid_userns_root() failed");
+ }
}
-static void change_idmaps(int syncfds[2], pid_t pid)
+static void change_idmaps(struct __test_metadata *_metadata, int syncfds[2], pid_t pid)
{
int ret;
char buf;
@@ -326,35 +312,42 @@ static void change_idmaps(int syncfds[2], pid_t pid)
close_prot_errno_disarm(syncfds[0]);
ret = read_nointr(syncfds[1], &buf, 1);
- if (ret != 1)
- ksft_exit_fail_msg("read_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("read_nointr() failed");
+ }
snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid());
ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map));
- if (ret)
- ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("write_id_mapping(UID_MAP) failed");
+ }
snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid());
ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map));
- if (ret)
- ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("write_id_mapping(GID_MAP) failed");
+ }
ret = write_nointr(syncfds[1], "1", 1);
- if (ret != 1)
- ksft_exit_fail_msg("write_nointr() failed");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("write_nointr() failed");
+ }
close_prot_errno_disarm(syncfds[1]);
}
+struct __test_metadata *_thread_metadata;
static void *binder_version_thread(void *data)
{
+ struct __test_metadata *_metadata = _thread_metadata;
int fd = PTR_TO_INT(data);
struct binder_version version = { 0 };
int ret;
ret = ioctl(fd, BINDER_VERSION, &version);
if (ret < 0)
- ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno));
+ TH_LOG("%s - Failed to open perform BINDER_VERSION request\n",
+ strerror(errno));
pthread_exit(data);
}
@@ -377,68 +370,79 @@ TEST(binderfs_stress)
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to create socket pair", strerror(errno));
+ }
pid = fork();
- if (pid < 0) {
+ ASSERT_GE(pid, 0) {
+ TH_LOG("%s - Failed to fork", strerror(errno));
close_prot_errno_disarm(syncfds[0]);
close_prot_errno_disarm(syncfds[1]);
- ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
}
if (pid == 0) {
int i, j, k, nthreads;
pthread_attr_t attr;
pthread_t threads[DEFAULT_THREADS];
- change_userns(syncfds);
- change_mountns();
+ change_userns(_metadata, syncfds);
+ change_mountns(_metadata);
- if (!mkdtemp(binderfs_mntpt))
- log_exit("%s - Failed to create binderfs mountpoint\n",
- strerror(errno));
+ ASSERT_NE(mkdtemp(binderfs_mntpt), NULL) {
+ TH_LOG("%s - Failed to create binderfs mountpoint",
+ strerror(errno));
+ }
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
- if (ret < 0)
- log_exit("%s - Failed to mount binderfs\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ }
for (int i = 0; i < ARRAY_SIZE(fds); i++) {
snprintf(device_path, sizeof(device_path),
"%s/binder-control", binderfs_mntpt);
fd = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- log_exit("%s - Failed to open binder-control device\n", strerror(errno));
+ ASSERT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder-control device",
+ strerror(errno));
+ }
memset(&device, 0, sizeof(device));
snprintf(device.name, sizeof(device.name), "%d", i);
ret = ioctl(fd, BINDER_CTL_ADD, &device);
close_prot_errno_disarm(fd);
- if (ret < 0)
- log_exit("%s - Failed to allocate new binder device\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to allocate new binder device",
+ strerror(errno));
+ }
snprintf(device_path, sizeof(device_path), "%s/%d",
binderfs_mntpt, i);
fds[i] = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fds[i] < 0)
- log_exit("%s - Failed to open binder device\n", strerror(errno));
+ ASSERT_GE(fds[i], 0) {
+ TH_LOG("%s - Failed to open binder device", strerror(errno));
+ }
}
ret = umount2(binderfs_mntpt, MNT_DETACH);
- rmdir_protect_errno(binderfs_mntpt);
- if (ret < 0)
- log_exit("%s - Failed to unmount binderfs\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unmount binderfs", strerror(errno));
+ rmdir(binderfs_mntpt);
+ }
nthreads = get_nprocs_conf();
if (nthreads > DEFAULT_THREADS)
nthreads = DEFAULT_THREADS;
+ _thread_metadata = _metadata;
pthread_attr_init(&attr);
for (k = 0; k < ARRAY_SIZE(fds); k++) {
for (i = 0; i < nthreads; i++) {
ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k]));
if (ret) {
- ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i);
+ TH_LOG("%s - Failed to create thread %d",
+ strerror(errno), i);
break;
}
}
@@ -448,7 +452,8 @@ TEST(binderfs_stress)
ret = pthread_join(threads[j], &fdptr);
if (ret)
- ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr));
+ TH_LOG("%s - Failed to join thread %d for fd %d",
+ strerror(errno), j, PTR_TO_INT(fdptr));
}
}
pthread_attr_destroy(&attr);
@@ -459,11 +464,12 @@ TEST(binderfs_stress)
exit(EXIT_SUCCESS);
}
- change_idmaps(syncfds, pid);
+ change_idmaps(_metadata, syncfds, pid);
ret = wait_for_pid(pid);
- if (ret)
- ksft_exit_fail_msg("wait_for_pid() failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("wait_for_pid() failed");
+ }
}
TEST(binderfs_test_privileged)
@@ -471,7 +477,7 @@ TEST(binderfs_test_privileged)
if (geteuid() != 0)
XFAIL(return, "Tests are not run as root. Skipping privileged tests");
- if (__do_binderfs_test() == 1)
+ if (__do_binderfs_test(_metadata))
XFAIL(return, "The Android binderfs filesystem is not available");
}
@@ -482,31 +488,33 @@ TEST(binderfs_test_unprivileged)
pid_t pid;
ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to create socket pair", strerror(errno));
+ }
pid = fork();
- if (pid < 0) {
+ ASSERT_GE(pid, 0) {
close_prot_errno_disarm(syncfds[0]);
close_prot_errno_disarm(syncfds[1]);
- ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+ TH_LOG("%s - Failed to fork", strerror(errno));
}
if (pid == 0) {
- change_userns(syncfds);
- if (__do_binderfs_test() == 1)
+ change_userns(_metadata, syncfds);
+ if (__do_binderfs_test(_metadata))
exit(2);
exit(EXIT_SUCCESS);
}
- change_idmaps(syncfds, pid);
+ change_idmaps(_metadata, syncfds, pid);
ret = wait_for_pid(pid);
if (ret) {
if (ret == 2)
XFAIL(return, "The Android binderfs filesystem is not available");
- else
- ksft_exit_fail_msg("wait_for_pid() failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("wait_for_pid() failed");
+ }
}
}
diff --git a/tools/testing/selftests/firmware/settings b/tools/testing/selftests/firmware/settings
new file mode 100644
index 000000000000..085e664ee093
--- /dev/null
+++ b/tools/testing/selftests/firmware/settings
@@ -0,0 +1,8 @@
+# The async firmware timeout is set to 1 second (but ends up being effectively
+# 2 seconds). There are 3 test configs, each done with and without firmware
+# present, each with 2 "nowait" functions tested 5 times. Expected time for a
+# normal execution should be 2 * 3 * 2 * 2 * 5 = 120 seconds for those alone.
+# Additionally, fw_fallback may take 5 seconds for internal timeouts in each
+# of the 3 configs, so at least another 15 seconds are needed. Add another
+# 10 seconds for each testing config: 120 + 15 + 30
+timeout=165
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index c82aa77958e5..afd42387e8b2 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -130,7 +130,7 @@ test_reqs()
if [[ $KMOD_VERSION -le 19 ]]; then
echo "$0: You need at least kmod 20" >&2
echo "kmod <= 19 is buggy, for details see:" >&2
- echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+ echo "https://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
exit $ksft_skip
fi
@@ -343,7 +343,7 @@ kmod_test_0001_driver()
kmod_defaults_driver
config_num_threads 1
- printf '\000' >"$DIR"/config_test_driver
+ printf $NAME >"$DIR"/config_test_driver
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
}
@@ -354,7 +354,7 @@ kmod_test_0001_fs()
kmod_defaults_fs
config_num_threads 1
- printf '\000' >"$DIR"/config_test_fs
+ printf $NAME >"$DIR"/config_test_fs
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} -EINVAL
}
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 862eee734553..8d50483fe204 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -1,11 +1,43 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * kselftest.h: kselftest framework return codes to include from
- * selftests.
+ * kselftest.h: low-level kselftest framework to include from
+ * selftest programs. When possible, please use
+ * kselftest_harness.h instead.
*
* Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
*
+ * Using this API consists of first counting how many tests your code
+ * has to run, and then starting up the reporting:
+ *
+ * ksft_print_header();
+ * ksft_set_plan(total_number_of_tests);
+ *
+ * For each test, report any progress, debugging, etc with:
+ *
+ * ksft_print_msg(fmt, ...);
+ *
+ * and finally report the pass/fail/skip/xfail state of the test with one of:
+ *
+ * ksft_test_result(condition, fmt, ...);
+ * ksft_test_result_pass(fmt, ...);
+ * ksft_test_result_fail(fmt, ...);
+ * ksft_test_result_skip(fmt, ...);
+ * ksft_test_result_xfail(fmt, ...);
+ * ksft_test_result_error(fmt, ...);
+ *
+ * When all tests are finished, clean up and exit the program with one of:
+ *
+ * ksft_exit(condition);
+ * ksft_exit_pass();
+ * ksft_exit_fail();
+ *
+ * If the program wants to report details on why the entire program has
+ * failed, it can instead exit with a message (this is usually done when
+ * the program is aborting before finishing all tests):
+ *
+ * ksft_exit_fail_msg(fmt, ...);
+ *
*/
#ifndef __KSELFTEST_H
#define __KSELFTEST_H
@@ -74,7 +106,7 @@ static inline void ksft_print_cnts(void)
if (ksft_plan != ksft_test_num())
printf("# Planned tests != run tests (%u != %u)\n",
ksft_plan, ksft_test_num());
- printf("# Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+ printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n",
ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
@@ -120,6 +152,32 @@ static inline void ksft_test_result_fail(const char *msg, ...)
va_end(args);
}
+/**
+ * ksft_test_result() - Report test success based on truth of condition
+ *
+ * @condition: if true, report test success, otherwise failure.
+ */
+#define ksft_test_result(condition, fmt, ...) do { \
+ if (!!(condition)) \
+ ksft_test_result_pass(fmt, ##__VA_ARGS__);\
+ else \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__);\
+ } while (0)
+
+static inline void ksft_test_result_xfail(const char *msg, ...)
+{
+ int saved_errno = errno;
+ va_list args;
+
+ ksft_cnt.ksft_xfail++;
+
+ va_start(args, msg);
+ printf("ok %d # XFAIL ", ksft_test_num());
+ errno = saved_errno;
+ vprintf(msg, args);
+ va_end(args);
+}
+
static inline void ksft_test_result_skip(const char *msg, ...)
{
int saved_errno = errno;
@@ -128,12 +186,13 @@ static inline void ksft_test_result_skip(const char *msg, ...)
ksft_cnt.ksft_xskip++;
va_start(args, msg);
- printf("not ok %d # SKIP ", ksft_test_num());
+ printf("ok %d # SKIP ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
+/* TODO: how does "error" differ from "fail" or "skip"? */
static inline void ksft_test_result_error(const char *msg, ...)
{
int saved_errno = errno;
@@ -156,11 +215,22 @@ static inline int ksft_exit_pass(void)
static inline int ksft_exit_fail(void)
{
- printf("Bail out!\n");
ksft_print_cnts();
exit(KSFT_FAIL);
}
+/**
+ * ksft_exit() - Exit selftest based on truth of condition
+ *
+ * @condition: if true, exit self test with success, otherwise fail.
+ */
+#define ksft_exit(condition) do { \
+ if (!!(condition)) \
+ ksft_exit_pass(); \
+ else \
+ ksft_exit_fail(); \
+ } while (0)
+
static inline int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
@@ -190,18 +260,30 @@ static inline int ksft_exit_xpass(void)
static inline int ksft_exit_skip(const char *msg, ...)
{
- if (msg) {
- int saved_errno = errno;
- va_list args;
+ int saved_errno = errno;
+ va_list args;
+
+ va_start(args, msg);
- va_start(args, msg);
- printf("not ok %d # SKIP ", 1 + ksft_test_num());
+ /*
+ * FIXME: several tests misuse ksft_exit_skip so produce
+ * something sensible if some tests have already been run
+ * or a plan has been printed. Those tests should use
+ * ksft_test_result_skip or ksft_exit_fail_msg instead.
+ */
+ if (ksft_plan || ksft_test_num()) {
+ ksft_cnt.ksft_xskip++;
+ printf("ok %d # SKIP ", 1 + ksft_test_num());
+ } else {
+ printf("1..0 # SKIP ");
+ }
+ if (msg) {
errno = saved_errno;
vprintf(msg, args);
va_end(args);
- } else {
- ksft_print_cnts();
}
+ if (ksft_test_num())
+ ksft_print_cnts();
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 676b3a8b114d..cc9c846585f0 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -53,6 +53,10 @@ run_one()
settings="$BASE_DIR/$DIR/settings"
if [ -r "$settings" ] ; then
while read line ; do
+ # Skip comments.
+ if echo "$line" | grep -q '^#'; then
+ continue
+ fi
field=$(echo "$line" | cut -d= -f1)
value=$(echo "$line" | cut -d= -f2-)
eval "kselftest_$field"="$value"
@@ -77,10 +81,10 @@ run_one()
echo "ok $test_num $TEST_HDR_MSG") ||
(rc=$?; \
if [ $rc -eq $skip_rc ]; then \
- echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ echo "ok $test_num $TEST_HDR_MSG # SKIP"
elif [ $rc -eq $timeout_rc ]; then \
echo "#"
- echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
+ echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
else
echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
fi)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 7f32a7099a81..4f78e4805633 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -50,7 +50,9 @@
#ifndef __KSELFTEST_HARNESS_H
#define __KSELFTEST_HARNESS_H
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#include <asm/types.h>
#include <errno.h>
#include <stdbool.h>
@@ -58,10 +60,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include "kselftest.h"
+
#define TEST_TIMEOUT_DEFAULT 30
/* Utilities exposed to the test definitions */
@@ -104,26 +109,28 @@
/* Unconditional logger for internal use. */
#define __TH_LOG(fmt, ...) \
- fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ fprintf(TH_LOG_STREAM, "# %s:%d:%s:" fmt "\n", \
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
- * XFAIL(statement, fmt, ...)
+ * SKIP(statement, fmt, ...)
*
- * @statement: statement to run after reporting XFAIL
+ * @statement: statement to run after reporting SKIP
* @fmt: format string
* @...: optional arguments
*
- * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * This forces a "pass" after reporting why something is being skipped
* and runs "statement", which is usually "return" or "goto skip".
*/
-#define XFAIL(statement, fmt, ...) do { \
+#define SKIP(statement, fmt, ...) do { \
+ snprintf(_metadata->results->reason, \
+ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \
if (TH_LOG_ENABLED) { \
- fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
- ##__VA_ARGS__); \
+ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
+ _metadata->results->reason); \
} \
- /* TODO: find a way to pass xfail to test runner process. */ \
_metadata->passed = 1; \
+ _metadata->skip = 1; \
_metadata->trigger = 0; \
statement; \
} while (0)
@@ -673,20 +680,53 @@
__bail(_assert, _metadata->no_print, _metadata->step))
#define __INC_STEP(_metadata) \
- if (_metadata->passed && _metadata->step < 255) \
+ /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \
+ if (_metadata->passed && _metadata->step < 253) \
_metadata->step++;
+#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
+
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
__typeof__(_expected) __exp = (_expected); \
__typeof__(_seen) __seen = (_seen); \
if (_assert) __INC_STEP(_metadata); \
if (!(__exp _t __seen)) { \
- unsigned long long __exp_print = (uintptr_t)__exp; \
- unsigned long long __seen_print = (uintptr_t)__seen; \
- __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
- _expected_str, __exp_print, #_t, \
- _seen_str, __seen_print); \
+ /* Report with actual signedness to avoid weird output. */ \
+ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
+ case 0: { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 1: { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ long long __seen_print = (intptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%lld)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 2: { \
+ long long __exp_print = (intptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%lld) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 3: { \
+ long long __exp_print = (intptr_t)__exp; \
+ long long __seen_print = (intptr_t)__seen; \
+ __TH_LOG("Expected %s (%lld) %s %s (%lld)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ } \
_metadata->passed = 0; \
/* Ensure the optional handler is triggered */ \
_metadata->trigger = 1; \
@@ -727,6 +767,10 @@
} \
}
+struct __test_results {
+ char reason[1024]; /* Reason for test result */
+};
+
struct __test_metadata;
struct __fixture_variant_metadata;
@@ -774,11 +818,13 @@ struct __test_metadata {
struct __fixture_metadata *fixture;
int termsig;
int passed;
+ int skip; /* did SKIP get used? */
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
bool timed_out; /* did this test timeout instead of exiting? */
__u8 step;
bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ struct __test_results *results;
struct __test_metadata *prev, *next;
};
@@ -814,12 +860,12 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
/* Sanity check handler execution environment. */
if (!t) {
fprintf(TH_LOG_STREAM,
- "no active test in SIGALRM handler!?\n");
+ "# no active test in SIGALRM handler!?\n");
abort();
}
if (sig != SIGALRM || sig != info->si_signo) {
fprintf(TH_LOG_STREAM,
- "%s: SIGALRM handler caught signal %d!?\n",
+ "# %s: SIGALRM handler caught signal %d!?\n",
t->name, sig != SIGALRM ? sig : info->si_signo);
abort();
}
@@ -840,7 +886,7 @@ void __wait_for_test(struct __test_metadata *t)
if (sigaction(SIGALRM, &action, &saved_action)) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: unable to install SIGALRM handler\n",
+ "# %s: unable to install SIGALRM handler\n",
t->name);
return;
}
@@ -852,7 +898,7 @@ void __wait_for_test(struct __test_metadata *t)
if (sigaction(SIGALRM, &saved_action, NULL)) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: unable to uninstall SIGALRM handler\n",
+ "# %s: unable to uninstall SIGALRM handler\n",
t->name);
return;
}
@@ -861,39 +907,51 @@ void __wait_for_test(struct __test_metadata *t)
if (t->timed_out) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: Test terminated by timeout\n", t->name);
+ "# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
if (t->termsig != -1) {
+ t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: Test exited normally "
- "instead of by signal (code: %d)\n",
- t->name,
- WEXITSTATUS(status));
- } else if (!t->passed) {
- fprintf(TH_LOG_STREAM,
- "%s: Test failed at step #%d\n",
+ "# %s: Test exited normally instead of by signal (code: %d)\n",
t->name,
WEXITSTATUS(status));
+ } else {
+ switch (WEXITSTATUS(status)) {
+ /* Success */
+ case 0:
+ t->passed = 1;
+ break;
+ /* SKIP */
+ case 255:
+ t->passed = 1;
+ t->skip = 1;
+ break;
+ /* Other failure, assume step report. */
+ default:
+ t->passed = 0;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Test failed at step #%d\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
}
} else if (WIFSIGNALED(status)) {
t->passed = 0;
if (WTERMSIG(status) == SIGABRT) {
fprintf(TH_LOG_STREAM,
- "%s: Test terminated by assertion\n",
+ "# %s: Test terminated by assertion\n",
t->name);
} else if (WTERMSIG(status) == t->termsig) {
t->passed = 1;
} else {
fprintf(TH_LOG_STREAM,
- "%s: Test terminated unexpectedly "
- "by signal %d\n",
+ "# %s: Test terminated unexpectedly by signal %d\n",
t->name,
WTERMSIG(status));
}
} else {
fprintf(TH_LOG_STREAM,
- "%s: Test ended in some other way [%u]\n",
+ "# %s: Test ended in some other way [%u]\n",
t->name,
status);
}
@@ -905,25 +963,39 @@ void __run_test(struct __fixture_metadata *f,
{
/* reset test struct */
t->passed = 1;
+ t->skip = 0;
t->trigger = 0;
t->step = 0;
t->no_print = 0;
+ memset(t->results->reason, 0, sizeof(t->results->reason));
- printf("[ RUN ] %s%s%s.%s\n",
+ ksft_print_msg(" RUN %s%s%s.%s ...\n",
f->name, variant->name[0] ? "." : "", variant->name, t->name);
t->pid = fork();
if (t->pid < 0) {
- printf("ERROR SPAWNING TEST CHILD\n");
+ ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->passed = 0;
} else if (t->pid == 0) {
t->fn(t, variant);
- /* return the step that failed or 0 */
- _exit(t->passed ? 0 : t->step);
+ if (t->skip)
+ _exit(255);
+ /* Pass is exit 0 */
+ if (t->passed)
+ _exit(0);
+ /* Something else happened, report the step. */
+ _exit(t->step);
} else {
__wait_for_test(t);
}
- printf("[ %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"),
+ ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL",
f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ if (t->skip)
+ ksft_test_result_skip("%s\n", t->results->reason[0] ?
+ t->results->reason : "unknown");
+ else
+ ksft_test_result(t->passed, "%s%s%s.%s\n",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
}
static int test_harness_run(int __attribute__((unused)) argc,
@@ -932,6 +1004,7 @@ static int test_harness_run(int __attribute__((unused)) argc,
struct __fixture_variant_metadata no_variant = { .name = "", };
struct __fixture_variant_metadata *v;
struct __fixture_metadata *f;
+ struct __test_results *results;
struct __test_metadata *t;
int ret = 0;
unsigned int case_count = 0, test_count = 0;
@@ -946,14 +1019,20 @@ static int test_harness_run(int __attribute__((unused)) argc,
}
}
- /* TODO(wad) add optional arguments similar to gtest. */
- printf("[==========] Running %u tests from %u test cases.\n",
+ results = mmap(NULL, sizeof(*results), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+ ksft_print_header();
+ ksft_set_plan(test_count);
+ ksft_print_msg("Starting %u tests from %u test cases.\n",
test_count, case_count);
for (f = __fixture_list; f; f = f->next) {
for (v = f->variant ?: &no_variant; v; v = v->next) {
for (t = f->tests; t; t = t->next) {
count++;
+ t->results = results;
__run_test(f, v, t);
+ t->results = NULL;
if (t->passed)
pass_count++;
else
@@ -961,9 +1040,14 @@ static int test_harness_run(int __attribute__((unused)) argc,
}
}
}
- printf("[==========] %u / %u tests passed.\n", pass_count, count);
- printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
- return ret;
+ munmap(results, sizeof(*results));
+
+ ksft_print_msg("%s: %u / %u tests passed.\n", ret ? "FAILED" : "PASSED",
+ pass_count, count);
+ ksft_exit(ret == 0);
+
+ /* unreachable */
+ return KSFT_FAIL;
}
static void __attribute__((constructor)) __constructor_order_first(void)
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index b0556c752443..7a17ea815736 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -59,9 +59,8 @@ else
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
endif
-.ONESHELL:
define RUN_TESTS
- @BASE_DIR="$(selfdir)"; \
+ BASE_DIR="$(selfdir)"; \
. $(selfdir)/kselftest/runner.sh; \
if [ "X$(summary)" != "X" ]; then \
per_test_logging=1; \
@@ -71,22 +70,21 @@ endef
run_tests: all
ifdef building_out_of_srctree
- @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
- @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+ @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
+ rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
fi
- @if [ "X$(TEST_PROGS)" != "X" ]; then
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
- else
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+ @if [ "X$(TEST_PROGS)" != "X" ]; then \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \
+ else \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \
fi
else
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+ @$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
endif
define INSTALL_SINGLE_RULE
$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
- $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
- $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
endef
define INSTALL_RULE
diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index 621d325425c2..0942dd5826f8 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -6,8 +6,8 @@ This is a small set of sanity tests for the kernel livepatching.
The test suite loads and unloads several test kernel modules to verify
livepatch behavior. Debug information is logged to the kernel's message
-buffer and parsed for expected messages. (Note: the tests will clear
-the message buffer between individual tests.)
+buffer and parsed for expected messages. (Note: each test compares only
+the messages logged to the buffer during that individual test.)
Config
@@ -35,9 +35,9 @@ Adding tests
------------
See the common functions.sh file for the existing collection of utility
-functions, most importantly setup_config() and check_result(). The
-latter function greps the kernel's ring buffer for "livepatch:" and
-"test_klp" strings, so tests be sure to include one of those strings for
-result comparison. Other utility functions include general module
-loading and livepatch loading helpers (waiting for patch transitions,
-sysfs entries, etc.)
+functions, most importantly setup_config(), start_test() and
+check_result(). The last of these greps the kernel's ring buffer for
+"livepatch:" and "test_klp" strings, so tests must be sure to include one
+of those strings for result comparison. Other utility functions include
+general module loading and livepatch loading helpers (waiting for patch
+transitions, sysfs entries, etc.)
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 2aab9791791d..1aba83c87ad3 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -41,6 +41,17 @@ function die() {
exit 1
}
+# save existing dmesg so we can detect new content
+function save_dmesg() {
+ SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX)
+ dmesg > "$SAVED_DMESG"
+}
+
+# cleanup temporary dmesg file from save_dmesg()
+function cleanup_dmesg_file() {
+ rm -f "$SAVED_DMESG"
+}
+
function push_config() {
DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
@@ -64,10 +75,16 @@ function set_dynamic_debug() {
}
function set_ftrace_enabled() {
- result=$(sysctl kernel.ftrace_enabled="$1" 2>&1 | paste --serial --delimiters=' ')
+ result=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1 && \
+ sysctl kernel.ftrace_enabled 2>&1)
echo "livepatch: $result" > /dev/kmsg
}
+function cleanup() {
+ pop_config
+ cleanup_dmesg_file
+}
+
# setup_config - save the current config and set a script exit trap that
# restores the original config. Setup the dynamic debug
# for verbose livepatching output and turn on
@@ -77,7 +94,7 @@ function setup_config() {
push_config
set_dynamic_debug
set_ftrace_enabled 1
- trap pop_config EXIT INT TERM HUP
+ trap cleanup EXIT INT TERM HUP
}
# loop_until(cmd) - loop a command until it is successful or $MAX_RETRIES,
@@ -243,13 +260,28 @@ function set_pre_patch_ret {
die "failed to set pre_patch_ret parameter for $mod module"
}
+function start_test {
+ local test="$1"
+
+ save_dmesg
+ echo -n "TEST: $test ... "
+ log "===== TEST: $test ====="
+}
+
# check_result() - verify dmesg output
# TODO - better filter, out of order msgs, etc?
function check_result {
local expect="$*"
local result
- result=$(dmesg | grep -v 'tainting' | grep -e 'livepatch:' -e 'test_klp' | sed 's/^\[[ 0-9.]*\] //')
+ # Note: when comparing dmesg output, the kernel log timestamps
+ # help differentiate repeated testing runs. Remove them with a
+ # post-comparison sed filter.
+
+ result=$(dmesg | comm -13 "$SAVED_DMESG" - | \
+ grep -e 'livepatch:' -e 'test_klp' | \
+ grep -v '\(tainting\|taints\) kernel' | \
+ sed 's/^\[[ 0-9.]*\] //')
if [[ "$expect" == "$result" ]] ; then
echo "ok"
@@ -257,4 +289,6 @@ function check_result {
echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n"
die "livepatch kselftest(s) failed"
fi
+
+ cleanup_dmesg_file
}
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index a35289b13c9c..90b26dbb2626 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -12,8 +12,6 @@ MOD_TARGET_BUSY=test_klp_callbacks_busy
setup_config
-# TEST: target module before livepatch
-#
# Test a combination of loading a kernel module and a livepatch that
# patches a function in the first module. Load the target module
# before the livepatch module. Unload them in the same order.
@@ -28,8 +26,7 @@ setup_config
# unpatching transition starts. klp_objects are reverted, post-patch
# callbacks execute and the transition completes.
-echo -n "TEST: target module before livepatch ... "
-dmesg -C
+start_test "target module before livepatch"
load_mod $MOD_TARGET
load_lp $MOD_LIVEPATCH
@@ -63,8 +60,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_coming notifier
-#
# This test is similar to the previous test, but (un)load the livepatch
# module before the target kernel module. This tests the livepatch
# core's module_coming handler.
@@ -78,8 +73,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
# - On livepatch disable, all currently loaded klp_objects' (vmlinux and
# $MOD_TARGET) pre/post-unpatch callbacks are executed.
-echo -n "TEST: module_coming notifier ... "
-dmesg -C
+start_test "module_coming notifier"
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
@@ -114,8 +108,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_going notifier
-#
# Test loading the livepatch after a targeted kernel module, then unload
# the kernel module before disabling the livepatch. This tests the
# livepatch core's module_going handler.
@@ -129,8 +121,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
# - When the livepatch is disabled, pre and post-unpatch callbacks are
# run for the remaining klp_object, vmlinux.
-echo -n "TEST: module_going notifier ... "
-dmesg -C
+start_test "module_going notifier"
load_mod $MOD_TARGET
load_lp $MOD_LIVEPATCH
@@ -165,8 +156,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: module_coming and module_going notifiers
-#
# This test is similar to the previous test, however the livepatch is
# loaded first. This tests the livepatch core's module_coming and
# module_going handlers.
@@ -180,8 +169,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# from the $MOD_TARGET klp_object. As such, only pre and
# post-unpatch callbacks are executed when this occurs.
-echo -n "TEST: module_coming and module_going notifiers ... "
-dmesg -C
+start_test "module_coming and module_going notifiers"
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
@@ -217,8 +205,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: target module not present
-#
# A simple test of loading a livepatch without one of its patch target
# klp_objects ever loaded ($MOD_TARGET).
#
@@ -227,8 +213,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# - As expected, only pre/post-(un)patch handlers are executed for
# vmlinux.
-echo -n "TEST: target module not present ... "
-dmesg -C
+start_test "target module not present"
load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
@@ -252,8 +237,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: pre-patch callback -ENODEV
-#
# Test a scenario where a vmlinux pre-patch callback returns a non-zero
# status (ie, failure).
#
@@ -265,8 +248,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# The result is that the insmod command refuses to load the livepatch
# module.
-echo -n "TEST: pre-patch callback -ENODEV ... "
-dmesg -C
+start_test "pre-patch callback -ENODEV"
load_mod $MOD_TARGET
load_failing_mod $MOD_LIVEPATCH pre_patch_ret=-19
@@ -288,8 +270,6 @@ modprobe: ERROR: could not insert '$MOD_LIVEPATCH': No such device
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_coming + pre-patch callback -ENODEV
-#
# Similar to the previous test, setup a livepatch such that its vmlinux
# pre-patch callback returns success. However, when a targeted kernel
# module is later loaded, have the livepatch return a failing status
@@ -307,8 +287,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
#
# - Pre/post-unpatch callbacks are run for the vmlinux klp_object.
-echo -n "TEST: module_coming + pre-patch callback -ENODEV ... "
-dmesg -C
+start_test "module_coming + pre-patch callback -ENODEV"
load_lp $MOD_LIVEPATCH
set_pre_patch_ret $MOD_LIVEPATCH -19
@@ -341,8 +320,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: multiple target modules
-#
# Test loading multiple targeted kernel modules. This test-case is
# mainly for comparing with the next test-case.
#
@@ -353,12 +330,9 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# module. Post-patch callbacks are executed and the transition
# completes quickly.
-echo -n "TEST: multiple target modules ... "
-dmesg -C
+start_test "multiple target modules"
-load_mod $MOD_TARGET_BUSY sleep_secs=0
-# give $MOD_TARGET_BUSY::busymod_work_func() a chance to run
-sleep 5
+load_mod $MOD_TARGET_BUSY block_transition=N
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
unload_mod $MOD_TARGET
@@ -366,9 +340,9 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=0
+check_result "% modprobe $MOD_TARGET_BUSY block_transition=N
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
-$MOD_TARGET_BUSY: busymod_work_func, sleeping 0 seconds ...
+$MOD_TARGET_BUSY: busymod_work_func enter
$MOD_TARGET_BUSY: busymod_work_func exit
% modprobe $MOD_LIVEPATCH
livepatch: enabling patch '$MOD_LIVEPATCH'
@@ -404,11 +378,8 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
-
-# TEST: busy target module
-#
# A similar test as the previous one, but force the "busy" kernel module
-# to do longer work.
+# to block the livepatch transition.
#
# The livepatching core will refuse to patch a task that is currently
# executing a to-be-patched function -- the consistency model stalls the
@@ -417,8 +388,7 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# function for a long time. Meanwhile, load and unload other target
# kernel modules while the livepatch transition is in progress.
#
-# - Load the "busy" kernel module, this time make it do 10 seconds worth
-# of work.
+# - Load the "busy" kernel module, this time make its work function loop
#
# - Meanwhile, the livepatch is loaded. Notice that the patch
# transition does not complete as the targeted "busy" module is
@@ -435,23 +405,25 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# klp_object's post-patch callbacks executed, the remaining
# klp_object's pre-unpatch callbacks are skipped.
-echo -n "TEST: busy target module ... "
-dmesg -C
+start_test "busy target module"
-load_mod $MOD_TARGET_BUSY sleep_secs=10
+load_mod $MOD_TARGET_BUSY block_transition=Y
load_lp_nowait $MOD_LIVEPATCH
-# Don't wait for transition, load $MOD_TARGET while the transition
-# is still stalled in $MOD_TARGET_BUSY::busymod_work_func()
-sleep 5
+
+# Wait until the livepatch reports in-transition state, i.e. that it's
+# stalled on $MOD_TARGET_BUSY::busymod_work_func()
+loop_until 'grep -q '^1$' /sys/kernel/livepatch/$MOD_LIVEPATCH/transition' ||
+ die "failed to stall transition"
+
load_mod $MOD_TARGET
unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=10
+check_result "% modprobe $MOD_TARGET_BUSY block_transition=Y
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
-$MOD_TARGET_BUSY: busymod_work_func, sleeping 10 seconds ...
+$MOD_TARGET_BUSY: busymod_work_func enter
% modprobe $MOD_LIVEPATCH
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
@@ -479,8 +451,6 @@ $MOD_TARGET_BUSY: busymod_work_func exit
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
-# TEST: multiple livepatches
-#
# Test loading multiple livepatches. This test-case is mainly for comparing
# with the next test-case.
#
@@ -488,8 +458,7 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# execute as each patch progresses through its (un)patching
# transition.
-echo -n "TEST: multiple livepatches ... "
-dmesg -C
+start_test "multiple livepatches"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2
@@ -532,8 +501,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: atomic replace
-#
# Load multiple livepatches, but the second as an 'atomic-replace'
# patch. When the latter loads, the original livepatch should be
# disabled and *none* of its pre/post-unpatch callbacks executed. On
@@ -548,8 +515,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# - Once the atomic replace module is loaded, only its pre and post
# unpatch callbacks are executed.
-echo -n "TEST: atomic replace ... "
-dmesg -C
+start_test "atomic replace"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2 replace=1
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index e2a76887f40a..552e165512f4 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -9,13 +9,11 @@ MOD_LIVEPATCH=test_klp_livepatch
setup_config
-# TEST: livepatch interaction with ftrace_enabled sysctl
# - turn ftrace_enabled OFF and verify livepatches can't load
# - turn ftrace_enabled ON and verify livepatch can load
# - verify that ftrace_enabled can't be turned OFF while a livepatch is loaded
-echo -n "TEST: livepatch interaction with ftrace_enabled sysctl ... "
-dmesg -C
+start_test "livepatch interaction with ftrace_enabled sysctl"
set_ftrace_enabled 0
load_failing_mod $MOD_LIVEPATCH
@@ -53,7 +51,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
-livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy kernel.ftrace_enabled = 0
+livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 493e3df415a1..5fe79ac34be1 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -10,13 +10,11 @@ MOD_REPLACE=test_klp_atomic_replace
setup_config
-# TEST: basic function patching
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - unload the livepatch and make sure the patch was removed
-echo -n "TEST: basic function patching ... "
-dmesg -C
+start_test "basic function patching"
load_lp $MOD_LIVEPATCH
@@ -47,15 +45,13 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: multiple livepatches
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - load another livepatch and verify that both livepatches are active
# - unload the second livepatch and verify that the first is still active
# - unload the first livepatch and verify none are active
-echo -n "TEST: multiple livepatches ... "
-dmesg -C
+start_test "multiple livepatches"
load_lp $MOD_LIVEPATCH
@@ -109,7 +105,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: atomic replace livepatch
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - load an atomic replace livepatch and verify that only the second is active
@@ -117,8 +112,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# is still active
# - remove the atomic replace livepatch and verify that none are active
-echo -n "TEST: atomic replace livepatch ... "
-dmesg -C
+start_test "atomic replace livepatch"
load_lp $MOD_LIVEPATCH
diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh
index 1aae73299114..e04cb354f56b 100755
--- a/tools/testing/selftests/livepatch/test-shadow-vars.sh
+++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh
@@ -9,52 +9,71 @@ MOD_TEST=test_klp_shadow_vars
setup_config
-# TEST: basic shadow variable API
# - load a module that exercises the shadow variable API
-echo -n "TEST: basic shadow variable API ... "
-dmesg -C
+start_test "basic shadow variable API"
load_mod $MOD_TEST
unload_mod $MOD_TEST
check_result "% modprobe $MOD_TEST
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: shadow_ctor: PTR6 -> PTR1
-$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR1 = PTR6
-$MOD_TEST: shadow_ctor: PTR8 -> PTR2
-$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR2 = PTR8
-$MOD_TEST: shadow_ctor: PTR10 -> PTR3
-$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR3 = PTR10
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR6
-$MOD_TEST: got expected PTR6 -> PTR1 result
+$MOD_TEST: shadow_ctor: PTR3 -> PTR2
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR1, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR2 = PTR3
+$MOD_TEST: shadow_ctor: PTR6 -> PTR5
+$MOD_TEST: klp_shadow_alloc(obj=PTR1, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR5 = PTR6
+$MOD_TEST: shadow_ctor: PTR8 -> PTR7
+$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR7 = PTR8
+$MOD_TEST: shadow_ctor: PTR11 -> PTR10
+$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR10 = PTR11
+$MOD_TEST: shadow_ctor: PTR13 -> PTR12
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR14, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR12 = PTR13
+$MOD_TEST: shadow_ctor: PTR16 -> PTR15
+$MOD_TEST: klp_shadow_alloc(obj=PTR14, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR15 = PTR16
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR3
+$MOD_TEST: got expected PTR3 -> PTR2 result
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR6
+$MOD_TEST: got expected PTR6 -> PTR5 result
$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR8
-$MOD_TEST: got expected PTR8 -> PTR2 result
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10
-$MOD_TEST: got expected PTR10 -> PTR3 result
-$MOD_TEST: shadow_ctor: PTR11 -> PTR4
-$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11
-$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11
-$MOD_TEST: got expected PTR11 -> PTR4 result
-$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR6)
-$MOD_TEST: klp_shadow_free(obj=PTR5, id=0x1234, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
+$MOD_TEST: got expected PTR8 -> PTR7 result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR11
+$MOD_TEST: got expected PTR11 -> PTR10 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1234) = PTR13
+$MOD_TEST: got expected PTR13 -> PTR12 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR16
+$MOD_TEST: got expected PTR16 -> PTR15 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR1, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR2 = PTR3
+$MOD_TEST: got expected PTR3 -> PTR2 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR7 = PTR8
+$MOD_TEST: got expected PTR8 -> PTR7 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR14, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR12 = PTR13
+$MOD_TEST: got expected PTR13 -> PTR12 result
+$MOD_TEST: shadow_dtor(obj=PTR1, shadow_data=PTR3)
+$MOD_TEST: klp_shadow_free(obj=PTR1, id=0x1234, dtor=PTR17)
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
$MOD_TEST: shadow_dtor(obj=PTR9, shadow_data=PTR8)
-$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR13)
+$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR17)
$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: shadow_dtor(obj=PTR12, shadow_data=PTR11)
-$MOD_TEST: klp_shadow_free(obj=PTR12, id=0x1234, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR12, id=0x1234) = PTR0
+$MOD_TEST: shadow_dtor(obj=PTR14, shadow_data=PTR13)
+$MOD_TEST: klp_shadow_free(obj=PTR14, id=0x1234, dtor=PTR17)
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10
-$MOD_TEST: got expected PTR10 -> PTR3 result
-$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR10)
-$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
-$MOD_TEST: shadow_get() got expected NULL result
-% rmmod test_klp_shadow_vars"
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR6
+$MOD_TEST: got expected PTR6 -> PTR5 result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR11
+$MOD_TEST: got expected PTR11 -> PTR10 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR16
+$MOD_TEST: got expected PTR16 -> PTR15 result
+$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR0)
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+% rmmod $MOD_TEST"
exit 0
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
index a08212708115..38656721c958 100755
--- a/tools/testing/selftests/livepatch/test-state.sh
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -10,10 +10,10 @@ MOD_LIVEPATCH3=test_klp_state3
setup_config
-# TEST: Loading and removing a module that modifies the system state
-echo -n "TEST: system state modification ... "
-dmesg -C
+# Load and remove a module that modifies the system state
+
+start_test "system state modification"
load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
@@ -41,10 +41,9 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: Take over system state change by a cumulative patch
+# Take over system state change by a cumulative patch
-echo -n "TEST: taking over system state modification ... "
-dmesg -C
+start_test "taking over system state modification"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2
@@ -85,10 +84,9 @@ livepatch: '$MOD_LIVEPATCH2': unpatching complete
% rmmod $MOD_LIVEPATCH2"
-# TEST: Take over system state change by a cumulative patch
+# Take over system state change by a cumulative patch
-echo -n "TEST: compatible cumulative livepatches ... "
-dmesg -C
+start_test "compatible cumulative livepatches"
load_lp $MOD_LIVEPATCH2
load_lp $MOD_LIVEPATCH3
@@ -142,10 +140,9 @@ livepatch: '$MOD_LIVEPATCH2': unpatching complete
% rmmod $MOD_LIVEPATCH3"
-# TEST: Failure caused by incompatible cumulative livepatches
+# Failure caused by incompatible cumulative livepatches
-echo -n "TEST: incompatible cumulative livepatches ... "
-dmesg -C
+start_test "incompatible cumulative livepatches"
load_lp $MOD_LIVEPATCH2
load_failing_mod $MOD_LIVEPATCH
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index ee64ff8df8f4..8383eb89d88a 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -8,6 +8,7 @@
#
set -e
TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+CLEAR_ONCE=/sys/kernel/debug/clear_warn_once
KSELFTEST_SKIP_TEST=4
# Verify we have LKDTM available in the kernel.
@@ -67,6 +68,11 @@ cleanup() {
}
trap cleanup EXIT
+# Reset WARN_ONCE counters so we trip it each time this runs.
+if [ -w $CLEAR_ONCE ] ; then
+ echo 1 > $CLEAR_ONCE
+fi
+
# Save existing dmesg so we can detect new content below
dmesg > "$DMESG"
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 92ca32143ae5..9d266e79c6a2 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -14,6 +14,7 @@ STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING
UNSET_SMEP CR4 bits went missing
DOUBLE_FAULT
+CORRUPT_PAC
UNALIGNED_LOAD_STORE_WRITE
#OVERWRITE_ALLOCATION Corrupts memory on failure
#WRITE_AFTER_FREE Corrupts memory on failure
diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore
new file mode 100644
index 000000000000..15c4dfc2df00
--- /dev/null
+++ b/tools/testing/selftests/mincore/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+mincore_selftest
diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile
new file mode 100644
index 000000000000..38c7db1e8926
--- /dev/null
+++ b/tools/testing/selftests/mincore/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+CFLAGS += -Wall
+
+TEST_GEN_PROGS := mincore_selftest
+include ../lib.mk
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
new file mode 100644
index 000000000000..5a1e85ff5d32
--- /dev/null
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * kselftest suite for mincore().
+ *
+ * Copyright (C) 2020 Collabora, Ltd.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/* Default test file size: 4MB */
+#define MB (1UL << 20)
+#define FILE_SIZE (4 * MB)
+
+
+/*
+ * Tests the user interface. This test triggers most of the documented
+ * error conditions in mincore().
+ */
+TEST(basic_interface)
+{
+ int retval;
+ int page_size;
+ /* Single-entry residency vector handed to every mincore() call below */
+ unsigned char vec[1];
+ char *addr;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Query a 0 byte sized range */
+ retval = mincore(0, 0, vec);
+ EXPECT_EQ(0, retval);
+
+ /* Addresses in the specified range are invalid or unmapped */
+ /* errno is cleared first so the check below cannot see a stale value */
+ errno = 0;
+ retval = mincore(NULL, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ /*
+ * Create a valid one-page shared anonymous mapping; the remaining
+ * error cases are all exercised against it, so a failure here aborts
+ * the test (ASSERT rather than EXPECT).
+ */
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+
+ /* <addr> argument is not page-aligned */
+ errno = 0;
+ retval = mincore(addr + 1, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* <length> argument is too large */
+ /* -1 is converted to the largest value of the length parameter's type */
+ errno = 0;
+ retval = mincore(addr, -1, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ /* <vec> argument points to an illegal address */
+ errno = 0;
+ retval = mincore(addr, page_size, NULL);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EFAULT, errno);
+ /* Release the mapping created above; the return value is not checked */
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a private anonymous page mapping.
+ * Check that the page is not loaded into memory right after the mapping
+ * but after accessing it (on-demand allocation).
+ * Then free the page and check that it's not memory-resident.
+ */
+TEST(check_anonymous_locked_pages)
+{
+ /* One residency byte: the whole test works on a single page */
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Map one page and check it's not memory-resident */
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ /* Touch the page and check again. It should now be in memory */
+ addr[0] = 1;
+ /* Lock the page after touching it; the mlock() result is not checked */
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ /*
+ * It shouldn't be memory-resident after unlocking it and
+ * marking it as unneeded.
+ */
+ /* Neither munlock() nor madvise() has its return value checked */
+ munlock(addr, page_size);
+ madvise(addr, page_size, MADV_DONTNEED);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page in memory after being zapped");
+ }
+ /* Only reached when every ASSERT above passed */
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Check mincore() behavior on huge pages.
+ * This test will be skipped if the mapping fails (ie. if there are no
+ * huge pages available).
+ *
+ * Make sure the system has at least one free huge page, check
+ * "HugePages_Free" in /proc/meminfo.
+ * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if
+ * needed.
+ */
+TEST(check_huge_pages)
+{
+	/* One residency byte is queried for the mapping */
+	unsigned char vec[1];
+	char *addr;
+	int retval;
+	int page_size;
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	/*
+	 * NOTE(review): the mapping is requested (and later unmapped) with
+	 * the base page size as its length; mmap(2) documents huge-page
+	 * length requirements for munmap() on MAP_HUGETLB mappings, so
+	 * confirm that the final munmap() actually succeeds.
+	 */
+	errno = 0;
+	addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+		-1, 0);
+
+	/* ENOMEM is treated as "no huge page available": skip the test */
+	if (addr == MAP_FAILED && errno == ENOMEM) {
+		SKIP(return, "No huge pages available.");
+	}
+
+	/*
+	 * Any other mmap() failure is fatal: abort here instead of handing
+	 * MAP_FAILED to mincore() and dereferencing it further down.
+	 */
+	ASSERT_NE(MAP_FAILED, addr) {
+		TH_LOG("mmap error: %s", strerror(errno));
+	}
+
+	/* Right after mapping, the page must not be reported as resident */
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(0, vec[0]) {
+		TH_LOG("Page found in memory before use");
+	}
+
+	/* Touch and lock the page; it must now be reported as resident */
+	addr[0] = 1;
+	mlock(addr, page_size);
+	retval = mincore(addr, page_size, vec);
+	ASSERT_EQ(0, retval);
+	ASSERT_EQ(1, vec[0]) {
+		TH_LOG("Page not found in memory after use");
+	}
+
+	/* Cleanup; return values are intentionally not checked */
+	munlock(addr, page_size);
+	munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a file-backed page.
+ * No pages should be loaded into memory right after the mapping. Then,
+ * accessing any address in the mapping range should load the page
+ * containing the address and a number of subsequent pages (readahead).
+ *
+ * The actual readahead settings depend on the test environment, so we
+ * can't make a lot of assumptions about that. This test covers the most
+ * general cases.
+ */
+TEST(check_file_mmap)
+{
+ /* vec: one residency byte per page of the file; vec_size: its length */
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ /* Number of consecutive resident pages found after the touched page */
+ int ra_pages = 0;
+
+ /* Number of pages needed to cover FILE_SIZE, rounded up */
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ /*
+ * Create an unnamed temporary file in the current directory and
+ * reserve FILE_SIZE bytes for it.
+ * NOTE(review): an ASSERT failure from here on returns without
+ * releasing vec/fd/addr -- presumably acceptable for a test that is
+ * about to end; confirm against the harness.
+ */
+ errno = 0;
+ fd = open(".", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect the next
+ * few pages (the readahead window) to be populated too.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ /* Count the run of resident pages directly after the touched page */
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ /* Both checks are EXPECTs: a failure is reported but the test goes on */
+ EXPECT_GT(ra_pages, 0) {
+ TH_LOG("No read-ahead pages found in memory");
+ }
+
+ EXPECT_LT(i, vec_size) {
+ TH_LOG("Read-ahead pages reached the end of the file");
+ }
+ /*
+ * End of the readahead window. The rest of the pages shouldn't
+ * be in memory.
+ */
+ if (i < vec_size) {
+ /* Skip non-resident pages; stopping early means a resident one */
+ while (i < vec_size && !vec[i])
+ i++;
+ EXPECT_EQ(vec_size, i) {
+ TH_LOG("Unexpected page in memory beyond readahead window");
+ }
+ }
+
+ /* Cleanup, reached only when no ASSERT above failed */
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+
+/*
+ * Test mincore() behavior on a page backed by a tmpfs file. This test
+ * performs the same steps as the previous one. However, we don't expect
+ * any readahead in this case.
+ */
+TEST(check_tmpfs_mmap)
+{
+ /* vec: one residency byte per page of the file; vec_size: its length */
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ /* Number of consecutive resident pages found after the touched page */
+ int ra_pages = 0;
+
+ /* Number of pages needed to cover FILE_SIZE, rounded up */
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ /*
+ * Create an unnamed temporary file under /dev/shm and reserve
+ * FILE_SIZE bytes for it.
+ * NOTE(review): this presumes /dev/shm exists and is a tmpfs mount;
+ * the test does not verify that -- confirm for the target systems.
+ */
+ errno = 0;
+ fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect only
+ * that page to be fetched into memory.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ /* Count the run of resident pages directly after the touched page */
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ /* Any resident page right after the touched one fails the test */
+ ASSERT_EQ(ra_pages, 0) {
+ TH_LOG("Read-ahead pages found in memory");
+ }
+
+ /* Cleanup, reached only when no ASSERT above failed */
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 895ec992b2f1..9491bbaa0831 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -17,6 +17,8 @@ TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
+TEST_PROGS += rxtimestamp.sh
+TEST_PROGS += devlink_port_split.py
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
new file mode 100755
index 000000000000..58bb7e9b88ce
--- /dev/null
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+    """
+    Execute $cmd through the shell and capture both output streams.
+    Unless $should_fail is set, any stderr output is reported together with
+    the command and its stdout.
+    Return: Tuple of (stdout, stderr), both decoded to str.
+    """
+
+    proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+    raw_out, raw_err = proc.communicate()
+    out = raw_out.decode()
+    err = raw_err.decode()
+
+    unexpected_error = err != "" and not should_fail
+    if unexpected_error:
+        print("Error sending command: %s" % cmd)
+        print(out)
+        print(err)
+    return out, err
+
+
+class devlink_ports(object):
+    """
+    Class that holds information on the devlink ports, required to the tests;
+    if_names: A list of Port tuples (bus_info, name), one per physical port.
+    """
+
+    # Declared static: the function takes no instance and is also called
+    # directly on the class by other helpers in this file.
+    @staticmethod
+    def get_if_names(dev):
+        """
+        Get a list of physical devlink ports whose handle contains $dev.
+        Return: List of Port tuples (bus_info/port, if_name).
+        """
+
+        cmd = "devlink -j port show"
+        stdout, stderr = run_command(cmd)
+        assert stderr == ""
+        ports = json.loads(stdout)['port']
+
+        # Only ports of flavour 'physical' are kept.
+        return [Port(bus_info=handle, name=attrs['netdev'])
+                for handle, attrs in ports.items()
+                if dev in handle and attrs['flavour'] == 'physical']
+
+    def __init__(self, dev):
+        self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+    """
+    Query devlink for the 'lanes' attribute of $port.
+    Return: number of lanes, e.g. 1, 2, 4 and 8, or 0 when the attribute
+    is not present in the devlink output.
+    """
+
+    stdout, stderr = run_command("devlink -j port show %s" % port)
+    assert stderr == ""
+
+    # The reply holds a single entry under 'port'; take its attribute dict.
+    port_attrs = list(json.loads(stdout)['port'].values())[0]
+    return port_attrs.get('lanes', 0)
+
+
+def get_split_ability(port):
+    """
+    Query devlink for the 'splittable' attribute of $port (a Port tuple,
+    looked up by its netdev name).
+    Return: the value of 'splittable' as reported by devlink.
+    """
+
+    stdout, stderr = run_command("devlink -j port show %s" % port.name)
+    assert stderr == ""
+
+    port_attrs = list(json.loads(stdout)['port'].values())[0]
+    return port_attrs['splittable']
+
+
+def split(k, port, should_fail=False):
+    """
+    Ask devlink to split $port into $k ports.
+    With should_fail == True the command is expected to report an error;
+    otherwise it is expected to succeed.
+    Return: List of sub port names if the port was split, else an empty list.
+    """
+
+    cmd = "devlink port split %s count %s" % (port.bus_info, k)
+    stdout, stderr = run_command(cmd, should_fail=should_fail)
+    failed = stderr != ""
+
+    if should_fail:
+        if test(failed, "%s is unsplittable" % port.name):
+            return []
+        # The split unexpectedly went through; report the resulting group
+        # so the caller can undo it.
+        print("split an unsplittable port %s" % port.name)
+        return create_split_group(port, k)
+
+    if not failed:
+        return create_split_group(port, k)
+
+    print("didn't split a splittable port %s" % port.name)
+    return []
+
+
+def unsplit(port):
+    """
+    Ask devlink to unsplit $port and record whether it succeeded.
+    """
+
+    _, stderr = run_command("devlink port unsplit %s" % port)
+    succeeded = stderr == ""
+    test(succeeded, "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+    """
+    Check if a port named $port is among the physical devlink ports of $dev.
+    Return: True if so, False otherwise.
+    """
+
+    for dev_port in devlink_ports.get_if_names(dev):
+        if dev_port.name == port:
+            return True
+    return False
+
+
+def exists_and_lanes(ports, lanes, dev):
+    """
+    Check if every port in the list $ports exists in the devlink ports and has
+    $lanes number of lanes after splitting.
+    Return: True if both are True, False otherwise.
+    """
+
+    for port in ports:
+        # Check existence before querying the lanes: get_max_lanes() asserts
+        # that devlink printed nothing on stderr, so asking about a missing
+        # port would abort the test instead of reporting a failure.
+        if not exists(port, dev):
+            print("port %s doesn't exist in devlink ports" % port)
+            return False
+        max_lanes = get_max_lanes(port)
+        if max_lanes != lanes:
+            # Actual value first, expected value second.
+            print("port %s has %d lanes, but %d were expected"
+                  % (port, max_lanes, lanes))
+            return False
+    return True
+
+
+def test(cond, msg):
+    """
+    Print a result line for $msg: OK when $cond is truthy, FAIL otherwise.
+    Return: $cond, unchanged.
+    """
+
+    print("TEST: %-60s [ OK ]" % msg if cond else "TEST: %-60s [FAIL]" % msg)
+    return cond
+
+
+def create_split_group(port, k):
+    """
+    Build the names of the sub ports expected after splitting $port.
+    Return: List with $k names of the form <port name>s<index>.
+    """
+
+    return [port.name + "s" + str(idx) for idx in range(k)]
+
+
+def split_unsplittable_port(port, k):
+    """
+    Test that splitting of unsplittable port fails.
+    """
+
+    # split() returns a non-empty group only if the split unexpectedly
+    # succeeded; undo it in that case.
+    sub_ports = split(k, port, should_fail=True)
+    if sub_ports:
+        unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+    """
+    Test that splitting of splittable port passes correctly.
+    $port is split into $k sub ports, each of which is expected to exist on
+    $dev and to have $lanes / $k lanes. The port is unsplit afterwards.
+    """
+
+    new_split_group = split(k, port)
+
+    # Once the split command ends, it takes some time to the sub ifaces'
+    # to get their names. Use udevadm to continue only when all current udev
+    # events are handled.
+    cmd = "udevadm settle"
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+
+    if new_split_group != []:
+        # Integer division: lane counts are whole numbers, and '/' would
+        # hand a float to the comparison and to the report message.
+        test(exists_and_lanes(new_split_group, lanes // k, dev),
+             "split port %s into %s" % (port.name, k))
+
+    unsplit(port.bus_info)
+
+
+def make_parser():
+    """
+    Build the command line parser of the test.
+    Return: ArgumentParser accepting the optional --dev argument.
+    """
+
+    dev_help = ('The devlink handle of the device under test. ' +
+                'The default is the first registered devlink ' +
+                'handle.')
+
+    parser = argparse.ArgumentParser(description='A test for port splitting.')
+    parser.add_argument('--dev', help=dev_help)
+    return parser
+
+
+def main(cmdline=None):
+    """
+    Run the port split test on the device given by --dev, or on the first
+    devlink device when none is given.
+    Exits with the kselftest skip code when no devlink device is registered,
+    and with 1 when the requested device cannot be found.
+    """
+
+    # Exit code that the kselftest framework treats as "skipped".
+    ksft_skip = 4
+
+    parser = make_parser()
+    args = parser.parse_args(cmdline)
+
+    dev = args.dev
+    if not dev:
+        cmd = "devlink -j dev show"
+        stdout, stderr = run_command(cmd)
+        assert stderr == ""
+
+        devs = json.loads(stdout)['dev']
+        # Without any devlink device there is nothing to test; skip instead
+        # of failing with an IndexError on the empty list.
+        if not devs:
+            print("no devlink device was found, test skipped")
+            sys.exit(ksft_skip)
+        dev = list(devs.keys())[0]
+
+    cmd = "devlink dev show %s" % dev
+    stdout, stderr = run_command(cmd)
+    if stderr != "":
+        print("devlink device %s can not be found" % dev)
+        sys.exit(1)
+
+    ports = devlink_ports(dev)
+
+    for port in ports.if_names:
+        max_lanes = get_max_lanes(port.name)
+
+        # If max lanes is 0, do not test port splitting at all
+        if max_lanes == 0:
+            continue
+
+        # If 1 lane, shouldn't be able to split
+        elif max_lanes == 1:
+            test(not get_split_ability(port),
+                 "%s should not be able to split" % port.name)
+            split_unsplittable_port(port, max_lanes)
+
+        # Else, splitting should pass and all the split ports should exist.
+        else:
+            lane = max_lanes
+            test(get_split_ability(port),
+                 "%s should be able to split" % port.name)
+            # Try every split count obtained by halving: max, max/2, ... 2.
+            while lane > 1:
+                split_splittable_port(port, lane, max_lanes, dev)
+
+                lane //= 2
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f0e6be4c09e9..75fe24bcb9cd 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -98,6 +98,11 @@ devlink_resource_size_set()
check_err $? "Failed setting path $path to size $size"
}
+devlink_resource_occ_get()
+{
+ # Print the "occ" member of the JSON emitted by devlink_resource_get,
+ # which receives all of this function's arguments.
+ devlink_resource_get "$@" | jq '.["occ"]'
+}
+
devlink_reload()
{
local still_pending
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index 43a948feed26..dbb9fcf759e0 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -50,23 +50,6 @@ cleanup()
h1_destroy
}
-different_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr
-
- speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
- if [[ ${#speeds_arr[@]} < 2 ]]; then
- check_err 1 "cannot check different speeds. There are not enough speeds"
- fi
-
- echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
-
same_speeds_autoneg_off()
{
# Check that when each of the reported speeds is forced, the links come
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
new file mode 100755
index 000000000000..4b42dfd4efd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ autoneg
+ autoneg_force_mode
+ no_cable
+"
+
+NUM_NETIFS=2
+source lib.sh
+source ethtool_lib.sh
+
+setup_prepare()
+{
+ # Resolve the interfaces under test: two ports from NETIFS and the
+ # port configured as having no cable attached.
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+ swp3=$NETIF_NO_CABLE
+}
+
+ethtool_extended_state_check()
+{
+	# Compare the extended link state that ethtool prints for a device
+	# against the expected values; mismatches are recorded via check_err.
+	# $1: device
+	# $2: expected extended state
+	# $3: expected extended substate (optional, defaults to empty)
+	local dev=$1; shift
+	local expected_ext_state=$1; shift
+	local expected_ext_substate=${1:-""}; shift
+
+	# The state is the parenthesised text on the "Link detected" line, in
+	# the form "state[, substate]".
+	local ext_state=$(ethtool $dev | grep "Link detected" \
+		| cut -d "(" -f2 | cut -d ")" -f1)
+	local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+		| sed -e 's/^[[:space:]]*//')
+	ext_state=$(echo $ext_state | cut -d "," -f1)
+
+	# The right-hand side is quoted so that it is compared literally and
+	# not interpreted as a glob pattern.
+	[[ $ext_state == "$expected_ext_state" ]]
+	check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
+
+	[[ $ext_substate == "$expected_ext_substate" ]]
+	check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+}
+
+autoneg()
+{
+ # Bring up only $swp1 and expect the "Autoneg" / "No partner detected"
+ # extended state on it.
+ RET=0
+
+ ip link set dev $swp1 up
+
+ # Wait before sampling the extended state.
+ sleep 4
+ ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+
+ log_test "Autoneg, No partner detected"
+
+ ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+ # Force two different speeds on $swp1 and $swp2 with autoneg off and
+ # expect both to report "No partner detected during force mode".
+ RET=0
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ # different_speeds_get prints two speeds; use one per port.
+ local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $swp1 speed $speed1 autoneg off
+ ethtool_set $swp2 speed $speed2 autoneg off
+
+ sleep 4
+ ethtool_extended_state_check $swp1 "Autoneg" \
+ "No partner detected during force mode"
+
+ ethtool_extended_state_check $swp2 "Autoneg" \
+ "No partner detected during force mode"
+
+ log_test "Autoneg, No partner detected during force mode"
+
+ # Re-enable autoneg and bring the ports back down.
+ ethtool -s $swp2 autoneg on
+ ethtool -s $swp1 autoneg on
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+no_cable()
+{
+ # Bring up $swp3 (set from NETIF_NO_CABLE) and expect the "No cable"
+ # extended state with an empty substate.
+ RET=0
+
+ ip link set dev $swp3 up
+
+ sleep 1
+ ethtool_extended_state_check $swp3 "No cable"
+
+ log_test "No cable"
+
+ ip link set dev $swp3 down
+}
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..9188e624dec0 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -67,3 +67,20 @@ common_speeds_get()
<(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
<(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
}
+
+different_speeds_get()
+{
+	# Print the first two speeds returned by common_speeds_get for the
+	# two devices.
+	# $1, $2: devices; $3: with_mode; $4: adver (all passed through to
+	# common_speeds_get).
+	# Records an error via check_err when fewer than two speeds exist.
+	local dev1=$1; shift
+	local dev2=$1; shift
+	local with_mode=$1; shift
+	local adver=$1; shift
+
+	local -a speeds_arr
+
+	speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+	# Arithmetic comparison: inside [[ ]] the '<' operator compares
+	# strings, which would treat a count such as 10 as smaller than 2.
+	if ((${#speeds_arr[@]} < 2)); then
+		check_err 1 "cannot check different speeds. There are not enough speeds"
+	fi
+
+	echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index e2adb533c8fc..b802c14d2950 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -14,6 +14,9 @@ NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
+# Port that does not have a cable connected.
+NETIF_NO_CABLE=eth8
+
##############################################################################
# Defines
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
new file mode 100755
index 000000000000..5f20d289ee43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the
+# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the
+# packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_udp_sport
+ test_udp_dport
+ test_tcp_sport
+ test_tcp_dport
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ # Set up $h1 with its IPv4 and IPv6 addresses.
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ # Counterpart of h1_create.
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ # Set up $h2 and attach a clsact qdisc to it; the tests install their
+ # ingress filters on $h2.
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ # Counterpart of h2_create, in reverse order.
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # Create bridge br1 with VLAN filtering, enslave both switch ports and
+ # attach a clsact qdisc to each of them.
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ # Counterpart of switch_create: remove the qdiscs, release the ports
+ # from the bridge and delete it.
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ # Map the four NETIFS entries to host and switch ports, record the MAC
+ # of $h2 (used as destination MAC by the traffic generator) and build
+ # the topology.
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ # Tear down what setup_prepare created, in reverse order.
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ # Ping the IPv4 address of $h2 from $h1.
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ # Ping the IPv6 address of $h2 from $h1.
+ # NOTE(review): not listed in ALL_TESTS above - confirm whether that is
+ # intentional.
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_l4port_one()
+{
+	# Install one pedit rule and one matching rule, send ten packets from
+	# $h1 and check that both rules saw at least ten of them.
+	# $1: where the pedit rule goes, e.g. "dev <if> ingress"
+	# $2: protocol handed to the traffic generator
+	# $3: pedit munge expression
+	# $4: protocol of the matching filter on $h2
+	# $5: flower match of the matching filter on $h2
+	# $6: extra flags for the traffic generator
+	# Arguments past the sixth were never supplied by the caller nor read
+	# here, so they are no longer consumed.
+	local pedit_locus=$1; shift
+	local pedit_prot=$1; shift
+	local pedit_action=$1; shift
+	local match_prot=$1; shift
+	local match_flower=$1; shift
+	local mz_flags=$1; shift
+
+	tc filter add $pedit_locus handle 101 pref 1 \
+		flower action pedit ex munge $pedit_action
+	tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+		flower skip_hw $match_flower action pass
+
+	RET=0
+
+	$MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+		-a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345
+
+	# Wait for the matching rule on $h2 to count the packets.
+	local pkts
+	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+		tc_rule_handle_stats_get "dev $h2 ingress" 101)
+	check_err $? "Expected to get 10 packets, but got $pkts."
+
+	pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+	((pkts >= 10))
+	check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+	log_test "$pedit_locus pedit $pedit_action"
+
+	tc filter del dev $h2 ingress pref 1
+	tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_l4port()
+{
+ # Run the single-port test for port values 1, 11111 and 65535.
+ # $1: where the pedit rule goes; $2: protocol (udp/tcp);
+ # $3: pedit field name; $4: flower key used to match the rewritten port.
+ local locus=$1; shift
+ local prot=$1; shift
+ local pedit_port=$1; shift
+ local flower_port=$1; shift
+ local port
+
+ for port in 1 11111 65535; do
+ do_test_pedit_l4port_one "$locus" "$prot" \
+ "$prot $pedit_port set $port" \
+ ip "ip_proto $prot $flower_port $port" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_udp_sport()
+{
+	# Rewrite the UDP source port, first on ingress of $swp1, then on
+	# egress of $swp2.
+	local where
+
+	for where in "dev $swp1 ingress" "dev $swp2 egress"; do
+		do_test_pedit_l4port "$where" udp sport src_port
+	done
+}
+
+test_udp_dport()
+{
+	# Rewrite the UDP destination port, first on ingress of $swp1, then on
+	# egress of $swp2.
+	local where
+
+	for where in "dev $swp1 ingress" "dev $swp2 egress"; do
+		do_test_pedit_l4port "$where" udp dport dst_port
+	done
+}
+
+test_tcp_sport()
+{
+	# Rewrite the TCP source port, first on ingress of $swp1, then on
+	# egress of $swp2.
+	local where
+
+	for where in "dev $swp1 ingress" "dev $swp2 egress"; do
+		do_test_pedit_l4port "$where" tcp sport src_port
+	done
+}
+
+test_tcp_dport()
+{
+	# Rewrite the TCP destination port, first on ingress of $swp1, then on
+	# egress of $swp2.
+	local where
+
+	for where in "dev $swp1 ingress" "dev $swp2 egress"; do
+		do_test_pedit_l4port "$where" tcp dport dst_port
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
new file mode 100755
index 000000000000..e714bae473fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -0,0 +1,492 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends one stream of traffic from H1 through a TBF shaper, to a RED
+# within TBF shaper on $swp3. The two shapers have the same configuration, and
+# thus the resulting stream should fill all available bandwidth on the latter
+# shaper. A second stream is sent from H2 also via $swp3, and used to inject
+# additional traffic. Since all available bandwidth is taken, this traffic has
+# to go to backlog.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | TBF 10Mbps | | | |
+# +-----|--------------------+ +-----|--------------------+
+# | |
+# +-----|------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|------------------------------------------------|----------------+ |
+# | | + $swp1 + $swp2 | |
+# | | BR | |
+# | | | |
+# | | + $swp3 | |
+# | | | TBF 10Mbps / RED | |
+# | +--------------------------------|-----------------------------------+ |
+# | | |
+# +-----------------------------------|---------------------------------------+
+# |
+# +-----|--------------------+
+# | H3 | |
+# | + $h3 |
+# | 192.0.2.3/28 |
+# | |
+# +--------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ red_qevent_test
+ ecn_qevent_test
+"
+
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+BACKLOG=30000
+PKTSZ=1400
+
+h1_create()
+{
+ # Set up $h1, raise its MTU and shape its egress with a 10Mbit TBF.
+ simple_if_init $h1 192.0.2.1/28
+ mtu_set $h1 10000
+ tc qdisc replace dev $h1 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+}
+
+h1_destroy()
+{
+ # Counterpart of h1_create, in reverse order.
+ tc qdisc del dev $h1 root
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ # Set up $h2 and raise its MTU.
+ simple_if_init $h2 192.0.2.2/28
+ mtu_set $h2 10000
+}
+
+h2_destroy()
+{
+ # Counterpart of h2_create, in reverse order.
+ mtu_restore $h2
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ # Set up $h3 and raise its MTU.
+ simple_if_init $h3 192.0.2.3/28
+ mtu_set $h3 10000
+}
+
+h3_destroy()
+{
+ # Counterpart of h3_create, in reverse order.
+ mtu_restore $h3
+ simple_if_fini $h3 192.0.2.3/28
+}
+
+switch_create()
+{
+ # Bridge the three switch ports, raise their MTU, put a 10Mbit TBF on
+ # $swp3 and create the dummy device _drop_test, which the qevent tests
+ # use as mirror target.
+ ip link add dev br up type bridge
+ ip link set dev $swp1 up master br
+ ip link set dev $swp2 up master br
+ ip link set dev $swp3 up master br
+
+ mtu_set $swp1 10000
+ mtu_set $swp2 10000
+ mtu_set $swp3 10000
+
+ tc qdisc replace dev $swp3 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+ ip link add name _drop_test up type dummy
+}
+
+switch_destroy()
+{
+	# Undo switch_create in reverse order.
+	ip link del dev _drop_test
+	tc qdisc del dev $swp3 root
+
+	# switch_create changed the MTU of the switch ports, so those are the
+	# ones restored here; the host ports are restored by their own
+	# h*_destroy functions.
+	mtu_restore $swp3
+	mtu_restore $swp2
+	mtu_restore $swp1
+
+	ip link set dev $swp3 down nomaster
+	ip link set dev $swp2 down nomaster
+	ip link set dev $swp1 down nomaster
+	ip link del dev br
+}
+
+setup_prepare()
+{
+ # Map the six NETIFS entries to host and switch ports, record the MAC
+ # of $h3 (destination MAC of all generated traffic) and build the
+ # topology.
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h3_mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ # Tear down what setup_prepare created, in reverse order.
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ # Ping H3's address from both sending hosts.
+ ping_test $h1 192.0.2.3 " from host 1"
+ ping_test $h2 192.0.2.3 " from host 2"
+}
+
+get_qdisc_backlog()
+{
+ # Print the .backlog statistic of qdisc 11: (the RED qdisc) on $swp3.
+ qdisc_stats_get $swp3 11: .backlog
+}
+
+get_nmarked()
+{
+ # Print the .marked statistic of qdisc 11: on $swp3.
+ qdisc_stats_get $swp3 11: .marked
+}
+
+get_qdisc_npackets()
+{
+ # Print the .packets statistic of qdisc 11: on $swp3.
+ qdisc_stats_get $swp3 11: .packets
+}
+
+get_nmirrored()
+{
+ # Print the TX packet counter of _drop_test, the device that the qevent
+ # tests mirror packets to.
+ link_stats_get _drop_test tx packets
+}
+
+send_packets()
+{
+ # Send packets of $PKTSZ bytes from $h2 towards H3.
+ # $1: protocol for the traffic generator; $2: packet count;
+ # remaining arguments are appended to the generator command line.
+ local proto=$1; shift
+ local pkts=$1; shift
+
+ $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. It bails out and returns 1 once the attempt counter exceeds 10. It
+# dumps the backlog size to stdout.
+# $1: requested backlog size; $2: protocol; remaining arguments are passed on
+# to send_packets.
+build_backlog()
+{
+ local size=$1; shift
+ local proto=$1; shift
+
+ local i=0
+
+ while :; do
+ local cur=$(get_qdisc_backlog)
+ local diff=$((size - cur))
+ # Number of $PKTSZ-sized packets needed to cover the gap, rounded up.
+ local pkts=$(((diff + PKTSZ - 1) / PKTSZ))
+
+ if ((cur >= size)); then
+ echo $cur
+ return 0
+ elif ((i++ > 10)); then
+ echo $cur
+ return 1
+ fi
+
+ send_packets $proto $pkts "$@"
+ sleep 1
+ done
+}
+
+check_marking()
+{
+ # Sample the packet and marked counters five seconds apart, print the
+ # percentage of marked packets in that interval and return whether
+ # "pct $cond" holds.
+ # $1: condition appended to pct in an arithmetic test, e.g. "== 0".
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmarked_0=$(get_nmarked)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmarked_1=$(get_nmarked)
+
+ local nmarked_d=$((nmarked_1 - nmarked_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmarked_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+check_mirroring()
+{
+ # Same as check_marking, but the percentage is that of packets seen on
+ # the mirror target relative to packets counted by the qdisc.
+ # $1: condition appended to pct in an arithmetic test, e.g. ">= 95".
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmirrored_0=$(get_nmirrored)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmirrored_1=$(get_nmirrored)
+
+ local nmirrored_d=$((nmirrored_1 - nmirrored_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmirrored_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+ecn_test_common()
+{
+ # Shared part of the ECN tests: no marking while the backlog is at 2/3
+ # of the limit, at least 95% marking once it is at 3/2 of the limit.
+ # $1: test name used in the log; $2: queue limit.
+ local name=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Build the below-the-limit backlog using UDP. We could use TCP just
+ # fine, but this way we get a proof that UDP is accepted when queue
+ # length is below the limit. The main stream is using TCP, and if the
+ # limit is misconfigured, we would see this traffic being ECN marked.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) udp)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "$name backlog < limit"
+
+ # Now push TCP, because non-TCP traffic would be early-dropped after the
+ # backlog crosses the limit, and we want to make sure that the backlog
+ # is above the limit.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+ log_test "$name backlog > limit"
+}
+
+do_ecn_test()
+{
+ # ECN test body: run the common checks, then verify that UDP can no
+ # longer build backlog above the limit.
+ # $1: queue limit.
+ local limit=$1; shift
+ local name=ECN
+
+ # Continuous background TCP stream from $h1 (-c 0), stopped by
+ # stop_traffic below.
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, it should all get dropped, and backlog
+ # building should fail.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+ log_test "$name backlog > limit: UDP early-dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+ # ECN nodrop test body: run the common checks, then verify that UDP
+ # still builds backlog above the limit.
+ # $1: queue limit.
+ local limit=$1; shift
+ local name="ECN nodrop"
+
+ # Continuous background TCP stream from $h1 (-c 0), stopped by
+ # stop_traffic below.
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, in nodrop mode, make sure it goes to
+ # backlog as well.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+ log_test "$name backlog > limit: UDP not dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_test()
+{
+ # RED (no ECN) test body: backlog can be built below the limit but not
+ # above it, and no packets are marked in either case.
+ # $1: queue limit.
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Use ECN-capable TCP to verify there's no marking even though the queue
+ # is above limit.
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+
+ # Pushing below the queue limit should work.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog < limit"
+
+ # Pushing above should not.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_fail $? "Traffic went into backlog instead of being early-dropped"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog > limit"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_qevent_test()
+{
+ # Verify that early-dropped packets are mirrored to _drop_test while a
+ # mirror filter is bound to block 10, and no longer once it is removed.
+ # $1: queue limit.
+ local limit=$1; shift
+ local backlog
+ local base
+ local now
+ local pct
+
+ RET=0
+
+ # Continuous background UDP stream from $h1 (-c 0).
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t udp -q &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc, so this is the best we can do to get to the real
+ # limit.
+ build_backlog $((3 * limit / 2)) udp >/dev/null
+
+ # With the filter installed, 100 extra packets should all show up on
+ # the mirror target.
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now >= base + 100))
+ check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen"
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ # With the filter removed, the mirror counter must not move.
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now == base))
+ check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
+
+ log_test "RED early_dropped packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_qevent_test()
+{
+	# Verify that ECN-marked packets are mirrored to _drop_test through
+	# the filter bound to block 10: none while the backlog is at 2/3 of
+	# the limit, at least 95% once it is at 3/2 of the limit.
+	# $1: queue limit.
+	local limit=$1; shift
+	# Declared local like in the sibling tests, so the values do not leak
+	# into the global scope.
+	local backlog
+	local pct
+
+	RET=0
+
+	# Continuous background TCP stream from $h1 (-c 0), stopped by
+	# stop_traffic below.
+	$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+		-a own -b $h3_mac -t tcp -q tos=0x01 &
+	sleep 1
+
+	tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+		action mirred egress mirror dev _drop_test
+
+	backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_mirroring "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0."
+
+	backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_mirroring ">= 95")
+	check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95."
+
+	tc filter del block 10 pref 1234 handle 102 matchall
+
+	log_test "ECN marked packets mirrored"
+
+	stop_traffic
+	sleep 1
+}
+
+install_qdisc()
+{
+ # Install the RED qdisc (handle 11:) under the TBF on $swp3, with min
+ # set to $BACKLOG and max one byte above it. All arguments are appended
+ # to the tc command line as extra RED options.
+ local -a args=("$@")
+
+ tc qdisc replace dev $swp3 parent 1:1 handle 11: red \
+ limit 1M avpkt $PKTSZ probability 1 \
+ min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ # Remove the qdisc installed by install_qdisc.
+ tc qdisc del dev $swp3 parent 1:1
+}
+
+ecn_test()
+{
+ # RED with the "ecn" option.
+ install_qdisc ecn
+ do_ecn_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ # RED with the "ecn" and "nodrop" options.
+ install_qdisc ecn nodrop
+ do_ecn_nodrop_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_test()
+{
+ # RED without any extra options.
+ install_qdisc
+ do_red_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_qevent_test()
+{
+ # RED with the early_drop qevent bound to block 10.
+ install_qdisc qevent early_drop block 10
+ do_red_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_qevent_test()
+{
+ # RED with "ecn" and the mark qevent bound to block 10.
+ install_qdisc ecn qevent mark block 10
+ do_ecn_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
new file mode 100755
index 000000000000..160f9cccdfb7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -0,0 +1,333 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test tc-police action.
+#
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | |
+# | | default via 192.0.2.2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | |
+# | 198.51.100.2/24 203.0.113.2/24 |
+# | + $rp2 + $rp3 |
+# | | | |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 203.0.113.2 |
+# | | | | | |
+# | | 198.51.100.1/24 | | | 203.0.113.1/24 |
+# | + $h2 | | + $h3 |
+# | H2 (vrf) | | H3 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+ALL_TESTS="
+ police_rx_test
+ police_tx_test
+ police_shared_test
+ police_rx_mirror_test
+ police_tx_mirror_test
+"
+NUM_NETIFS=6
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+ # Set up $h1 and add a default route via the router in its VRF.
+ simple_if_init $h1 192.0.2.1/24
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ # Counterpart of h1_create, in reverse order.
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ # Set up $h2, add a default route via the router and attach a clsact
+ # qdisc for the measuring filters.
+ simple_if_init $h2 198.51.100.1/24
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ # Counterpart of h2_create, in reverse order.
+ tc qdisc del dev $h2 clsact
+
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+h3_create()
+{
+ # Set up $h3, add a default route via the router and attach a clsact
+ # qdisc for the measuring filters.
+ simple_if_init $h3 203.0.113.1/24
+
+ ip -4 route add default vrf v$h3 nexthop via 203.0.113.2
+
+ tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+ # Counterpart of h3_create, in reverse order.
+ tc qdisc del dev $h3 clsact
+
+ ip -4 route del default vrf v$h3 nexthop via 203.0.113.2
+
+ simple_if_fini $h3 203.0.113.1/24
+}
+
+router_create()
+{
+ # Bring up the three router ports, assign their addresses and attach
+ # clsact qdiscs to $rp1 and $rp2, where the police filters go.
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+ ip link set dev $rp3 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24
+ __addr_add_del $rp2 add 198.51.100.2/24
+ __addr_add_del $rp3 add 203.0.113.2/24
+
+ tc qdisc add dev $rp1 clsact
+ tc qdisc add dev $rp2 clsact
+}
+
+router_destroy()
+{
+ # Counterpart of router_create, in reverse order.
+ tc qdisc del dev $rp2 clsact
+ tc qdisc del dev $rp1 clsact
+
+ __addr_add_del $rp3 del 203.0.113.2/24
+ __addr_add_del $rp2 del 198.51.100.2/24
+ __addr_add_del $rp1 del 192.0.2.2/24
+
+ ip link set dev $rp3 down
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+police_common_test()
+{
+	# Send a continuous UDP stream from $h1 to $h2 and check that the
+	# rate arriving at $h2 over ten seconds is within 10% of the policed
+	# rate. The policer itself is installed by the caller.
+	# $1: test name used in the log.
+	local test_name=$1; shift
+
+	RET=0
+
+	# Rule to measure bandwidth on ingress of $h2
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	# Use $MZ, as the other forwarding tests do, so that an overridden
+	# traffic generator path is honoured.
+	$MZ $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+	# Expected rate, matching the 80mbit policer set up by the callers.
+	local er=$((80 * 1000 * 1000))
+	local nr=$(rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	# Stop the background traffic generator.
+	{ kill %% && wait %%; } 2>/dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_test()
+{
+ # Police at 80mbit on ingress of $rp1 and run the common rate check.
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/ok
+
+ police_common_test "police on rx"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_tx_test()
+{
+ # Police at 80mbit on egress of $rp2 and run the common rate check.
+ # Rule to police traffic destined to $h2 on egress of $rp2
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/ok
+
+ police_common_test "police on tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
+police_shared_common_test()
+{
+	# Send a continuous UDP stream from $h1 to $h2 with the given
+	# destination port and check that the rate arriving at $h2 over ten
+	# seconds is within 10% of the policed rate. The measuring and police
+	# filters are installed by the caller.
+	# $1: UDP destination port; $2: test name used in the log.
+	local dport=$1; shift
+	local test_name=$1; shift
+
+	RET=0
+
+	# Use $MZ, as the other forwarding tests do, so that an overridden
+	# traffic generator path is honoured.
+	$MZ $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=$dport -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+	# Expected rate, matching the 80mbit shared policer set up by the
+	# caller.
+	local er=$((80 * 1000 * 1000))
+	local nr=$(rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	# Stop the background traffic generator.
+	{ kill %% && wait %%; } 2>/dev/null
+}
+
+police_shared_test()
+{
+ # Two different flows, one policed on ingress of $rp1 and one on egress
+ # of $rp2, are bound to the same policer (index 10). Each flow is then
+ # checked on its own against the rate measured on $h2.
+ local meas_filter="dev $h2 ingress protocol ip pref 1 handle 101 flower"
+ local rx_filter="dev $rp1 ingress protocol ip pref 1 handle 101 flower"
+ local tx_filter="dev $rp2 egress protocol ip pref 1 handle 101 flower"
+
+ # Rule to measure bandwidth on ingress of $h2; matches on the source
+ # port, which both flows share
+ tc filter add $meas_filter \
+ dst_ip 198.51.100.1 ip_proto udp src_port 12345 \
+ action drop
+
+ # Rule to police traffic destined to $h2 on ingress of $rp1; this one
+ # defines the policer
+ tc filter add $rx_filter \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/ok \
+ index 10
+
+ # Rule to police a different flow destined to $h2 on egress of $rp2,
+ # referring to the policer by index
+ tc filter add $tx_filter \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 22222 \
+ action police index 10
+
+ police_shared_common_test 54321 "police with shared policer - rx"
+
+ police_shared_common_test 22222 "police with shared policer - tx"
+
+ # Remove the rules in reverse order of creation
+ tc filter del $tx_filter
+ tc filter del $rx_filter
+ tc filter del $meas_filter
+}
+
+police_mirror_common_test()
+{
+ # Police traffic destined to $h2 on interface $1 in direction $2
+ # (ingress or egress) and mirror it to $h3. The rate is measured first
+ # on $h2 and then on $h3, over 10 seconds each, and both must be within
+ # 10% of 80 * 1000 * 1000. The result is logged under the name in $3.
+ local pol_if=$1
+ local dir=$2
+ local test_name=$3
+ local meas_if
+
+ RET=0
+
+ # Rules to measure bandwidth on ingress of $h2, and of mirrored traffic
+ # on ingress of $h3
+ for meas_if in $h2 $h3; do
+ tc filter add dev $meas_if ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+ done
+
+ # Rule to police traffic destined to $h2 and mirror to $h3
+ tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/pipe \
+ action mirred egress mirror dev $rp3
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ # Same measurement on both receivers, one after the other
+ for meas_if in $h2 $h3; do
+ local t0=$(tc_rule_stats_get $meas_if 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $meas_if 1 ingress .bytes)
+
+ local er=$((80 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((nr_pct >= -10 && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+ done
+
+ log_test "$test_name"
+
+ # Stop the background traffic generator, then drop all three rules
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower
+ tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_mirror_test()
+{
+ # Police-and-mirror check with the policer on ingress of $rp1
+ police_mirror_common_test $rp1 ingress "police rx and mirror"
+}
+
+police_tx_mirror_test()
+{
+ # Police-and-mirror check with the policer on egress of $rp2
+ police_mirror_common_test $rp2 egress "police tx and mirror"
+}
+
+setup_prepare()
+{
+ # Interface names come from the NETIFS array: p1/p2 become $h1/$rp1,
+ # p3/p4 become $rp2/$h2 and p5/p6 become $rp3/$h3
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ # Hosts first, then the router; cleanup() undoes this in reverse order
+ h1_create
+ h2_create
+ h3_create
+ router_create
+}
+
+cleanup()
+{
+ # Installed as the EXIT trap; tears down what setup_prepare() created,
+ # in reverse order
+ pre_cleanup
+
+ router_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f50976ee7d44..aa254aefc2c3 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,7 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 2499824d9e1c..8df5cb8f71ff 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,6 @@
CONFIG_MPTCP=y
CONFIG_MPTCP_IPV6=y
+CONFIG_INET_DIAG=m
+CONFIG_INET_MPTCP_DIAG=m
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
new file mode 100755
index 000000000000..39edce4f541c
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Random suffix for the namespace name, so that concurrent runs do not clash.
+# The hex part is the current time in seconds; it used to be taken from
+# $sec, which this script never sets, so it always came out as "0".
+rndh=$(printf %x "$(date +%s)")-$(mktemp -u XXXXXX)
+ns="ns1-$rndh"
+# Exit code used for the "SKIP" cases below
+ksft_skip=4
+# Number of the next check; $ret is set to it when that check fails
+test_cnt=1
+ret=0
+# PIDs of the background jobs started by this script
+pids=()
+
+# Send SIGUSR1 to every recorded background process that is still alive,
+# then forget all recorded PIDs.
+flush_pids()
+{
+ # mptcp_connect in join mode will sleep a bit before completing,
+ # give it some time
+ sleep 1.1
+
+ for pid in ${pids[@]}; do
+ if [ -d /proc/$pid ]; then
+ kill -SIGUSR1 $pid >/dev/null 2>&1
+ fi
+ done
+ pids=()
+}
+
+cleanup()
+{
+ ip netns del $ns
+ for pid in ${pids[@]}; do
+ [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
+ done
+}
+
+# Skip the test unless the "ip" tool is available ...
+if ! ip -Version > /dev/null 2>&1; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+# ... and the "ss" help text mentions MPTCP
+if ! ss -h | grep -q MPTCP; then
+ echo "SKIP: ss tool does not support MPTCP"
+ exit $ksft_skip
+fi
+
+# Feed the "ss" output for namespace $ns to a counting command and compare
+# the printed number against the expected value.
+#  $1: counting command, e.g. "grep -c token:"; it reads the ss output on
+#      stdin and is expanded unquoted on purpose, so that it splits into
+#      command and arguments
+#  $2: expected count
+#  remaining arguments: description printed in front of the result
+# On mismatch $ret is set to the number of the failing check; $test_cnt is
+# incremented in any case.
+__chk_nr()
+{
+ local condition="$1"
+ local expected=$2
+ local msg nr
+
+ shift 2
+ msg=$*
+ nr=$(ss -inmHMN $ns | $condition)
+
+ printf "%-50s" "$msg"
+ # Both operands are quoted: with an empty $nr the unquoted form is a
+ # "test" usage error, and its non-zero status would be reported as
+ # "[ ok ]" by the else branch.
+ if [ "$nr" != "$expected" ]; then
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ else
+ echo "[ ok ]"
+ fi
+ test_cnt=$((test_cnt+1))
+}
+
+# Check the number of ss output lines containing "token:".
+#  $1: expected count, remaining arguments: description
+chk_msk_nr()
+{
+ __chk_nr "grep -c token:" $*
+}
+
+# Check the number of ss output lines containing "fallback".
+#  $1: expected count, remaining arguments: description
+chk_msk_fallback_nr()
+{
+ __chk_nr "grep -c fallback" $*
+}
+
+# Check the number of ss output lines containing "remote_key".
+#  $1: expected count, remaining arguments: description
+chk_msk_remote_key_nr()
+{
+ __chk_nr "grep -c remote_key" $*
+}
+
+
+# Create the test namespace; cleanup() removes it again on exit
+trap cleanup EXIT
+ip netns add $ns
+ip -n $ns link set dev lo up
+
+# A listener alone (port 10000): nothing should be counted yet
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+sleep 0.1
+pids[0]=$!
+chk_msk_nr 0 "no msk on netns creation"
+
+# Connect a client in join mode (-j): both ends are expected to show up,
+# each with a remote key and without fallback
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+sleep 0.1
+pids[1]=$!
+chk_msk_nr 2 "after MPC handshake "
+chk_msk_remote_key_nr 2 "....chk remote_key"
+chk_msk_fallback_nr 0 "....chk no fallback"
+flush_pids
+
+
+# Listener started with "-s TCP" on port 10001 and a default client:
+# exactly one fallback entry is expected
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
+pids[0]=$!
+sleep 0.1
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
+pids[1]=$!
+sleep 0.1
+chk_msk_fallback_nr 1 "check fallback"
+flush_pids
+
+# Many connections at once: one listener and one client per port, starting
+# at port 10002. "-w 10" makes mptcp_connect wait before closing the socket,
+# so that the sockets are still there when they are counted.
+NR_CLIENTS=100
+for I in `seq 1 $NR_CLIENTS`; do
+ echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null &
+ pids[$((I*2))]=$!
+done
+sleep 0.1
+
+for I in `seq 1 $NR_CLIENTS`; do
+ echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
+ pids[$((I*2 + 1))]=$!
+done
+sleep 1.5
+
+# Two sockets (listener side and client side) per connection
+chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+flush_pids
+
+# 0 on success, otherwise the number of the last failed check
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cedee5b952ba..090620c3e10c 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
+#include <signal.h>
#include <unistd.h>
#include <sys/poll.h>
@@ -36,6 +37,7 @@ extern int optind;
static int poll_timeout = 10 * 1000;
static bool listen_mode;
+static bool quit;
enum cfg_mode {
CFG_MODE_POLL,
@@ -52,11 +54,12 @@ static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
+static int cfg_wait;
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
- "[-l] connect_address\n");
+ "[-l] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
fprintf(stderr, "\t-t num -- set poll timeout to num\n");
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
@@ -65,9 +68,15 @@ static void die_usage(void)
fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
+ fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
+/* Handler installed for SIGUSR1 in main(): only records the request in
+ * 'quit'; the signal number is not used.
+ * NOTE(review): 'quit' is declared as a plain bool; confirm whether it
+ * should be volatile sig_atomic_t, given that it is written from a signal
+ * handler.
+ */
+static void handle_signal(int nr)
+{
+ quit = true;
+}
+
static const char *getxinfo_strerr(int err)
{
if (err == EAI_SYSTEM)
@@ -397,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
/* ... but we still receive.
* Close our write side, ev. give some time
- * for address notification
+ * for address notification and/or checking
+ * the current status
*/
- if (cfg_join)
- usleep(400000);
+ if (cfg_wait)
+ usleep(cfg_wait);
shutdown(peerfd, SHUT_WR);
} else {
if (errno == EINTR)
@@ -419,7 +429,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
/* leave some time for late join/announce */
if (cfg_join)
- usleep(400000);
+ usleep(cfg_wait);
close(peerfd);
return 0;
@@ -812,11 +822,12 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
+ while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
break;
case 'l':
listen_mode = true;
@@ -850,6 +861,9 @@ static void parse_opts(int argc, char **argv)
case 'R':
cfg_rcvbuf = parse_int(optarg);
break;
+ case 'w':
+ cfg_wait = atoi(optarg)*1000000;
+ break;
}
}
@@ -865,6 +879,7 @@ int main(int argc, char *argv[])
{
init_rng();
+ signal(SIGUSR1, handle_signal);
parse_opts(argc, argv);
if (tcpulp_audit)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index acf02e156d20..57d75b7f6220 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:f:t"
ret=0
sin=""
sout=""
@@ -21,6 +21,8 @@ testmode=""
sndbuf=0
rcvbuf=0
options_log=true
+do_tcp=0
+filesize=0
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -40,9 +42,11 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-f: size of file to transfer in bytes (default random)"
echo -e "\t-S: set sndbuf value (default: use kernel default)"
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
+ echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
}
while getopts "$optstring" option;do
@@ -94,6 +98,12 @@ while getopts "$optstring" option;do
"m")
testmode="$OPTARG"
;;
+ "f")
+ filesize="$OPTARG"
+ ;;
+ "t")
+ do_tcp=$((do_tcp+1))
+ ;;
"?")
usage $0
exit 1
@@ -186,6 +196,9 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
+# use TCP syn cookies, even if no flooding was detected.
+ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -385,14 +398,23 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"
+ local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
- ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
- local cappid=$!
+ ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
sleep 1
fi
+ local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done)
+ local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done)
+ local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done)
+ local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done)
+
ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
@@ -413,7 +435,8 @@ do_transfer()
if $capture; then
sleep 1
- kill $cappid
+ kill ${cappid_listener}
+ kill ${cappid_connector}
fi
local duration
@@ -435,6 +458,45 @@ do_transfer()
check_transfer $cin $sout "file received by server"
rets=$?
+ local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done)
+ local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done)
+
+ local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done)
+ local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done)
+
+ expect_synrx=$((stat_synrx_last_l))
+ expect_ackrx=$((stat_ackrx_last_l))
+
+ cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
+ cookies=${cookies##*=}
+
+ if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
+ expect_synrx=$((stat_synrx_last_l+1))
+ expect_ackrx=$((stat_ackrx_last_l+1))
+ fi
+ if [ $cookies -eq 2 ];then
+ if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance"
+ fi
+ if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+ echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance"
+ fi
+ else
+ if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+ echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed"
+ fi
+ if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+ echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed"
+ fi
+ fi
+
+ if [ $expect_synrx -ne $stat_synrx_now_l ] ;then
+ echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+ fi
+ if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then
+ # Report the ACKRX expectation and counter that this test compares,
+ # not the SYNRX ones
+ echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_ackrx}, got ${stat_ackrx_now_l}"
+ fi
+
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
echo "$duration [ OK ]"
cat "$capout"
@@ -449,20 +511,25 @@ make_file()
{
local name=$1
local who=$2
+ local SIZE=$filesize
+ local ksize
+ local rem
- local SIZE TSIZE
- SIZE=$((RANDOM % (1024 * 8)))
- TSIZE=$((SIZE * 1024))
+ if [ $SIZE -eq 0 ]; then
+ local MAXSIZE=$((1024 * 1024 * 8))
+ local MINSIZE=$((1024 * 256))
- dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
+ fi
- SIZE=$((RANDOM % 1024))
- SIZE=$((SIZE + 128))
- TSIZE=$((TSIZE + SIZE))
- dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+ ksize=$((SIZE / 1024))
+ rem=$((SIZE - (ksize * 1024)))
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
+ # Append the remaining $rem bytes. With conv=notrunc alone dd starts
+ # writing at offset 0, overwriting the beginning of the file instead of
+ # growing it to $SIZE bytes.
+ dd if=/dev/urandom conv=notrunc oflag=append of="$name" bs=1 count=$rem 2> /dev/null
echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
- echo "Created $name (size $TSIZE) containing data sent by $who"
+ echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
run_tests_lo()
@@ -497,9 +564,11 @@ run_tests_lo()
return 1
fi
- # don't bother testing fallback tcp except for loopback case.
- if [ ${listener_ns} != ${connector_ns} ]; then
- return 0
+ if [ $do_tcp -eq 0 ]; then
+ # don't bother testing fallback tcp except for loopback case.
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ return 0
+ fi
fi
do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
@@ -516,6 +585,15 @@ run_tests_lo()
return 1
fi
+ if [ $do_tcp -gt 1 ] ;then
+ do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+ fi
+
return 0
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index dd42c2f692d0..f39c1129ce5f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -72,6 +72,15 @@ reset()
init
}
+# Like reset(), but additionally sets net.ipv4.tcp_syncookies=2 in both
+# test namespaces.
+reset_with_cookies()
+{
+ reset
+
+ for netns in "$ns1" "$ns2";do
+ ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
+ done
+}
+
for arg in "$@"; do
if [ "$arg" = "-c" ]; then
capture=1
@@ -138,7 +147,7 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- capfile="mp_join-${listener_ns}.pcap"
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
echo "Capturing traffic for test $TEST_COUNT into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
@@ -227,7 +236,7 @@ chk_join_nr()
local count
local dump_stats
- printf "%-36s %s" "$msg" "syn"
+ printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$syn_nr" ]; then
@@ -354,4 +363,57 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
+# single subflow, syncookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow with syn cookies" 1 1 1
+
+# multiple subflows with syn cookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows with syn cookies" 2 2 2
+
+# multiple subflows limited by server
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflows limited by server w cookies" 2 2 1
+
+# test signal address with cookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address with syn cookies" 1 1 1
+
+# test cookie with subflow and signal
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflow and signal w cookies" 2 2 2
+
+# accept and use add_addr with additional subflows
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflows and signal w. cookies" 3 3 3
+
exit $ret
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 4b02933cab8a..bdc03a2097e8 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -125,9 +125,8 @@ static int do_setcpu(int cpu)
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
if (sched_setaffinity(0, sizeof(mask), &mask))
- error(1, 0, "setaffinity %d", cpu);
-
- if (cfg_verbose)
+ fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+ else if (cfg_verbose)
fprintf(stderr, "cpu: %u\n", cpu);
return 0;
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 77c09cd339c3..6bbf69a28e12 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -59,6 +59,45 @@
# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
# VXLAN
#
+# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
+# Set up three namespaces, A, B, and C, with routing between A and B over
+# R1. R2 is unused in these tests. A has a veth connection to C, and is
+# connected to B via a VXLAN endpoint, which is directly bridged to C.
+# MTU on the B-R1 link is lower than other MTUs.
+#
+# Check that both C and A are able to communicate with B over the VXLAN
+# tunnel, and that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth A B else: 5000
+# ' bridge |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_br_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
+# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
+# Set up two namespaces, B, and C, with routing between the init namespace
+# and B over R1. A and R2 are unused in these tests. The init namespace
+# has a veth connection to C, and is connected to B via a VXLAN endpoint,
+# which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
+# is lower than other MTUs.
+#
+# Check that C is able to communicate with B over the VXLAN tunnel, and
+# that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth init B else: 5000
+# '- ovs |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
# - pmtu_ipv{4,6}_fou{4,6}_exception
# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
# (FoU) over IPv4/IPv6, instead of VXLAN
@@ -147,6 +186,22 @@ tests="
pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1
pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
@@ -173,10 +228,12 @@ tests="
NS_A="ns-A"
NS_B="ns-B"
+NS_C="ns-C"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
+ns_c="ip netns exec ${NS_C}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"
@@ -239,9 +296,11 @@ routes_nh="
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
+veth4_c_addr="192.168.2.10"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
+veth6_c_addr="fd00:2::c"
veth6_mask="64"
tunnel4_a_addr="192.168.2.1"
@@ -428,7 +487,7 @@ setup_ip6ip6() {
}
setup_namespaces() {
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns add ${n} || return 1
# Disable DAD, so that we don't have to wait to use the
@@ -484,6 +543,7 @@ setup_vxlan_or_geneve() {
a_addr="${2}"
b_addr="${3}"
opts="${4}"
+ br_if_a="${5}"
if [ "${type}" = "vxlan" ]; then
opts="${opts} ttl 64 dstport 4789"
@@ -497,10 +557,16 @@ setup_vxlan_or_geneve() {
run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
- run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
- run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ if [ -n "${br_if_a}" ]; then
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
+ else
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ fi
- run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
run_cmd ${ns_a} ip link set ${type}_a up
@@ -516,11 +582,27 @@ setup_vxlan4() {
}
setup_geneve6() {
- setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
}
setup_vxlan6() {
- setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
+}
+
+# GENEVE tunnel over IPv4 with the "df set" option; the A end is attached
+# to bridge br0
+setup_bridged_geneve4() {
+ setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+# VXLAN tunnel over IPv4 with the "df set" option; the A end is attached
+# to bridge br0
+setup_bridged_vxlan4() {
+ setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+# GENEVE tunnel over IPv6, no extra tunnel options; the A end is attached
+# to bridge br0
+setup_bridged_geneve6() {
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
+}
+
+setup_bridged_vxlan6() {
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
}
setup_xfrm() {
@@ -630,6 +712,80 @@ setup_routing() {
return 0
}
+setup_bridge() {
+ run_cmd ${ns_a} ip link add br0 type bridge || return 2
+ run_cmd ${ns_a} ip link set br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns ns-A
+
+ run_cmd ${ns_a} ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ${ns_a} ip link set veth_A-C master br0
+}
+
+# Create a tunnel between a port of the Open vSwitch bridge ovs_br0 and a
+# tunnel device in namespace B.
+#  $1: tunnel type, "vxlan" or "geneve"
+#  $2: outer address of the Open vSwitch end
+#  $3: outer address of the end in namespace B
+# Returns 1 if either end of the tunnel cannot be created.
+setup_ovs_vxlan_or_geneve() {
+ type="${1}"
+ a_addr="${2}"
+ b_addr="${3}"
+
+ # opts and opts_b are globals: start from empty values, so that options
+ # left behind by an earlier setup step neither leak into this tunnel nor
+ # pile up over repeated calls
+ opts=""
+ opts_b=""
+
+ if [ "${type}" = "vxlan" ]; then
+ opts="ttl 64 dstport 4789"
+ opts_b="local ${b_addr}"
+ fi
+
+ run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
+ set interface ${type}_a type=${type} \
+ options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+
+ run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
+
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+
+ run_cmd ${ns_b} ip link set ${type}_b up
+}
+
+# Open vSwitch GENEVE tunnel over IPv4
+setup_ovs_geneve4() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+# Open vSwitch VXLAN tunnel over IPv4
+setup_ovs_vxlan4() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+# Open vSwitch GENEVE tunnel over IPv6
+setup_ovs_geneve6() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+# Open vSwitch VXLAN tunnel over IPv6
+setup_ovs_vxlan6() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+# Create the Open vSwitch bridge ovs_br0 outside of the test namespaces,
+# connect namespace C to it through a veth pair, and take over the A side
+# of the A-R1 link.
+# Returns 2 if the bridge cannot be created.
+setup_ovs_bridge() {
+ run_cmd ovs-vsctl add-br ovs_br0 || return 2
+ run_cmd ip link set ovs_br0 up
+
+ # veth pair created in C; the veth_A-C end is then moved out with
+ # "netns 1" and becomes a port of ovs_br0
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns 1
+
+ run_cmd ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
+
+ # Move veth_A-R1 to init
+ run_cmd ${ns_a} ip link set veth_A-R1 netns 1
+ run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
+ run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
+ run_cmd ip link set veth_A-R1 up
+ # Routes towards the B side of R1, via the R1 end of the A-R1 link
+ run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
+ run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
@@ -657,9 +813,14 @@ cleanup() {
done
tcpdump_pids=
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns del ${n} 2> /dev/null
done
+
+ ip link del veth_A-C 2>/dev/null
+ ip link del veth_A-R1 2>/dev/null
+ ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
+ ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
}
mtu() {
@@ -892,6 +1053,177 @@ test_pmtu_ipv6_geneve6_exception() {
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
+# Check that PMTU exceptions are created for traffic forwarded through a
+# bridge with a VXLAN/GENEVE tunnel, both for traffic sent from ${ns_c} and
+# for traffic sent locally from ${ns_a}.
+# Arguments: $1 tunnel type (vxlan|geneve), $2 inner IP family (4|6),
+# $3 outer IP family (4|6).
+# Returns 2 if setup fails, 1 if a ping command fails.
+test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing bridge bridged_${type}4 || return 2
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing bridge bridged_${type}6 || return 2
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ # Interface names must match the ones configured below (veth_A-C, not veth-A-C)
+ trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
+ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "${ns_a}" br0 "${ns_a}" veth_A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" br0 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+}
+
+test_pmtu_ipv4_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
+}
+
+# Check that a PMTU exception is created in ${ns_c} for traffic forwarded
+# through an Open vSwitch bridge with a VXLAN/GENEVE tunnel port.
+# Arguments: $1 tunnel type (vxlan|geneve), $2 inner IP family (4|6),
+# $3 outer IP family (4|6).
+# Returns 2 if setup fails, 1 if the ping command fails.
+test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ # Name of the tunnel device on the local side, which differs per tunnel type
+ if [ "${type}" = "vxlan" ]; then
+ tun_a="vxlan_sys_4789"
+ elif [ "${type}" = "geneve" ]; then
+ tun_a="genev_sys_6081"
+ fi
+
+ # Interface names must match the ones configured below (veth_A-C, not veth-A-C)
+ trace "" "${tun_a}" "${ns_b}" ${type}_b \
+ "" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "" ovs_br0 "" veth_A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "" ovs_br0 $((${ll_mtu} + 1000))
+ mtu "" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "" ${tun_a} $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
+}
+
+test_pmtu_ipv4_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
+}
+
test_pmtu_ipvX_over_fouY_or_gueY() {
inner_family=${1}
outer_family=${2}
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index bdbf4b3125b6..7c38a909f8b8 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -5,7 +5,6 @@
# set -e
devdummy="test-dummy0"
-ret=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -66,7 +65,7 @@ kci_test_bridge()
devbr="test-br0"
vlandev="testbr-vlan1"
- ret=0
+ local ret=0
ip link add name "$devbr" type bridge
check_err $?
@@ -113,7 +112,7 @@ kci_test_gre()
rem=10.42.42.1
loc=10.0.0.1
- ret=0
+ local ret=0
ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
check_err $?
ip link set $gredev up
@@ -149,7 +148,7 @@ kci_test_gre()
kci_test_tc()
{
dev=lo
- ret=0
+ local ret=0
tc qdisc add dev "$dev" root handle 1: htb
check_err $?
@@ -184,7 +183,7 @@ kci_test_tc()
kci_test_polrouting()
{
- ret=0
+ local ret=0
ip rule add fwmark 1 lookup 100
check_err $?
ip route add local 0.0.0.0/0 dev lo table 100
@@ -207,7 +206,7 @@ kci_test_route_get()
{
local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
- ret=0
+ local ret=0
ip route get 127.0.0.1 > /dev/null
check_err $?
@@ -290,7 +289,7 @@ kci_test_promote_secondaries()
kci_test_addrlabel()
{
- ret=0
+ local ret=0
ip addrlabel add prefix dead::/64 dev lo label 1
check_err $?
@@ -330,7 +329,7 @@ kci_test_addrlabel()
kci_test_ifalias()
{
- ret=0
+ local ret=0
namewant=$(uuidgen)
syspathname="/sys/class/net/$devdummy/ifalias"
@@ -385,7 +384,7 @@ kci_test_ifalias()
kci_test_vrf()
{
vrfname="test-vrf"
- ret=0
+ local ret=0
ip link show type vrf 2>/dev/null
if [ $? -ne 0 ]; then
@@ -425,7 +424,7 @@ kci_test_vrf()
kci_test_encap_vxlan()
{
- ret=0
+ local ret=0
vxlan="test-vxlan0"
vlan="test-vlan0"
testns="$1"
@@ -511,7 +510,7 @@ kci_test_encap_vxlan()
kci_test_encap_fou()
{
- ret=0
+ local ret=0
name="test-fou"
testns="$1"
@@ -548,7 +547,7 @@ kci_test_encap_fou()
kci_test_encap()
{
testns="testns"
- ret=0
+ local ret=0
ip netns add "$testns"
if [ $? -ne 0 ]; then
@@ -565,15 +564,18 @@ kci_test_encap()
check_err $?
kci_test_encap_vxlan "$testns"
+ check_err $?
kci_test_encap_fou "$testns"
+ check_err $?
ip netns del "$testns"
+ return $ret
}
kci_test_macsec()
{
msname="test_macsec0"
- ret=0
+ local ret=0
ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
if [ $? -ne 0 ]; then
@@ -631,7 +633,7 @@ kci_test_macsec()
#-------------------------------------------------------------------
kci_test_ipsec()
{
- ret=0
+ local ret=0
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.1
dstip=192.168.123.2
@@ -731,7 +733,7 @@ kci_test_ipsec()
#-------------------------------------------------------------------
kci_test_ipsec_offload()
{
- ret=0
+ local ret=0
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.3
dstip=192.168.123.4
@@ -841,7 +843,7 @@ kci_test_gretap()
{
testns="testns"
DEV_NS=gretap00
- ret=0
+ local ret=0
ip netns add "$testns"
if [ $? -ne 0 ]; then
@@ -891,7 +893,7 @@ kci_test_ip6gretap()
{
testns="testns"
DEV_NS=ip6gretap00
- ret=0
+ local ret=0
ip netns add "$testns"
if [ $? -ne 0 ]; then
@@ -941,7 +943,7 @@ kci_test_erspan()
{
testns="testns"
DEV_NS=erspan00
- ret=0
+ local ret=0
ip link help erspan 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
@@ -1006,7 +1008,7 @@ kci_test_ip6erspan()
{
testns="testns"
DEV_NS=ip6erspan00
- ret=0
+ local ret=0
ip link help ip6erspan 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
@@ -1077,7 +1079,7 @@ kci_test_fdb_get()
test_mac=de:ad:be:ef:13:37
localip="10.0.2.2"
dstip="10.0.2.3"
- ret=0
+ local ret=0
bridge fdb help 2>&1 |grep -q 'bridge fdb get'
if [ $? -ne 0 ];then
@@ -1125,7 +1127,7 @@ kci_test_neigh_get()
dstmac=de:ad:be:ef:13:37
dstip=10.0.2.4
dstip6=dead::2
- ret=0
+ local ret=0
ip neigh help 2>&1 |grep -q 'ip neigh get'
if [ $? -ne 0 ];then
@@ -1175,6 +1177,7 @@ kci_test_neigh_get()
kci_test_rtnl()
{
+ local ret=0
kci_add_dummy
if [ $ret -ne 0 ];then
echo "FAIL: cannot add dummy interface"
@@ -1182,27 +1185,48 @@ kci_test_rtnl()
fi
kci_test_polrouting
+ check_err $?
kci_test_route_get
+ check_err $?
kci_test_addrlft
+ check_err $?
kci_test_promote_secondaries
+ check_err $?
kci_test_tc
+ check_err $?
kci_test_gre
+ check_err $?
kci_test_gretap
+ check_err $?
kci_test_ip6gretap
+ check_err $?
kci_test_erspan
+ check_err $?
kci_test_ip6erspan
+ check_err $?
kci_test_bridge
+ check_err $?
kci_test_addrlabel
+ check_err $?
kci_test_ifalias
+ check_err $?
kci_test_vrf
+ check_err $?
kci_test_encap
+ check_err $?
kci_test_macsec
+ check_err $?
kci_test_ipsec
+ check_err $?
kci_test_ipsec_offload
+ check_err $?
kci_test_fdb_get
+ check_err $?
kci_test_neigh_get
+ check_err $?
kci_del_dummy
+ return $ret
}
#check for needed privileges
@@ -1221,4 +1245,4 @@ done
kci_test_rtnl
-exit $ret
+exit $?
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index bcb79ba1f214..e4613ce4ed69 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -44,6 +44,7 @@ struct test_case {
struct options sockopt;
struct tstamps expected;
bool enabled;
+ bool warn_on_fail;
};
struct sof_flag {
@@ -67,44 +68,44 @@ static struct socket_type socket_types[] = {
static struct test_case test_cases[] = {
{ {}, {} },
{
- { so_timestamp: 1 },
- { tstamp: true }
+ { .so_timestamp = 1 },
+ { .tstamp = true }
},
{
- { so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamp: 1, so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestamp = 1, .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE },
{}
},
{
/* Loopback device does not support hw timestamps. */
- { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
- {}
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE },
+ .warn_on_fail = true
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE
| SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { swtstamp: true }
+ { .swtstamp = true }
},
{
- { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { tstamp: true, swtstamp: true }
+ { .tstamp = true, .swtstamp = true }
},
};
@@ -115,6 +116,9 @@ static struct option long_options[] = {
{ "tcp", no_argument, 0, 't' },
{ "udp", no_argument, 0, 'u' },
{ "ip", no_argument, 0, 'i' },
+ { "strict", no_argument, 0, 'S' },
+ { "ipv4", no_argument, 0, '4' },
+ { "ipv6", no_argument, 0, '6' },
{ NULL, 0, NULL, 0 },
};
@@ -270,37 +274,55 @@ void config_so_flags(int rcv, struct options o)
error(1, errno, "Failed to set SO_TIMESTAMPING");
}
-bool run_test_case(struct socket_type s, struct test_case t)
+bool run_test_case(struct socket_type *s, int test_num, char ip_version,
+ bool strict)
{
- int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ union {
+ struct sockaddr_in6 addr6;
+ struct sockaddr_in addr4;
+ struct sockaddr addr_un;
+ } addr;
int read_size = op_size;
- struct sockaddr_in addr;
+ int src, dst, rcv, port;
+ socklen_t addr_size;
bool failed = false;
- int src, dst, rcv;
- src = socket(AF_INET, s.type, s.protocol);
+ port = (s->type == SOCK_RAW) ? 0 : next_port++;
+ memset(&addr, 0, sizeof(addr));
+ if (ip_version == '4') {
+ addr.addr4.sin_family = AF_INET;
+ addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.addr4.sin_port = htons(port);
+ addr_size = sizeof(addr.addr4);
+ if (s->type == SOCK_RAW)
+ read_size += 20; /* for IPv4 header */
+ } else {
+ addr.addr6.sin6_family = AF_INET6;
+ addr.addr6.sin6_addr = in6addr_loopback;
+ addr.addr6.sin6_port = htons(port);
+ addr_size = sizeof(addr.addr6);
+ }
+ printf("Starting testcase %d over ipv%c...\n", test_num, ip_version);
+ src = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (src < 0)
error(1, errno, "Failed to open src socket");
- dst = socket(AF_INET, s.type, s.protocol);
+ dst = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (dst < 0)
error(1, errno, "Failed to open dst socket");
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr.sin_port = htons(port);
-
- if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (bind(dst, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to bind to port %d", port);
- if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ if (s->type == SOCK_STREAM && (listen(dst, 1) < 0))
error(1, errno, "Failed to listen");
- if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (connect(src, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to connect");
- if (s.type == SOCK_STREAM) {
+ if (s->type == SOCK_STREAM) {
rcv = accept(dst, NULL, NULL);
if (rcv < 0)
error(1, errno, "Failed to accept");
@@ -309,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t)
rcv = dst;
}
- config_so_flags(rcv, t.sockopt);
+ config_so_flags(rcv, test_cases[test_num].sockopt);
usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
do_send(src);
- if (s.type == SOCK_RAW)
- read_size += 20; /* for IP header */
- failed = do_recv(rcv, read_size, t.expected);
+ failed = do_recv(rcv, read_size, test_cases[test_num].expected);
close(rcv);
close(src);
+ if (failed) {
+ printf("FAILURE in testcase %d over ipv%c ", test_num,
+ ip_version);
+ print_test_case(&test_cases[test_num]);
+ if (!strict && test_cases[test_num].warn_on_fail)
+ failed = false;
+ }
return failed;
}
@@ -327,6 +354,9 @@ int main(int argc, char **argv)
{
bool all_protocols = true;
bool all_tests = true;
+ bool cfg_ipv4 = false;
+ bool cfg_ipv6 = false;
+ bool strict = false;
int arg_index = 0;
int failures = 0;
int s, t, opt;
@@ -362,6 +392,15 @@ int main(int argc, char **argv)
all_protocols = false;
socket_types[0].enabled = true;
break;
+ case 'S':
+ strict = true;
+ break;
+ case '4':
+ cfg_ipv4 = true;
+ break;
+ case '6':
+ cfg_ipv6 = true;
+ break;
default:
error(1, 0, "Failed to parse parameters.");
}
@@ -375,13 +414,14 @@ int main(int argc, char **argv)
for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
if (!all_tests && !test_cases[t].enabled)
continue;
-
- printf("Starting testcase %d...\n", t);
- if (run_test_case(socket_types[s], test_cases[t])) {
- failures++;
- printf("FAILURE in test case ");
- print_test_case(&test_cases[t]);
- }
+ if (cfg_ipv4 || !cfg_ipv6)
+ if (run_test_case(&socket_types[s], t, '4',
+ strict))
+ failures++;
+ if (cfg_ipv6 || !cfg_ipv4)
+ if (run_test_case(&socket_types[s], t, '6',
+ strict))
+ failures++;
}
}
if (!failures)
diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh
new file mode 100755
index 000000000000..91631e88bf46
--- /dev/null
+++ b/tools/testing/selftests/net/rxtimestamp.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Run ./rxtimestamp through the in_netns.sh wrapper, forwarding every
+# command-line argument as-is ("$@" keeps each argument a single word).
+./in_netns.sh ./rxtimestamp "$@"
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 011b0da6b033..490a8cca708a 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -64,6 +64,7 @@ static int cfg_payload_len = 10;
static int cfg_poll_timeout = 100;
static int cfg_delay_snd;
static int cfg_delay_ack;
+static int cfg_delay_tolerance_usec = 500;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static bool cfg_busy_poll;
@@ -152,11 +153,12 @@ static void validate_key(int tskey, int tstype)
static void validate_timestamp(struct timespec *cur, int min_delay)
{
- int max_delay = min_delay + 500 /* processing time upper bound */;
int64_t cur64, start64;
+ int max_delay;
cur64 = timespec_to_us64(cur);
start64 = timespec_to_us64(&ts_usr);
+ max_delay = min_delay + cfg_delay_tolerance_usec;
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
@@ -683,6 +685,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -S N: usec to sleep before reading error queue\n"
+ " -t N: tolerance (usec) for timestamp validation\n"
" -u: use udp\n"
" -v: validate SND delay (usec)\n"
" -V: validate ACK delay (usec)\n"
@@ -697,7 +700,7 @@ static void parse_opt(int argc, char **argv)
int c;
while ((c = getopt(argc, argv,
- "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
+ "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -760,6 +763,9 @@ static void parse_opt(int argc, char **argv)
case 'S':
cfg_sleep_usec = strtoul(optarg, NULL, 10);
break;
+ case 't':
+ cfg_delay_tolerance_usec = strtoul(optarg, NULL, 10);
+ break;
case 'u':
proto_count++;
cfg_proto = SOCK_DGRAM;
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
new file mode 100755
index 000000000000..18b982d611de
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -0,0 +1,396 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is designed for testing the new VRF strict_mode functionality.
+
+ret=0
+
+# identifies the "init" network namespace which is often called root network
+# namespace.
+INIT_NETNS_NAME="init"
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+ip_expand_args()
+{
+ local nsname=$1
+ local nsarg=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ nsarg="-netns ${nsname}"
+ fi
+
+ echo "${nsarg}"
+}
+
+vrf_count()
+{
+ local nsname=$1
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -o link show type vrf | wc -l
+}
+
+# Print the number of VRF devices in namespace $1 whose detailed link output
+# mentions routing table $2.
+count_vrf_by_table_id()
+{
+ local nsname=$1
+ local tableid=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ # -w: whole-word match, so that e.g. table 10 does not also count table 100
+ ip ${nsarg} -d -o link show type vrf | grep -w "table ${tableid}" | wc -l
+}
+
+add_vrf()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null
+}
+
+add_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+add_vrf_and_check_fail()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+del_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link del ${vrfname}
+ log_test $? 0 "${nsname}: remove vrf ${vrfname}"
+}
+
+config_vrf_and_check()
+{
+ local nsname=$1
+ local addr=$2
+ local vrfname=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link set dev ${vrfname} up && \
+ ip ${nsarg} addr add ${addr} dev ${vrfname}
+ log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}"
+}
+
+read_strict_mode()
+{
+ local nsname=$1
+ local rval
+ local rc=0
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \
+ grep -E "^[0-1]$")" &> /dev/null
+ if [ $? -ne 0 ]; then
+ # set errors
+ rval=255
+ rc=1
+ fi
+
+ # on success, rval can be only 0 or 1; on error, rval is equal to 255
+ echo ${rval}
+ return ${rc}
+}
+
+read_strict_mode_compare_and_check()
+{
+ local nsname=$1
+ local expected=$2
+ local res
+
+ res="$(read_strict_mode ${nsname})"
+ log_test ${res} ${expected} "${nsname}: check strict_mode=${res}"
+}
+
+set_strict_mode()
+{
+ local nsname=$1
+ local val=$2
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null
+}
+
+enable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 1
+}
+
+disable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 0
+}
+
+disable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ disable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: disable strict_mode (=0)"
+}
+
+enable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: enable strict_mode (=1)"
+}
+
+enable_strict_mode_and_check_fail()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 1 "${nsname}: CANNOT enable strict_mode"
+}
+
+strict_mode_check_default()
+{
+ local nsname=$1
+ local strictmode
+ local vrfcnt
+
+ vrfcnt=$(vrf_count ${nsname})
+ strictmode=$(read_strict_mode ${nsname})
+ log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs"
+}
+
+setup()
+{
+ modprobe vrf
+
+ ip netns add testns
+ ip netns exec testns ip link set lo up
+}
+
+cleanup()
+{
+ ip netns del testns 2>/dev/null
+
+ ip link del vrf100 2>/dev/null
+ ip link del vrf101 2>/dev/null
+ ip link del vrf102 2>/dev/null
+
+ echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null
+}
+
+vrf_strict_mode_tests_init()
+{
+ vrf_strict_mode_check_support init
+
+ strict_mode_check_default init
+
+ add_vrf_and_check init vrf100 100
+ config_vrf_and_check init 172.16.100.1/24 vrf100
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check_fail init vrf101 100
+
+ disable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf101 100
+ config_vrf_and_check init 172.16.101.1/24 vrf101
+
+ enable_strict_mode_and_check_fail init
+
+ del_vrf_and_check init vrf101
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf102 102
+ config_vrf_and_check init 172.16.102.1/24 vrf102
+
+ # the strict_mode is enabled in the init
+}
+
+vrf_strict_mode_tests_testns()
+{
+ vrf_strict_mode_check_support testns
+
+ strict_mode_check_default testns
+
+ enable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf100 100
+ config_vrf_and_check testns 10.0.100.1/24 vrf100
+
+ add_vrf_and_check_fail testns vrf101 100
+
+ add_vrf_and_check_fail testns vrf102 100
+
+ add_vrf_and_check testns vrf200 200
+
+ disable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf101 100
+
+ add_vrf_and_check testns vrf102 100
+
+ #the strict_mode is disabled in the testns
+}
+
+vrf_strict_mode_tests_mix()
+{
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+
+ del_vrf_and_check testns vrf101
+
+ del_vrf_and_check testns vrf102
+
+ disable_strict_mode_and_check init
+
+ enable_strict_mode_and_check testns
+
+ enable_strict_mode_and_check init
+ enable_strict_mode_and_check init
+
+ disable_strict_mode_and_check testns
+ disable_strict_mode_and_check testns
+
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+}
+
+vrf_strict_mode_tests()
+{
+ log_section "VRF strict_mode test on init network namespace"
+ vrf_strict_mode_tests_init
+
+ log_section "VRF strict_mode test on testns network namespace"
+ vrf_strict_mode_tests_testns
+
+ log_section "VRF strict_mode test mixing init and testns network namespaces"
+ vrf_strict_mode_tests_mix
+}
+
+vrf_strict_mode_check_support()
+{
+ local nsname=$1
+ local output
+ local rc
+
+ output="$(lsmod | grep '^vrf' | awk '{print $1}')"
+ if [ -z "${output}" ]; then
+ modinfo vrf || return $?
+ fi
+
+ # we do not care about the value of the strict_mode; we only check if
+ # the strict_mode parameter is available or not.
+ read_strict_mode ${nsname} &>/dev/null; rc=$?
+ log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available"
+
+ return ${rc}
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Preconditions: report SKIP (not PASS) when the test cannot run.
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit $ksft_skip
+fi
+
+# Remove leftovers from a previous run before setting up
+cleanup &> /dev/null
+
+setup
+vrf_strict_mode_tests
+cleanup
+
+print_log_test_results
+
+# ret is set to 1 by log_test on any failed check
+exit $ret
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a179f0dca8ce..a374e10ef506 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -4,7 +4,7 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh
+ nft_queue.sh nft_meta.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
new file mode 100755
index 000000000000..d250b84dd5bc
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter iifcount {}
+ counter iifnamecount {}
+ counter iifgroupcount {}
+ counter iiftypecount {}
+ counter infproto4count {}
+ counter il4protocounter {}
+ counter imarkcounter {}
+
+ counter oifcount {}
+ counter oifnamecount {}
+ counter oifgroupcount {}
+ counter oiftypecount {}
+ counter onfproto4count {}
+ counter ol4protocounter {}
+ counter oskuidcounter {}
+ counter oskgidcounter {}
+ counter omarkcounter {}
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ meta iif lo counter name "iifcount"
+ meta iifname "lo" counter name "iifnamecount"
+ meta iifgroup "default" counter name "iifgroupcount"
+ meta iiftype "loopback" counter name "iiftypecount"
+ meta nfproto ipv4 counter name "infproto4count"
+ meta l4proto icmp counter name "il4protocounter"
+ meta mark 42 counter name "imarkcounter"
+ }
+
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oif lo counter name "oifcount" counter
+ meta oifname "lo" counter name "oifnamecount"
+ meta oifgroup "default" counter name "oifgroupcount"
+ meta oiftype "loopback" counter name "oiftypecount"
+ meta nfproto ipv4 counter name "onfproto4count"
+ meta l4proto icmp counter name "ol4protocounter"
+ meta skuid 0 counter name "oskuidcounter"
+ meta skgid 0 counter name "oskgidcounter"
+ meta mark 42 counter name "omarkcounter"
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add test ruleset"
+ exit $ksft_skip
+fi
+
+ret=0
+
+# Check that named counter $1 of table "inet filter" in $ns0 reports
+# "packets $2". On mismatch, print a failure message, set ret=1 and dump the
+# counter. $3 (verbose) is accepted but not used here.
+check_one_counter()
+{
+ local cname="$1"
+ local want="packets $2"
+ local verbose="$3"
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then
+ echo "FAIL: $cname, want \"$want\", got"
+ ret=1
+ # dump the counter that was checked ($cname); $counter is only set
+ # when called from check_lo_counters
+ ip netns exec "$ns0" nft list counter inet filter "$cname"
+ fi
+}
+
+check_lo_counters()
+{
+ local want="$1"
+ local verbose="$2"
+ local counter
+
+ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+ il4protocounter \
+ ol4protocounter \
+ ; do
+ check_one_counter "$counter" "$want" "$verbose"
+ done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta iif/oif counters at expected values"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c
index 73d532556d17..7d84097ad45c 100644
--- a/tools/testing/selftests/pid_namespace/regression_enomem.c
+++ b/tools/testing/selftests/pid_namespace/regression_enomem.c
@@ -11,7 +11,6 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
#include "../kselftest_harness.h"
#include "../pidfd/pidfd.h"
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 84b65ecccb04..7758c98be015 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -18,7 +18,6 @@
#include <linux/kcmp.h>
#include "pidfd.h"
-#include "../kselftest.h"
#include "../kselftest_harness.h"
/*
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7a27dbe07e23..7dca1aa4672d 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -20,7 +20,6 @@
#include "pidfd.h"
#include "../clone3/clone3_selftests.h"
-#include "../kselftest.h"
#include "../kselftest_harness.h"
enum {
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 7aff2d3b42c0..c585aaa2acd8 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -8,6 +8,7 @@
#include <sched.h>
#include <signal.h>
#include <stdio.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
@@ -27,6 +28,8 @@
#define MAX_EVENTS 5
+static bool have_pidfd_send_signal;
+
static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
{
size_t stack_size = 1024;
@@ -56,6 +59,13 @@ static int test_pidfd_send_signal_simple_success(void)
int pidfd, ret;
const char *test_name = "pidfd_send_signal send SIGUSR1";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
if (pidfd < 0)
ksft_exit_fail_msg(
@@ -86,6 +96,13 @@ static int test_pidfd_send_signal_exited_fail(void)
pid_t pid;
const char *test_name = "pidfd_send_signal signal exited process";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
pid = fork();
if (pid < 0)
ksft_exit_fail_msg("%s test: Failed to create new process\n",
@@ -137,16 +154,34 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
pid_t pid1;
const char *test_name = "pidfd_send_signal signal recycled pid";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
ret = unshare(CLONE_NEWPID);
- if (ret < 0)
+ if (ret < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("%s test: Unsharing pid namespace not permitted\n",
+ test_name);
+ return 0;
+ }
ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n",
test_name);
+ }
ret = unshare(CLONE_NEWNS);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: Failed to unshare mount namespace\n",
- test_name);
+ if (ret < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("%s test: Unsharing mount namespace not permitted\n",
+ test_name);
+ return 0;
+ }
+ ksft_exit_fail_msg("%s test: Failed to unshare mount namespace\n",
+ test_name);
+ }
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
if (ret < 0)
@@ -325,15 +360,17 @@ static int test_pidfd_send_signal_syscall_support(void)
ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
if (ret < 0) {
- if (errno == ENOSYS)
- ksft_exit_skip(
+ if (errno == ENOSYS) {
+ ksft_test_result_skip(
"%s test: pidfd_send_signal() syscall not supported\n",
test_name);
-
+ return 0;
+ }
ksft_exit_fail_msg("%s test: Failed to send signal\n",
test_name);
}
+ have_pidfd_send_signal = true;
close(pidfd);
ksft_test_result_pass(
"%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n",
@@ -521,7 +558,7 @@ static void test_pidfd_poll_leader_exit(int use_waitpid)
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(4);
+ ksft_set_plan(8);
test_pidfd_poll_exec(0);
test_pidfd_poll_exec(1);
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50c949c..55ef15184057 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
* This selftest exercises the powerpc alignment fault handler.
*
* We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alternative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists. Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ * ./alignment_handler /dev/mem 0xc0000000
*
* We initialise the source buffers, then use whichever set of load/store
* instructions is under test to copy bytes from the source buffers to the
@@ -48,11 +58,14 @@
#include <asm/cputable.h>
#include "utils.h"
+#include "instructions.h"
int bufsize;
int debug;
int testing;
volatile int gotsig;
+char *cipath = "/dev/fb0";
+long cioffset;
void sighandler(int sig, siginfo_t *info, void *ctx)
{
@@ -84,6 +97,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
} \
rc |= do_test(#name, test_##name)
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ ld_op(ld_reg, %0, 0, 0) \
+ st_op(st_reg, %1, 0, 0) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +127,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
/* FIXME: Unimplemented tests: */
// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
@@ -195,17 +230,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
printf("\tDoing %s:\t", test_name);
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0) {
printf("\n");
- perror("Can't open /dev/fb0 now?");
+ perror("Can't open ci file now?");
return 1;
}
- ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, 0x0);
- ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, bufsize);
+ ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset + bufsize);
+
if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
printf("\n");
perror("mmap failed");
@@ -270,11 +306,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
return rc;
}
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
{
int fd;
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0)
return false;
@@ -286,7 +322,7 @@ int test_alignment_handler_vsx_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("VSX: 2.06B\n");
@@ -304,7 +340,7 @@ int test_alignment_handler_vsx_207(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
printf("VSX: 2.07B\n");
@@ -320,7 +356,7 @@ int test_alignment_handler_vsx_300(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
printf("VSX: 3.00B\n");
@@ -348,11 +384,30 @@ int test_alignment_handler_vsx_300(void)
return rc;
}
+int test_alignment_handler_vsx_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("VSX: PREFIX\n");
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+ return rc;
+}
+
int test_alignment_handler_integer(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Integer\n");
LOAD_DFORM_TEST(lbz);
@@ -408,7 +463,7 @@ int test_alignment_handler_integer_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Integer: 2.06\n");
@@ -419,11 +474,32 @@ int test_alignment_handler_integer_206(void)
return rc;
}
+int test_alignment_handler_integer_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Integer: PREFIX\n");
+ LOAD_MLS_PREFIX_TEST(PLBZ);
+ LOAD_MLS_PREFIX_TEST(PLHZ);
+ LOAD_MLS_PREFIX_TEST(PLHA);
+ LOAD_MLS_PREFIX_TEST(PLWZ);
+ LOAD_8LS_PREFIX_TEST(PLWA);
+ LOAD_8LS_PREFIX_TEST(PLD);
+ STORE_MLS_PREFIX_TEST(PSTB);
+ STORE_MLS_PREFIX_TEST(PSTH);
+ STORE_MLS_PREFIX_TEST(PSTW);
+ STORE_8LS_PREFIX_TEST(PSTD);
+ return rc;
+}
+
int test_alignment_handler_vmx(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
printf("VMX\n");
@@ -451,7 +527,7 @@ int test_alignment_handler_fp(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Floating point\n");
LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +555,7 @@ int test_alignment_handler_fp_205(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
printf("Floating point: 2.05\n");
@@ -497,7 +573,7 @@ int test_alignment_handler_fp_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Floating point: 2.06\n");
@@ -507,13 +583,32 @@ int test_alignment_handler_fp_206(void)
return rc;
}
+
+int test_alignment_handler_fp_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Floating point: PREFIX\n");
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+ return rc;
+}
+
void usage(char *prog)
{
- printf("Usage: %s [options]\n", prog);
+ printf("Usage: %s [options] [path [offset]]\n", prog);
printf(" -d Enable debug error output\n");
printf("\n");
- printf("This test requires a POWER8 or POWER9 CPU and a usable ");
- printf("framebuffer at /dev/fb0.\n");
+ printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+ printf("and either a usable framebuffer at /dev/fb0 or ");
+ printf("the path to usable cache inhibited memory and optional ");
+ printf("offset to be provided\n");
}
int main(int argc, char *argv[])
@@ -533,6 +628,13 @@ int main(int argc, char *argv[])
exit(1);
}
}
+ argc -= optind;
+ argv += optind;
+
+ if (argc > 0)
+ cipath = argv[0];
+ if (argc > 1)
+ cioffset = strtol(argv[1], 0, 0x10);
bufsize = getpagesize();
@@ -552,10 +654,14 @@ int main(int argc, char *argv[])
"test_alignment_handler_vsx_207");
rc |= test_harness(test_alignment_handler_vsx_300,
"test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_vsx_prefix,
+ "test_alignment_handler_vsx_prefix");
rc |= test_harness(test_alignment_handler_integer,
"test_alignment_handler_integer");
rc |= test_harness(test_alignment_handler_integer_206,
"test_alignment_handler_integer_206");
+ rc |= test_harness(test_alignment_handler_integer_prefix,
+ "test_alignment_handler_integer_prefix");
rc |= test_harness(test_alignment_handler_vmx,
"test_alignment_handler_vmx");
rc |= test_harness(test_alignment_handler_fp,
@@ -564,5 +670,7 @@ int main(int argc, char *argv[])
"test_alignment_handler_fp_205");
rc |= test_harness(test_alignment_handler_fp_206,
"test_alignment_handler_fp_206");
+ rc |= test_harness(test_alignment_handler_fp_prefix,
+ "test_alignment_handler_fp_prefix");
return rc;
}
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index a2e8c9da7fa5..d50cc05df495 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -19,6 +19,7 @@
#include <limits.h>
#include <sys/time.h>
#include <sys/syscall.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/shm.h>
#include <linux/futex.h>
@@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
- int pid;
- cpu_set_t cpuset;
+ int pid, ncpus;
+ cpu_set_t *cpuset;
+ size_t size;
pid = fork();
if (pid == -1) {
@@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
if (pid)
return;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
+ ncpus = get_nprocs();
+ size = CPU_ALLOC_SIZE(ncpus);
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset) {
+ perror("malloc");
+ exit(1);
+ }
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(cpu, size, cpuset);
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ if (sched_setaffinity(0, size, cpuset)) {
perror("sched_setaffinity");
+ CPU_FREE(cpuset);
exit(1);
}
+ CPU_FREE(cpuset);
fn(arg);
exit(0);
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index f52ed92b53e7..00dc32c0ed75 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -5,12 +5,17 @@ pe_ok() {
local dev="$1"
local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
- if ! [ -e "$path" ] ; then
+ # if a driver doesn't support the error handling callbacks then the
+ # device is recovered by removing and re-probing it. This causes the
+ # sysfs directory to disappear so read the PE state once and squash
+ # any potential error messages
+ local eeh_state="$(cat $path 2>/dev/null)"
+ if [ -z "$eeh_state" ]; then
return 1;
fi
- local fw_state="$(cut -d' ' -f1 < $path)"
- local sw_state="$(cut -d' ' -f2 < $path)"
+ local fw_state="$(echo $eeh_state | cut -d' ' -f1)"
+ local sw_state="$(echo $eeh_state | cut -d' ' -f2)"
# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
# error state or being recovered. Either way, not ok.
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061eb6f0f..4efa6314bd96 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@ static inline int paste_last(void *i)
#define PPC_INST_PASTE __PASTE(0, 0, 0, 0)
#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+#else
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a) (((a) & 0x1f) << 16)
+#define __PPC_RS(s) (((s) & 0x1f) << 21)
+#define __PPC_RT(t) __PPC_RS(t)
+#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS 0x06000000
+#define PPC_PREFIX_8LS 0x04000000
+
+#define PPC_INST_LBZ 0x88000000
+#define PPC_INST_LHZ 0xa0000000
+#define PPC_INST_LHA 0xa8000000
+#define PPC_INST_LWZ 0x80000000
+#define PPC_INST_STB 0x98000000
+#define PPC_INST_STH 0xb0000000
+#define PPC_INST_STW 0x90000000
+#define PPC_INST_STD 0xf8000000
+#define PPC_INST_LFS 0xc0000000
+#define PPC_INST_LFD 0xc8000000
+#define PPC_INST_STFS 0xd0000000
+#define PPC_INST_STFD 0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_MLS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_8LS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d)
+
#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 000000000000..3312cb1b058d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+
+/* Older versions of libc do not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR 4
+#endif
+
+#define SI_PKEY_OFFSET 0x20
+
+#define __NR_pkey_mprotect 386
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+
+#define PKEY_BITS_PER_PKEY 2
+#define NR_PKEYS 32
+#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+inline unsigned long pkeyreg_get(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+inline void pkeyreg_set(unsigned long amr)
+{
+ set_amr(amr);
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+ unsigned long amr, shift;
+
+ shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+ amr = pkeyreg_get();
+ amr &= ~(PKEY_BITS_MASK << shift);
+ amr |= (rights & PKEY_BITS_MASK) << shift;
+ pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+ return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+ return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+ bool hash_mmu = false;
+ int pkey;
+
+ /* Protection keys are currently supported on Hash MMU only */
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+ SKIP_IF(!hash_mmu);
+
+ /* Check if the system call is supported */
+ pkey = sys_pkey_alloc(0, 0);
+ SKIP_IF(pkey < 0);
+ sys_pkey_free(pkey);
+
+ return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+ /*
+ * In older versions of libc, siginfo_t does not have si_pkey as
+ * a member.
+ */
+#ifdef si_pkey
+ return si->si_pkey;
+#else
+ return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({ \
+ static char buf[4] = "rwx"; \
+ unsigned int amr_bits; \
+ if ((r) & PKEY_DISABLE_EXECUTE) \
+ buf[2] = '-'; \
+ amr_bits = (r) & PKEY_BITS_MASK; \
+ if (amr_bits & PKEY_DISABLE_WRITE) \
+ buf[1] = '-'; \
+ if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \
+ buf[0] = '-'; \
+ buf; \
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+ if (rights == PKEY_DISABLE_ACCESS)
+ return PKEY_DISABLE_EXECUTE;
+ else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+ return 0;
+
+ if ((rights & PKEY_BITS_MASK) == 0)
+ rights |= PKEY_DISABLE_WRITE;
+ else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+ rights |= PKEY_DISABLE_ACCESS;
+
+ return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c5076b2c5..c0f2742a3a59 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
#define SPRN_PPR 896 /* Program Priority Register */
#define SPRN_AMR 13 /* Authority Mask Register - problem state */
+#define set_amr(v) asm volatile("isync;" \
+ "mtspr " __stringify(SPRN_AMR) ",%0;" \
+ "isync" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
/* TEXASR register bits */
#define TEXASR_FC 0xFE00000000000000
#define TEXASR_FP 0x0100000000000000
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c30d9a..71d2924f5b8b 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -42,6 +42,16 @@ int perf_event_enable(int fd);
int perf_event_disable(int fd);
int perf_event_reset(int fd);
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
static inline bool have_hwcap(unsigned long ftr)
{
return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2)
#endif
bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
/* Yes, this is evil */
#define FAIL_IF(x) \
@@ -71,6 +82,15 @@ do { \
} \
} while (0)
+#define FAIL_IF_EXIT(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ _exit(1); \
+ } \
+} while (0)
+
/* The test harness uses this, yes it's gross */
#define MAGIC_SKIP_RETURN_VALUE 99
@@ -96,11 +116,20 @@ do { \
#define _str(s) #s
#define str(s) _str(s)
+#define sigsafe_err(msg) ({ \
+ ssize_t nbytes __attribute__((unused)); \
+ nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
/* POWER9 feature */
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
#if defined(__powerpc64__)
#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index e31ca6f453ed..d0c23b2e4b60 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -6,3 +6,4 @@ vmx_preempt
fpu_signal
vmx_signal
vsx_preempt
+fpu_denormal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7a2bbd..fcc91c205984 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
top_srcdir = ../../../../..
include ../../lib.mk
@@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S
$(OUTPUT)/fpu_preempt: fpu_asm.S
$(OUTPUT)/fpu_signal: fpu_asm.S
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 000000000000..5f96682abaa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers when the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static int test_denormal_fpu(void)
+{
+ unsigned int m32;
+ unsigned long m64;
+ volatile float f;
+ volatile double d;
+
+ /* try to induce lfs <denormal> ; stfd */
+
+ m32 = 0x00715fcf; /* random denormal */
+ memcpy((float *)&f, &m32, sizeof(f));
+ d = f;
+ memcpy(&m64, (double *)&d, sizeof(d));
+
+ FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_denormal_fpu, "fpu_denormal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f154e77..6761d6ce30ec 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -57,6 +57,9 @@ int test_preempt_vmx(void)
int i, rc, threads;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e0976f..b340a5c4e79d 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@ int test_signal_vmx(void)
void *rc_p;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293cc868e..03c78dfe3444 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@ int test_vmx_syscall(void)
* Setup an environment with much context switching
*/
pid_t pid2;
- pid_t pid = fork();
+ pid_t pid;
int ret;
int child_ret;
+
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ pid = fork();
FAIL_IF(pid == -1);
pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6e2cd3..d1601bb889d4 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@ int test_preempt_vsx(void)
int i, rc, threads;
pthread_t *tids;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..91c775c23c66 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,9 +2,12 @@
hugetlb_vs_thp_test
subpage_prot
tempfile
-prot_sao
segv_errors
wild_bctr
large_vm_fork_separation
bad_accesses
tlbie_test
+pkey_exec_prot
+pkey_siginfo
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..250ce172e0da 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,23 +2,31 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
- large_vm_fork_separation bad_accesses
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \
+ large_vm_fork_separation bad_accesses pkey_exec_prot \
+ pkey_siginfo stack_expansion_signal stack_expansion_ldst
+
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
include ../../lib.mk
-$(TEST_GEN_PROGS): ../harness.c
-
-$(OUTPUT)/prot_sao: ../utils.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f499ef..a864ed7e2008 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -64,34 +64,6 @@ int bad_access(char *p, bool write)
return 0;
}
-static int using_hash_mmu(bool *using_hash)
-{
- char line[128];
- FILE *f;
- int rc;
-
- f = fopen("/proc/cpuinfo", "r");
- FAIL_IF(!f);
-
- rc = 0;
- while (fgets(line, sizeof(line), f) != NULL) {
- if (strcmp(line, "MMU : Hash\n") == 0) {
- *using_hash = true;
- goto out;
- }
-
- if (strcmp(line, "MMU : Radix\n") == 0) {
- *using_hash = false;
- goto out;
- }
- }
-
- rc = -1;
-out:
- fclose(f);
- return rc;
-}
-
static int test(void)
{
unsigned long i, j, addr, region_shift, page_shift, page_size;
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 000000000000..9e5c7f3f498a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+/* Instruction words written into the generated code page */
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_TRAP 0x7fe00008
+#define PPC_INST_BLR 0x4e800020
+
+/*
+ * State shared between test() and the signal handlers: test() sets the
+ * expected pkey, fault type and fault address before each access, and
+ * segv_handler() records the si_code it saw in fault_code.
+ */
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+/* Number of faults test() still expects; segv_handler() decrements it */
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+/* Size of the test mapping in bytes and in instruction words */
+static unsigned long pgsize, numinsns;
+/* Base of the mmap()ed page that holds the generated instructions */
+static unsigned int *insns;
+
+/*
+ * SIGTRAP handler. Getting here always ends the process with exit
+ * status 1; a diagnostic is printed first when the trap did not come
+ * from the address test() is currently probing.
+ */
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	void *expected = (void *) fault_addr;
+
+	if (expected != sinfo->si_addr)
+		sigsafe_err("got a fault for an unexpected address\n");
+
+	_exit(1);
+}
+
+/*
+ * SIGSEGV handler: check the fault against what test() announced and
+ * lift the restriction that caused it so that execution can continue.
+ *
+ * An unexpected address, pkey, fault type or si_code, or a fault that
+ * arrives when none is expected any more, ends the process with exit
+ * status 1.
+ */
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	int signal_pkey, code, type;
+
+	signal_pkey = siginfo_pkey(sinfo);
+
+	/* Publish the si_code so test() can check which fault it got */
+	code = sinfo->si_code;
+	fault_code = code;
+
+	/* The fault must come from the address currently being probed */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* The fault budget of the current test case must not be exhausted */
+	if (remaining_faults == 0) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+	/* Restore permissions in order to continue */
+	if (code == SEGV_ACCERR) {
+		if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE) != 0) {
+			sigsafe_err("failed to set access permissions\n");
+			_exit(1);
+		}
+	} else if (code == SEGV_PKUERR) {
+		if (signal_pkey != fault_pkey) {
+			sigsafe_err("got a fault for an unexpected pkey\n");
+			_exit(1);
+		}
+
+		type = fault_type;
+		if (type == PKEY_DISABLE_ACCESS) {
+			pkey_set_rights(fault_pkey, 0);
+		} else if (type == PKEY_DISABLE_EXECUTE) {
+			/*
+			 * IAMR cannot be written from userspace the way AMR
+			 * can, so the region is reassociated with the
+			 * exec-only pkey instead.
+			 */
+			if (mprotect(insns, pgsize, PROT_EXEC) != 0) {
+				sigsafe_err("failed to set execute permissions\n");
+				_exit(1);
+			}
+		} else {
+			sigsafe_err("got a fault with an unexpected type\n");
+			_exit(1);
+		}
+	} else {
+		sigsafe_err("got a fault with an unexpected code\n");
+		_exit(1);
+	}
+
+	remaining_faults--;
+}
+
+/*
+ * Exercise execute-protection pkeys on one page of generated code.
+ *
+ * The page is read, written and finally executed under different pkey
+ * rights; segv_handler() counts and classifies the resulting faults and
+ * each step checks that exactly the expected ones were seen.
+ *
+ * Returns 0 on success or the non-zero result of pkeys_unsupported();
+ * a failed check bails out through FAIL_IF().
+ */
+static int test(void)
+{
+	struct sigaction segv_act, trap_act;
+	unsigned long rights;
+	int pkey, ret, i;
+
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Setup SIGSEGV handler */
+	segv_act.sa_handler = 0;
+	segv_act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+	segv_act.sa_flags = SA_SIGINFO;
+	segv_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+	/* Setup SIGTRAP handler */
+	trap_act.sa_handler = 0;
+	trap_act.sa_sigaction = trap_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+	trap_act.sa_flags = SA_SIGINFO;
+	trap_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+	/* Setup executable region */
+	pgsize = getpagesize();
+	numinsns = pgsize / sizeof(unsigned int);
+	insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+				      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(insns == MAP_FAILED);
+
+	/* Write the instruction words */
+	for (i = 1; i < numinsns - 1; i++)
+		insns[i] = PPC_INST_NOP;
+
+	/*
+	 * Set the first instruction as an unconditional trap. If
+	 * the last write to this address succeeds, this should
+	 * get overwritten by a no-op.
+	 */
+	insns[0] = PPC_INST_TRAP;
+
+	/*
+	 * Later, to jump to the executable region, we use a branch
+	 * and link instruction (bctrl) which sets the return address
+	 * automatically in LR. Use that to return back.
+	 */
+	insns[numinsns - 1] = PPC_INST_BLR;
+
+	/* Allocate a pkey that restricts execution */
+	rights = PKEY_DISABLE_EXECUTE;
+	pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF(pkey < 0);
+
+	/*
+	 * Pick the first instruction's address from the executable
+	 * region.
+	 */
+	fault_addr = insns;
+
+	/* The following two cases will avoid SEGV_PKUERR */
+	fault_type = -1;
+	fault_pkey = -1;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should not generate a fault as having PROT_EXEC
+	 * implies PROT_READ on GNU systems. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 0;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should generate an access fault as having just
+	 * PROT_EXEC also restricts writes. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_TRAP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* The following three cases will generate SEGV_PKUERR */
+	rights |= PKEY_DISABLE_ACCESS;
+	fault_type = PKEY_DISABLE_ACCESS;
+	fault_pkey = pkey;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate a pkey fault based on AMR bits only
+	 * as having PROT_EXEC implicitly allows reads.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate two faults. First, a pkey fault
+	 * based on AMR bits and then an access fault since
+	 * PROT_EXEC does not allow writes.
+	 */
+	remaining_faults = 2;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_NOP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* Free the current pkey */
+	sys_pkey_free(pkey);
+
+	rights = 0;
+	do {
+		/*
+		 * Allocate pkeys with all valid combinations of read,
+		 * write and execute restrictions.
+		 */
+		pkey = sys_pkey_alloc(0, rights);
+		FAIL_IF(pkey < 0);
+
+		/*
+		 * Jump to the executable region. AMR bits may or may not
+		 * be set but they should not affect execution.
+		 *
+		 * This should generate pkey faults based on IAMR bits which
+		 * may be set to restrict execution.
+		 *
+		 * The first iteration also checks if the overwrite of the
+		 * first instruction word from a trap to a no-op succeeded.
+		 */
+		fault_pkey = pkey;
+		fault_type = -1;
+		remaining_faults = 0;
+		if (rights & PKEY_DISABLE_EXECUTE) {
+			fault_type = PKEY_DISABLE_EXECUTE;
+			remaining_faults = 1;
+		}
+
+		FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+		printf("execute at %p, pkey permissions are %s\n", fault_addr,
+		       pkey_rights(rights));
+		/*
+		 * mtctr overwrites CTR and bctrl overwrites LR, so both
+		 * must be declared as clobbered for the compiler.
+		 */
+		asm volatile("mtctr %0; bctrl" : : "r"(insns) : "ctr", "lr");
+		FAIL_IF(remaining_faults != 0);
+		if (rights & PKEY_DISABLE_EXECUTE)
+			FAIL_IF(fault_code != SEGV_PKUERR);
+
+		/* Free the current pkey */
+		sys_pkey_free(pkey);
+
+		/* Find next valid combination of pkey rights */
+		rights = next_pkey_rights(rights);
+	} while (rights);
+
+	/* Cleanup */
+	munmap((void *) insns, pgsize);
+
+	return 0;
+}
+
+/*
+ * Hand test() to test_harness() and propagate its result as the exit
+ * status; without the explicit return the process would exit with 0
+ * even when the test failed.
+ */
+int main(void)
+{
+	return test_harness(test, "pkey_exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 000000000000..4f815d7c1214
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that was
+ * attempted to be protected by two different keys from two competing
+ * threads at the same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+/* Instruction words used to fill the shared region */
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_BLR 0x4e800020
+#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC)
+
+/* Number of barrier-synchronised rounds each thread of a pair performs */
+#define NUM_ITERATIONS 1000000
+
+/* pkeys allocated by protect() and protect_access() respectively */
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+/*
+ * Rights of the restrictive pkey and the number of faults taken in the
+ * current iteration; both are read by segv_handler().
+ */
+static volatile sig_atomic_t rights, fault_count;
+/* Address protect_access() is about to touch in the current iteration */
+static volatile unsigned int *volatile fault_addr;
+/* Keeps the two threads of a pair in lock-step, one wait per iteration */
+static pthread_barrier_t iteration_barrier;
+
+/*
+ * SIGSEGV handler: verify that the fault is a pkey violation at the
+ * expected address, reported for the restrictive pkey, and the first
+ * one of the current iteration; then lift the restriction so the
+ * faulting access can be retried.
+ *
+ * Any mismatch, or a failure to lift the restriction, prints a
+ * diagnostic and ends the process with exit status 1.
+ */
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	void *pgstart;
+	size_t pgsize;
+	int pkey;
+
+	pkey = siginfo_pkey(sinfo);
+
+	/* Check if this fault originated from a pkey access violation */
+	if (sinfo->si_code != SEGV_PKUERR) {
+		sigsafe_err("got a fault for an unexpected reason\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the restrictive pkey */
+	if (pkey != rest_pkey) {
+		sigsafe_err("got a fault for an unexpected pkey\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for the same iteration */
+	if (fault_count > 0) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+	pgsize = getpagesize();
+	pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+	/*
+	 * If the current fault occurred due to lack of execute rights,
+	 * reassociate the page with the exec-only pkey since execute
+	 * rights cannot be changed directly for the faulting pkey as
+	 * IAMR is inaccessible from userspace.
+	 */
+	if (rights == PKEY_DISABLE_EXECUTE &&
+	    mprotect(pgstart, pgsize, PROT_EXEC)) {
+		sigsafe_err("failed to set execute permissions\n");
+		_exit(1);
+	}
+
+	/*
+	 * Change the AMR permission bits for the faulting pkey so that it
+	 * imposes no read-write restrictions. This is what lets a read or
+	 * write fault continue; it is also done after a successful
+	 * mprotect() above.
+	 */
+	pkey_set_rights(pkey, 0);
+
+	fault_count++;
+}
+
+/* Argument handed to both threads of a protect / protect-and-access pair */
+struct region {
+ /* Rights for the restrictive pkey; read by protect_access() */
+ unsigned long rights;
+ /* Start of the shared mapping */
+ unsigned int *base;
+ /* Length of the shared mapping in bytes */
+ size_t size;
+};
+
+/*
+ * Thread body of the "protect" side of a pair.
+ *
+ * Allocates a pkey without any restrictions and, once per barrier
+ * round, associates it with the region described by @p (a struct
+ * region). Always returns NULL; failures exit via FAIL_IF_EXIT().
+ */
+static void *protect(void *p)
+{
+	struct region *reg = p;
+	unsigned long perms;
+	unsigned int *start;
+	size_t len;
+	int self, round;
+
+	self = gettid();
+	start = reg->base;
+	len = reg->size;
+	FAIL_IF_EXIT(!start);
+
+	/* The permissive pkey restricts neither read, write nor execute */
+	perms = 0;
+
+	printf("tid %d, pkey permissions are %s\n", self, pkey_rights(perms));
+
+	perm_pkey = sys_pkey_alloc(0, perms);
+	FAIL_IF_EXIT(perm_pkey < 0);
+
+	round = 0;
+	while (round < NUM_ITERATIONS) {
+		/*
+		 * Rendezvous with the other thread: the first wait covers
+		 * its allocation of the restrictive pkey, every later one
+		 * marks the start of the next iteration.
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Race to put the region under the permissive pkey */
+		FAIL_IF_EXIT(sys_pkey_mprotect(start, len, PROT_RWX,
+					       perm_pkey));
+		round++;
+	}
+
+	sys_pkey_free(perm_pkey);
+
+	return NULL;
+}
+
+/*
+ * Thread body of the "protect-and-access" side of a pair.
+ *
+ * Allocates a pkey with the rights given in @p (a struct region) and,
+ * once per barrier round, associates it with the region and then reads,
+ * writes or executes a randomly chosen instruction word in it,
+ * depending on those rights. Always returns NULL; failures exit via
+ * FAIL_IF_EXIT().
+ */
+static void *protect_access(void *p)
+{
+ size_t size, numinsns;
+ unsigned int *base;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ /* Stored in the file-scope variable, which segv_handler() also reads */
+ rights = ((struct region *) p)->rights;
+ numinsns = size / sizeof(base[0]);
+ FAIL_IF_EXIT(!base);
+
+ /* Allocate the restrictive pkey */
+ rest_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(rest_pkey < 0);
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+ printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+ (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+ (rights == PKEY_DISABLE_WRITE) ? "write" : "read",
+ base, base + numinsns);
+
+ /*
+ * Repeatedly try to protect the common region with a restrictive
+ * pkey and read, write or execute from it
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * permissive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the restrictive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ rest_pkey));
+
+ /* Choose a random instruction word address from the region */
+ fault_addr = base + (rand() % numinsns);
+ /* segv_handler() rejects a second fault within one iteration */
+ fault_count = 0;
+
+ switch (rights) {
+ /* Read protection test */
+ case PKEY_DISABLE_ACCESS:
+ /*
+ * Read an instruction word from the region and
+ * verify if it has not been overwritten to
+ * something unexpected
+ */
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+ *fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Write protection test */
+ case PKEY_DISABLE_WRITE:
+ /*
+ * Write an instruction word to the region and
+ * verify if the overwrite has succeeded
+ */
+ *fault_addr = PPC_INST_BLR;
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Execute protection test */
+ case PKEY_DISABLE_EXECUTE:
+ /* Jump to the region and execute instructions */
+ asm volatile(
+ "mtctr %0; bctrl"
+ : : "r"(fault_addr) : "ctr", "lr");
+ break;
+ }
+
+ /*
+ * Restore the restrictions originally imposed by the
+ * restrictive pkey as the signal handler would have
+ * cleared out the corresponding AMR bits
+ */
+ pkey_set_rights(rest_pkey, rights);
+ }
+
+ /* Free restrictive pkey */
+ sys_pkey_free(rest_pkey);
+
+ return NULL;
+}
+
+/*
+ * Allocate every pkey that can be had with the given @rights and free
+ * them all again, so that each available pkey has been initialised
+ * with those rights.
+ *
+ * Allocation is expected to fail once the available keys run out;
+ * failed slots hold a negative value and are not passed to
+ * sys_pkey_free().
+ */
+static void reset_pkeys(unsigned long rights)
+{
+	int pkeys[NR_PKEYS], i;
+
+	/* Exhaustively allocate all available pkeys */
+	for (i = 0; i < NR_PKEYS; i++)
+		pkeys[i] = sys_pkey_alloc(0, rights);
+
+	/* Free only the pkeys that were actually allocated */
+	for (i = 0; i < NR_PKEYS; i++) {
+		if (pkeys[i] >= 0)
+			sys_pkey_free(pkeys[i]);
+	}
+}
+
+/*
+ * Run three thread pairs (read, write and execute variants) that race to
+ * protect one shared page with a permissive and a restrictive pkey, and
+ * rely on segv_handler() to check that every fault is reported for the
+ * restrictive pkey.
+ *
+ * Returns 0 on success or the non-zero result of pkeys_unsupported();
+ * a failed check bails out through FAIL_IF().
+ */
+static int test(void)
+{
+ pthread_t prot_thread, pacc_thread;
+ struct sigaction act;
+ pthread_attr_t attr;
+ size_t numinsns;
+ struct region r;
+ int ret, i;
+
+ /* Seeds the rand() calls protect_access() uses to pick addresses */
+ srand(time(NULL));
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Allocate the region */
+ r.size = getpagesize();
+ r.base = mmap(NULL, r.size, PROT_RWX,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(r.base == MAP_FAILED);
+
+ /*
+ * Fill the region with no-ops with a branch at the end
+ * for returning to the caller
+ */
+ numinsns = r.size / sizeof(r.base[0]);
+ for (i = 0; i < numinsns - 1; i++)
+ r.base[i] = PPC_INST_NOP;
+ /* The loop leaves i at numinsns - 1, the last word of the region */
+ r.base[i] = PPC_INST_BLR;
+
+ /* Setup SIGSEGV handler */
+ act.sa_handler = 0;
+ act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ /*
+ * For these tests, the parent process should clear all bits of
+ * AMR and IAMR, i.e. impose no restrictions, for all available
+ * pkeys. This will be the base for the initial AMR and IAMR
+ * values for all the test thread pairs.
+ *
+ * If the AMR and IAMR bits of all available pkeys are cleared
+ * before running the tests and a fault is generated when
+ * attempting to read, write or execute instructions from a
+ * pkey protected region, the pkey responsible for this must be
+ * the one from the protect-and-access thread since the other
+ * one is fully permissive. Despite that, if the pkey reported
+ * by siginfo is not the restrictive pkey, then there must be a
+ * kernel bug.
+ */
+ reset_pkeys(0);
+
+ /* Setup barrier for protect and protect-and-access threads */
+ FAIL_IF(pthread_attr_init(&attr) != 0);
+ FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+ /* Setup and start protect and protect-and-read threads */
+ puts("starting thread pair (protect, protect-and-read)");
+ r.rights = PKEY_DISABLE_ACCESS;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-write threads */
+ puts("starting thread pair (protect, protect-and-write)");
+ r.rights = PKEY_DISABLE_WRITE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-execute threads */
+ puts("starting thread pair (protect, protect-and-execute)");
+ r.rights = PKEY_DISABLE_EXECUTE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Cleanup */
+ FAIL_IF(pthread_attr_destroy(&attr) != 0);
+ FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+ munmap(r.base, r.size);
+
+ return 0;
+}
+
+/*
+ * Hand test() to test_harness() and propagate its result as the exit
+ * status; without the explicit return the process would exit with 0
+ * even when the test failed.
+ */
+int main(void)
+{
+	return test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
deleted file mode 100644
index e2eed65b7735..000000000000
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2016, Michael Ellerman, IBM Corp.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-
-#include <asm/cputable.h>
-
-#include "utils.h"
-
-#define SIZE (64 * 1024)
-
-int test_prot_sao(void)
-{
- char *p;
-
- /* 2.06 or later should support SAO */
- SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
-
- /*
- * Ensure we can ask for PROT_SAO.
- * We can't really verify that it does the right thing, but at least we
- * confirm the kernel will accept it.
- */
- p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- FAIL_IF(p == MAP_FAILED);
-
- /* Write to the mapping, to at least cause a fault */
- memset(p, 0xaa, SIZE);
-
- return 0;
-}
-
-int main(void)
-{
- return test_harness(test_prot_sao, "prot-sao");
-}
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 000000000000..ed9143990888
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* Byte-count helpers */
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+/* Filled in by consume_stack() at its deepest frame and printed by child() */
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+/* Source and destination of the probing store/load in consume_stack() */
+volatile char c;
+
+/* Kind of memory access consume_stack() performs at the target address */
+enum access_type {
+ LOAD,
+ STORE,
+};
+
+/*
+ * Consume stack by recursing until the address of a local variable is at or
+ * below @target_sp, then do an access (load or store, selected by @type) at
+ * the address @delta - 1 bytes below @stack_high.
+ *
+ * The address of the deepest local and the stack pointer at that depth are
+ * recorded in stack_top_ptr and stack_top_sp. Returns 0 once the access has
+ * completed.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+ unsigned long target;
+ char stack_cur;
+
+ if ((unsigned long)&stack_cur > target_sp)
+ return consume_stack(target_sp, stack_high, delta, type);
+ else {
+ // We don't really need this, but without it GCC might not
+ // generate a recursive call above.
+ stack_top_ptr = &stack_cur;
+
+ // Capture the current stack pointer: r1 on powerpc, rsp in the
+ // non-powerpc variant.
+#ifdef __powerpc__
+ asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+ asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+ // With delta == 1 the target is stack_high itself.
+ target = stack_high - delta + 1;
+ volatile char *p = (char *)target;
+
+ if (type == STORE)
+ *p = c;
+ else
+ c = *p;
+
+ // Do something to prevent the stack frame being popped prior to
+ // our access above.
+ getpid();
+ }
+
+ return 0;
+}
+
+/*
+ * Find the first mapping in /proc/self/maps whose name contains @needle.
+ *
+ * On success the mapping's first address is stored in @low, its last
+ * (inclusive) address in @high, and 0 is returned. Returns -1 if the file
+ * cannot be opened, a line cannot be parsed, or no mapping matches.
+ */
+static int search_proc_maps(const char *needle, unsigned long *low, unsigned long *high)
+{
+	unsigned long start, end;
+	static char buf[4096];
+	char name[128];
+	int nfields, rc;
+	FILE *f;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	// Not found until a matching line proves otherwise. This also covers
+	// an empty file and a file whose last line has no name.
+	rc = -1;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		// The device field is major:minor in hex, so it has to be
+		// skipped with %x; %d would stop at the first a-f digit.
+		nfields = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*x:%*x %*d %127s\n",
+				 &start, &end, name);
+
+		// Mappings without a name yield only the two addresses.
+		if (nfields == 2)
+			continue;
+
+		if (nfields != 3) {
+			printf("sscanf errored\n");
+			break;
+		}
+
+		if (strstr(name, needle)) {
+			*low = start;
+			*high = end - 1;
+			rc = 0;
+			break;
+		}
+	}
+
+	fclose(f);
+
+	return rc;
+}
+
+/*
+ * Body of the forked child: locate the [stack] mapping, grow the stack
+ * to @stack_used bytes and perform the @type access @delta bytes into
+ * the stack, then report the addresses involved. Returns 0; a failed
+ * step trips an assert instead.
+ */
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+	unsigned long low, stack_high;
+	const char *kind;
+
+	assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+	assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+	kind = "store";
+	if (type == LOAD)
+		kind = "load";
+
+	printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+	       kind, delta, stack_used, stack_high,
+	       stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+	return 0;
+}
+
+/*
+ * Run one access in a forked child. Returns 0 when the child exited
+ * with status 0, and 1 (after printing the terminating signal) when it
+ * did not exit normally. A child that exits with a non-zero status
+ * trips an assert.
+ */
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+	const char *kind;
+	pid_t pid;
+	int rc;
+
+	pid = fork();
+	if (!pid)
+		exit(child(stack_used, delta, type));
+
+	assert(waitpid(pid, &rc, 0) != -1);
+
+	if (!WIFEXITED(rc) || WEXITSTATUS(rc) != 0) {
+		// We don't expect a non-zero exit that's not a signal
+		assert(!WIFEXITED(rc));
+
+		kind = (type == LOAD) ? "load" : "store";
+		printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n",
+		       kind, delta, stack_used, WTERMSIG(rc));
+
+		return 1;
+	}
+
+	return 0;
+}
+
+// Amount of stack each child consumes before the probing access. The
+// value is fairly arbitrary but well below any of the targets probed, so
+// the distance between the stack pointer and the target stays large.
+#define DEFAULT_SIZE (32 * _KB)
+
+/*
+ * Probe the stack with accesses of the given @type: every @page_size
+ * step up to @rlim_cur, and @rlim_cur itself, must succeed, while one
+ * byte beyond @rlim_cur must fault. Violations trip an assert.
+ */
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+	unsigned long delta = page_size;
+
+	// Anywhere within the rlimit must be accessible
+	while (delta <= rlim_cur) {
+		assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+		delta += page_size;
+	}
+
+	assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+	// Going past the rlimit must fail
+	assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+/*
+ * Check that loads and stores can reach every page within the stack
+ * rlimit and fault just beyond it.
+ *
+ * Returns 0 on success and 1 when the rlimit cannot be read or is
+ * unlimited; violations of the expected behaviour trip an assert in
+ * test_one_type().
+ */
+static int test(void)
+{
+	unsigned long page_size;
+	struct rlimit rlimit;
+
+	page_size = getpagesize();
+	if (getrlimit(RLIMIT_STACK, &rlimit) != 0) {
+		perror("getrlimit");
+		return 1;
+	}
+
+	// With an unlimited stack there is no boundary to probe, and the
+	// page-by-page walk up to the limit would be effectively unbounded.
+	if (rlimit.rlim_cur == RLIM_INFINITY) {
+		printf("Stack rlimit is unlimited, cannot test its boundary\n");
+		return 1;
+	}
+
+	printf("Stack rlimit is 0x%lx\n", (unsigned long)rlimit.rlim_cur);
+
+	printf("Testing loads ...\n");
+	test_one_type(LOAD, page_size, rlimit.rlim_cur);
+	printf("Testing stores ...\n");
+	test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+	printf("All OK\n");
+
+	return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+#endif
+
+/*
+ * On powerpc the test is handed to test_harness(); elsewhere test() is
+ * called directly. Either way its result becomes the exit status.
+ */
+int main(void)
+{
+#ifdef __powerpc__
+	return test_harness(test, "stack_expansion_ldst");
+#else
+	return test();
+#endif
+}
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 000000000000..c8b32a29e274
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+/* Byte-count helpers */
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+/* Reference address set by child(); stack usage is measured downwards from it */
+static char *stack_base_ptr;
+/* Address of the deepest local reached by consume_stack() */
+static char *stack_top_ptr;
+
+/* Set by sigusr1_handler(); consume_stack() spins until it becomes non-zero */
+static volatile sig_atomic_t sig_occurred = 0;
+
+/* Signal handler: only records that the signal has been delivered */
+static void sigusr1_handler(int signal_arg)
+{
+	(void) signal_arg;
+
+	sig_occurred = 1;
+}
+
+/*
+ * Recurse until at least @stack_size bytes lie between stack_base_ptr and
+ * a local variable, then notify the parent through @write_pipe and spin
+ * until sigusr1_handler() has run. Returns 0 once the signal has been seen;
+ * a failing notify_parent() bails out through FAIL_IF().
+ */
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+ char stack_cur;
+
+ if ((stack_base_ptr - &stack_cur) < stack_size)
+ return consume_stack(stack_size, write_pipe);
+ else {
+ /* Remember how deep we got; child() prints it afterwards */
+ stack_top_ptr = &stack_cur;
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ /* Stay in this deep frame until the signal handler has run */
+ while (!sig_occurred)
+ barrier();
+ }
+
+ return 0;
+}
+
+/*
+ * Body of the forked child: install the SIGUSR1 handler, consume
+ * @stack_size bytes of stack, wait there for the signal and report the
+ * stack addresses involved. Returns 0 on success; a failure in
+ * consume_stack() bails out through FAIL_IF().
+ */
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+ struct sigaction act;
+ char stack_base;
+
+ act.sa_handler = sigusr1_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ if (sigaction(SIGUSR1, &act, NULL) < 0)
+ err(1, "sigaction");
+
+ /* Round the address of a local up to the next 64K boundary */
+ stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+ FAIL_IF(consume_stack(stack_size, write_pipe));
+
+ printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+ stack_size, stack_base_ptr, stack_top_ptr,
+ stack_base_ptr - stack_top_ptr);
+
+ return 0;
+}
+
+/*
+ * Fork a child that consumes @stack_size bytes of stack, wait until it
+ * reports being there, send it SIGUSR1 and collect its exit status.
+ *
+ * Returns 0 on success; any failing step, including a failed fork(),
+ * bails out through FAIL_IF().
+ */
+static int test_one_size(unsigned int stack_size)
+{
+	union pipe read_pipe, write_pipe;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	/*
+	 * Never carry on with pid == -1: passing it to kill() below would
+	 * address every process we are allowed to signal.
+	 */
+	FAIL_IF(pid == -1);
+	if (pid == 0) {
+		close(read_pipe.read_fd);
+		close(write_pipe.write_fd);
+		/* The child writes to the pipe the parent reads from */
+		exit(child(stack_size, read_pipe));
+	}
+
+	close(read_pipe.write_fd);
+	close(write_pipe.read_fd);
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	kill(pid, SIGUSR1);
+
+	FAIL_IF(wait_for_child(pid));
+
+	close(read_pipe.read_fd);
+	close(write_pipe.write_fd);
+
+	return 0;
+}
+
+/*
+ * Run test_one_size() for used-stack sizes from 1MB - 64K up to (but not
+ * including) 1MB + 64K, in steps of 64 bytes to cover odd sizes too.
+ * Returns 0 when every size passes; a failing size bails out through
+ * FAIL_IF().
+ */
+int test(void)
+{
+	const unsigned int first = (1 * _MB) - (64 * _KB);
+	const unsigned int span = 128 * _KB;
+	unsigned int off;
+
+	for (off = 0; off < span; off += 64) {
+		FAIL_IF(test_one_size(first + off));
+	}
+
+	return 0;
+}
+
+/* Hand test() to test_harness() and return its result as the exit status */
+int main(void)
+{
+ return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 7b4ac4537702..2980abca31e0 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,6 +9,7 @@
#include <stdbool.h>
#include <string.h>
#include <sys/prctl.h>
+#include <asm/cputable.h>
#include "event.h"
#include "utils.h"
@@ -104,6 +105,9 @@ static int test_body(void)
struct event events[3];
u64 overhead;
+ // The STCX_FAIL event we use works on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index a2d7b0e3dca9..a26ac122c759 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -91,8 +91,6 @@ int back_to_back_ebbs(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
index bc893813483e..bb9f587fa76e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -42,8 +42,6 @@ int cycles(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
index dcd351d20328..9ae795ce314e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -99,8 +99,6 @@ int cycles_with_freeze(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
printf("EBBs while frozen %d\n", ebbs_while_frozen);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index 94c99c12c0f2..4b45a2e70f62 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -71,8 +71,6 @@ int cycles_with_mmcr2(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index dfbc5c3ad52d..21537d6eb6b7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index ca2f7d729155..b208bf6ad58d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
FAIL_IF(ebb_state.stats.ebb_count == 0);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index ac3e6e182614..ba2681a12cc7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -75,7 +75,6 @@ static int test_body(void)
ebb_freeze_pmcs();
ebb_global_disable();
- count_pmc(4, sample_period);
mtspr(SPRN_PMC4, 0xdead);
dump_summary_ebb_state();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
index b8242e9d97d2..791d37ba327b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -70,13 +70,6 @@ int multi_counter(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
- count_pmc(2, sample_period);
- count_pmc(3, sample_period);
- count_pmc(4, sample_period);
- count_pmc(5, sample_period);
- count_pmc(6, sample_period);
-
dump_ebb_state();
for (i = 0; i < 6; i++)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index a05c0e18ded6..9b0f70d59702 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -61,8 +61,6 @@ static int cycles_child(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_summary_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
index 153ebc92234f..2904c741e04e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -82,8 +82,6 @@ static int test_body(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
if (mmcr0_mismatch)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
index eadad75ed7e6..b29f8ba22d1e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -76,8 +76,6 @@ int pmc56_overflow(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(2, sample_period);
-
dump_ebb_state();
printf("PMC5/6 overflow %d\n", pmc56_overflowed);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d0b4d3..bf1bec013bbb 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
#define __SELFTESTS_POWERPC_PMU_LIB_H
+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2efdc5..2d37942bf72b 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,6 +12,8 @@
#include <string.h>
#include <sys/prctl.h>
+#include <asm/cputable.h>
+
#include "event.h"
#include "lib.h"
#include "utils.h"
@@ -23,12 +25,9 @@
static int per_event_excludes(void)
{
struct event *e, events[4];
- char *platform;
int i;
- platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
- FAIL_IF(!platform);
- SKIP_IF(strcmp(platform, "power8") != 0);
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
/*
* We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fee032d..bbc05ffc5860 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@ static int child(struct shared_info *info)
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr);
+ set_amr(info->amr);
/*
* We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8431e0..bc454f899124 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -44,7 +44,7 @@ struct shared_info {
unsigned long amr2;
/* AMR value that ptrace should refuse to write to the child. */
- unsigned long amr3;
+ unsigned long invalid_amr;
/* IAMR value the parent expects to read from the child. */
unsigned long expected_iamr;
@@ -57,8 +57,8 @@ struct shared_info {
* (even though they're valid ones) because userspace doesn't have
* access to those registers.
*/
- unsigned long new_iamr;
- unsigned long new_uamor;
+ unsigned long invalid_iamr;
+ unsigned long invalid_uamor;
};
static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
@@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
return syscall(__NR_pkey_alloc, flags, init_access_rights);
}
-static int sys_pkey_free(int pkey)
-{
- return syscall(__NR_pkey_free, pkey);
-}
-
static int child(struct shared_info *info)
{
unsigned long reg;
@@ -100,33 +95,37 @@ static int child(struct shared_info *info)
info->amr1 |= 3ul << pkeyshift(pkey1);
info->amr2 |= 3ul << pkeyshift(pkey2);
- info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+ /*
+ * invalid amr value where we try to force write
+ * things which are denied by a uamor setting.
+ */
+ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+ /*
+ * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+ */
if (disable_execute)
info->expected_iamr |= 1ul << pkeyshift(pkey1);
else
info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
- info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
-
- info->expected_uamor |= 3ul << pkeyshift(pkey1) |
- 3ul << pkeyshift(pkey2);
- info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
- info->new_uamor |= 3ul << pkeyshift(pkey1);
+ /*
+ * We allocated pkey2 and pkey3 above. Clear the IAMR bits.
+ */
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
/*
- * We won't use pkey3. We just want a plausible but invalid key to test
- * whether ptrace will let us write to AMR bits we are not supposed to.
- *
- * This also tests whether the kernel restores the UAMOR permissions
- * after a key is freed.
+ * Create an IAMR value different from expected value.
+ * Kernel will reject an IAMR and UAMOR change.
*/
- sys_pkey_free(pkey3);
+ info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+ info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr1, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr1);
+ set_amr(info->amr1);
/* Wait for parent to read our AMR value and write a new one. */
ret = prod_parent(&info->child_sync);
@@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid)
PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
PARENT_FAIL_IF(ret, &info->child_sync);
- info->amr1 = info->amr2 = info->amr3 = regs[0];
- info->expected_iamr = info->new_iamr = regs[1];
- info->expected_uamor = info->new_uamor = regs[2];
+ info->amr1 = info->amr2 = regs[0];
+ info->expected_iamr = regs[1];
+ info->expected_uamor = regs[2];
/* Wake up child so that it can set itself up. */
ret = prod_child(&info->child_sync);
@@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid)
return ret;
/* Write invalid AMR value in child. */
- ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
PARENT_FAIL_IF(ret, &info->child_sync);
- printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
/* Wake up child so that it can verify it didn't change. */
ret = prod_child(&info->child_sync);
@@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid)
/* Try to write to IAMR. */
regs[0] = info->amr1;
- regs[1] = info->new_iamr;
+ regs[1] = info->invalid_iamr;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
PARENT_FAIL_IF(!ret, &info->child_sync);
@@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid)
ptrace_write_running, regs[0], regs[1]);
/* Try to write to IAMR and UAMOR. */
- regs[2] = info->new_uamor;
+ regs[2] = info->invalid_uamor;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
PARENT_FAIL_IF(!ret, &info->child_sync);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a860cc9..4436ca9d3caf 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@ int ptrace_tar(void)
pid_t pid;
int ret, status;
+ // TAR was added in v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e893306..cb9875f764ca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@ int ptrace_vsx(void)
pid_t pid;
int ret, status, i;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 8c6b982af2a8..c8d82b784102 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -183,6 +183,16 @@ int spectre_v2_test(void)
if (miss_percent > 15) {
printf("Branch misses > 15%% unexpected in this configuration!\n");
printf("Possible mis-match between reported & actual mitigation\n");
+ /*
+ * Such a mismatch may be caused by a guest system
+ * reporting as vulnerable when the host is mitigated.
+ * Return skip code to avoid detecting this as an error.
+ * We are not vulnerable and reporting otherwise, so
+ * missing such a mismatch is safe.
+ */
+ if (state == VULNERABLE)
+ return 4;
+
return 1;
}
break;
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623d85c3..9c39f55a58ff 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null
TEST_GEN_PROGS := memcmp_64 strlen
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa7546957f..979df3d98368 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <time.h>
+#include <asm/cputable.h>
#include "utils.h"
#define SIZE 256
@@ -13,6 +15,9 @@
#define LARGE_MAX_OFFSET 32
#define LARGE_SIZE_START 4096
+/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */
+#define MAP_SIZE (64 * 1024)
+
#define MAX_OFFSET_DIFF_S1_S2 48
int vmx_count;
@@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset,
static int testcase(bool islarge)
{
- char *s1;
- char *s2;
- unsigned long i;
-
- unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
- unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
- int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
-
- s1 = memalign(128, alloc_size);
- if (!s1) {
- perror("memalign");
- exit(1);
- }
+ unsigned long i, comp_size, alloc_size;
+ char *p, *s1, *s2;
+ int iterations;
- s2 = memalign(128, alloc_size);
- if (!s2) {
- perror("memalign");
- exit(1);
- }
+ comp_size = (islarge ? LARGE_SIZE : SIZE);
+ alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+ iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+ p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ /* Put s1/s2 at the end of a page */
+ s1 = p + MAP_SIZE - alloc_size;
+ s2 = p + 3 * MAP_SIZE - alloc_size;
+
+ /* And unmap the subsequent page to force a fault if we overread */
+ munmap(p + MAP_SIZE, MAP_SIZE);
+ munmap(p + 3 * MAP_SIZE, MAP_SIZE);
srandom(time(0));
@@ -147,6 +152,11 @@ static int testcase(bool islarge)
static int testcases(void)
{
+#ifdef __powerpc64__
+ // vcmpequd used in memcmp_64.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
testcase(0);
testcase(1);
return 0;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98c4896..18b6a773d5c7 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -16,6 +16,7 @@
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
@@ -88,28 +89,40 @@ void *get_auxv_entry(int type)
int pick_online_cpu(void)
{
- cpu_set_t mask;
- int cpu;
+ int ncpus, cpu = -1;
+ cpu_set_t *mask;
+ size_t size;
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
- CPU_ZERO(&mask);
+ CPU_ZERO_S(size, mask);
- if (sched_getaffinity(0, sizeof(mask), &mask)) {
+ if (sched_getaffinity(0, size, mask)) {
perror("sched_getaffinity");
- return -1;
+ goto done;
}
/* We prefer a primary thread, but skip 0 */
- for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = 8; cpu < ncpus; cpu += 8)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
/* Search for anything, but in reverse */
- for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = ncpus - 1; cpu >= 0; cpu--)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
printf("No cpus in affinity mask?!\n");
- return -1;
+
+done:
+ CPU_FREE(mask);
+ return cpu;
}
bool is_ppc64le(void)
@@ -293,3 +306,31 @@ void set_dscr(unsigned long val)
asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
}
+
+int using_hash_mmu(bool *using_hash)
+{
+ char line[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/cpuinfo", "r");
+ FAIL_IF(!f);
+
+ rc = 0;
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (strcmp(line, "MMU : Hash\n") == 0) {
+ *using_hash = true;
+ goto out;
+ }
+
+ if (strcmp(line, "MMU : Radix\n") == 0) {
+ *using_hash = false;
+ goto out;
+ }
+ }
+
+ rc = -1;
+out:
+ fclose(f);
+ return rc;
+}
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index da7a9dda9490..f7911aaeb007 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -35,6 +35,8 @@
#define CLOCK_INVALID -1
#endif
+#define NSEC_PER_SEC 1000000000LL
+
/* clock_adjtime is not available in GLIBC < 2.14 */
#if !__GLIBC_PREREQ(2, 14)
#include <sys/syscall.h>
@@ -132,6 +134,8 @@ static void usage(char *progname)
" 1 - external time stamp\n"
" 2 - periodic output\n"
" -p val enable output with a period of 'val' nanoseconds\n"
+ " -H val set output phase to 'val' nanoseconds (requires -p)\n"
+ " -w val set output pulse width to 'val' nanoseconds (requires -p)\n"
" -P val enable or disable (val=1|0) the system clock PPS\n"
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
@@ -169,7 +173,6 @@ int main(int argc, char *argv[])
int list_pins = 0;
int pct_offset = 0;
int n_samples = 0;
- int perout = -1;
int pin_index = -1, pin_func;
int pps = -1;
int seconds = 0;
@@ -177,10 +180,13 @@ int main(int argc, char *argv[])
int64_t t1, t2, tp;
int64_t interval, offset;
+ int64_t perout_phase = -1;
+ int64_t pulsewidth = -1;
+ int64_t perout = -1;
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:p:P:sSt:T:w:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -197,6 +203,9 @@ int main(int argc, char *argv[])
case 'g':
gettime = 1;
break;
+ case 'H':
+ perout_phase = atoll(optarg);
+ break;
case 'i':
index = atoi(optarg);
break;
@@ -215,7 +224,7 @@ int main(int argc, char *argv[])
}
break;
case 'p':
- perout = atoi(optarg);
+ perout = atoll(optarg);
break;
case 'P':
pps = atoi(optarg);
@@ -233,6 +242,9 @@ int main(int argc, char *argv[])
settime = 3;
seconds = atoi(optarg);
break;
+ case 'w':
+ pulsewidth = atoi(optarg);
+ break;
case 'z':
flagtest = 1;
break;
@@ -391,6 +403,16 @@ int main(int argc, char *argv[])
}
}
+ if (pulsewidth >= 0 && perout < 0) {
+ puts("-w can only be specified together with -p");
+ return -1;
+ }
+
+ if (perout_phase >= 0 && perout < 0) {
+ puts("-H can only be specified together with -p");
+ return -1;
+ }
+
if (perout >= 0) {
if (clock_gettime(clkid, &ts)) {
perror("clock_gettime");
@@ -398,11 +420,24 @@ int main(int argc, char *argv[])
}
memset(&perout_request, 0, sizeof(perout_request));
perout_request.index = index;
- perout_request.start.sec = ts.tv_sec + 2;
- perout_request.start.nsec = 0;
- perout_request.period.sec = 0;
- perout_request.period.nsec = perout;
- if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+ perout_request.period.sec = perout / NSEC_PER_SEC;
+ perout_request.period.nsec = perout % NSEC_PER_SEC;
+ perout_request.flags = 0;
+ if (pulsewidth >= 0) {
+ perout_request.flags |= PTP_PEROUT_DUTY_CYCLE;
+ perout_request.on.sec = pulsewidth / NSEC_PER_SEC;
+ perout_request.on.nsec = pulsewidth % NSEC_PER_SEC;
+ }
+ if (perout_phase >= 0) {
+ perout_request.flags |= PTP_PEROUT_PHASE;
+ perout_request.phase.sec = perout_phase / NSEC_PER_SEC;
+ perout_request.phase.nsec = perout_phase % NSEC_PER_SEC;
+ } else {
+ perout_request.start.sec = ts.tv_sec + 2;
+ perout_request.start.nsec = 0;
+ }
+
+ if (ioctl(fd, PTP_PEROUT_REQUEST2, &perout_request)) {
perror("PTP_PEROUT_REQUEST");
} else {
puts("periodic output request okay");
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 5267b9fb7c0f..535720b2592a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -123,12 +123,16 @@ struct seccomp_data {
# define __NR_seccomp 277
# elif defined(__riscv)
# define __NR_seccomp 277
+# elif defined(__csky__)
+# define __NR_seccomp 277
# elif defined(__hppa__)
# define __NR_seccomp 338
# elif defined(__powerpc__)
# define __NR_seccomp 358
# elif defined(__s390__)
# define __NR_seccomp 348
+# elif defined(__xtensa__)
+# define __NR_seccomp 337
# else
# warning "seccomp syscall number unknown for this architecture"
# define __NR_seccomp 0xffff
@@ -1680,6 +1684,14 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM a7
# define SYSCALL_RET a0
+#elif defined(__csky__)
+# define ARCH_REGS struct pt_regs
+#if defined(__CSKYABIV2__)
+# define SYSCALL_NUM regs[3]
+#else
+# define SYSCALL_NUM regs[9]
+#endif
+# define SYSCALL_RET a0
#elif defined(__hppa__)
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM gr[20]
@@ -1699,6 +1711,14 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
+#elif defined(__xtensa__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM syscall
+/*
+ * On xtensa syscall return value is in the register
+ * a2 of the current window which is not fixed.
+ */
+#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
@@ -1770,7 +1790,8 @@ void change_syscall(struct __test_metadata *_metadata,
EXPECT_EQ(0, ret) {}
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
- defined(__s390__) || defined(__hppa__) || defined(__riscv)
+ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
+ defined(__xtensa__) || defined(__csky__)
{
regs.SYSCALL_NUM = syscall;
}
@@ -1813,6 +1834,9 @@ void change_syscall(struct __test_metadata *_metadata,
if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
TH_LOG("Can't modify syscall return on this architecture");
+
+#elif defined(__xtensa__)
+ regs.SYSCALL_RET(regs) = result;
#else
regs.SYSCALL_RET = result;
#endif
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index ad0f8df2ca0a..8934a3766d20 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -71,7 +71,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
swapcontext(&sc, &uc);
ksft_print_msg("%s\n", p->msg);
if (!p->flag) {
- ksft_exit_skip("[RUN]\tAborting\n");
+ ksft_exit_fail_msg("[RUN]\tAborting\n");
exit(EXIT_FAILURE);
}
}
@@ -144,7 +144,7 @@ int main(void)
err = sigaltstack(&stk, NULL);
if (err) {
if (errno == EINVAL) {
- ksft_exit_skip(
+ ksft_test_result_skip(
"[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
/*
* If test cases for the !SS_AUTODISARM variant were
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index d5a2da428752..be8266f5d04c 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
default_file_splice_read
+splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index e519b159b60d..541cd826d5a5 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := default_file_splice_read.sh
-TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+TEST_PROGS := default_file_splice_read.sh short_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read
include ../lib.mk
diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config
new file mode 100644
index 000000000000..058c928368b8
--- /dev/null
+++ b/tools/testing/selftests/splice/config
@@ -0,0 +1 @@
+CONFIG_TEST_LKM=m
diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings
new file mode 100644
index 000000000000..89cedfc0d12b
--- /dev/null
+++ b/tools/testing/selftests/splice/settings
@@ -0,0 +1 @@
+timeout=5
diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh
new file mode 100755
index 000000000000..7810d3589d9a
--- /dev/null
+++ b/tools/testing/selftests/splice/short_splice_read.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+ret=0
+
+do_splice()
+{
+ filename="$1"
+ bytes="$2"
+ expected="$3"
+
+ out=$(./splice_read "$filename" "$bytes" | cat)
+ if [ "$out" = "$expected" ] ; then
+ echo "ok: $filename $bytes"
+ else
+ echo "FAIL: $filename $bytes"
+ ret=1
+ fi
+}
+
+test_splice()
+{
+ filename="$1"
+
+ full=$(cat "$filename")
+ two=$(echo "$full" | grep -m1 . | cut -c-2)
+
+ # Make sure full splice has the same contents as a standard read.
+ do_splice "$filename" 4096 "$full"
+
+ # Make sure a partial splice sees the first two characters.
+ do_splice "$filename" 2 "$two"
+}
+
+# proc_single_open(), seq_read()
+test_splice /proc/$$/limits
+# special open, seq_read()
+test_splice /proc/$$/comm
+
+# proc_handler, proc_dointvec_minmax
+test_splice /proc/sys/fs/nr_open
+# proc_handler, proc_dostring
+test_splice /proc/sys/kernel/modprobe
+# proc_handler, special read
+test_splice /proc/sys/kernel/version
+
+if ! [ -d /sys/module/test_module/sections ] ; then
+ modprobe test_module
+fi
+# kernfs, attr
+test_splice /sys/module/test_module/coresize
+# kernfs, binattr
+test_splice /sys/module/test_module/sections/.init.text
+
+exit $ret
diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c
new file mode 100644
index 000000000000..46dae6a25cfb
--- /dev/null
+++ b/tools/testing/selftests/splice/splice_read.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ size_t size;
+ ssize_t spliced;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (argc == 3)
+ size = atol(argv[2]);
+ else {
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (statbuf.st_size > INT_MAX) {
+ fprintf(stderr, "%s: Too big\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ size = statbuf.st_size;
+ }
+
+ /* splice(2) file to stdout. */
+ spliced = splice(fd, NULL, STDOUT_FILENO, NULL,
+ size, SPLICE_F_MOVE);
+ if (spliced < 0) {
+ perror("splice");
+ return EXIT_FAILURE;
+ }
+
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 3824b66f41a0..414a617db993 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -86,9 +86,9 @@ int main(void)
int err;
ksft_print_header();
- ksft_set_plan(3 + 7);
sync_api_supported();
+ ksft_set_plan(3 + 7);
ksft_print_msg("[RUN]\tTesting sync framework\n");
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/Makefile
index be5a5e542804..91fee5c43274 100644
--- a/tools/testing/selftests/tc-testing/bpf/Makefile
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-APIDIR := ../../../../include/uapi
+top_srcdir = $(abspath ../../../..)
+APIDIR := $(top_srcdir)/include/uapi
TEST_GEN_FILES = action.o
-top_srcdir = ../../../../..
KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
+include ../lib.mk
CLANG ?= clang
LLC ?= llc
@@ -28,3 +28,6 @@ $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+
+TEST_PROGS += ./tdc.sh
+TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/action.c
index c32b99b80e19..c32b99b80e19 100644
--- a/tools/testing/selftests/tc-testing/bpf/action.c
+++ b/tools/testing/selftests/tc-testing/action.c
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
new file mode 100755
index 000000000000..7fe38c76db44
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./tdc.py -c actions --nobuildebpf
+./tdc.py -c qdisc
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index 080709cc4297..cd4a27ee1466 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -24,7 +24,7 @@ NAMES = {
# Name of the namespace to use
'NS': 'tcut',
# Directory containing eBPF test programs
- 'EBPFDIR': './bpf'
+ 'EBPFDIR': './'
}
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index f83391aa42cf..5cebfb356345 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -19,7 +19,6 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest.h"
#include "../kselftest_harness.h"
#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 79db22604019..93fc5cadce61 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -881,8 +881,9 @@ TEST_F(hmm, migrate)
}
/*
- * Migrate anonymous memory to device private memory and fault it back to system
- * memory.
+ * Migrate anonymous memory to device private memory and fault some of it back
+ * to system memory, then try migrating the resulting mix of system and device
+ * private memory to the device.
*/
TEST_F(hmm, migrate_fault)
{
@@ -924,14 +925,58 @@ TEST_F(hmm, migrate_fault)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Fault pages back to system memory and check them. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ /* Fault half the pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Migrate memory to the device again. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
hmm_buffer_free(buffer);
}
/*
+ * Migrate anonymous shared memory to device private memory.
+ */
+TEST_F(hmm, migrate_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, -ENOENT);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
* Try to migrate various memory types to device private memory.
*/
TEST_F(hmm2, migrate_mixed)
@@ -1292,6 +1337,82 @@ TEST_F(hmm2, snapshot)
}
/*
+ * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
+ * should be mapped by a large page table entry.
+ */
+TEST_F(hmm, compound)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *m;
+ int ret;
+ long pagesizes[4];
+ int n, idx;
+ unsigned long i;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+
+ n = gethugepagesizes(pagesizes, 4);
+ if (n <= 0)
+ return;
+ for (idx = 0; --n > 0; ) {
+ if (pagesizes[n] < pagesizes[idx])
+ idx = n;
+ }
+ size = ALIGN(TWOMEG, pagesizes[idx]);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = get_hugepage_region(size, GHR_STRICT);
+ if (buffer->ptr == NULL) {
+ free(buffer);
+ return;
+ }
+
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize the pages the device will snapshot in buffer->ptr. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE |
+ HMM_DMIRROR_PROT_PMD);
+
+ /* Make the region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
+ HMM_DMIRROR_PROT_PMD);
+
+ free_hugepage_region(buffer->ptr);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
* Test two devices reading the same memory (double mapped).
*/
TEST_F(hmm2, double_map)
diff --git a/tools/usb/usbip/doc/usbip.8 b/tools/usb/usbip/doc/usbip.8
index a6097be25d28..a15d20063b98 100644
--- a/tools/usb/usbip/doc/usbip.8
+++ b/tools/usb/usbip/doc/usbip.8
@@ -83,7 +83,9 @@ List local USB devices.
.SH EXAMPLES
client:# usbip list --remote=server
- - List exportable usb devices on the server.
+ - List devices exported by remote server.
+
+ client:# modprobe vhci-hcd
client:# usbip attach --remote=server --busid=1-2
- Connect the remote USB device.
diff --git a/tools/usb/usbip/doc/usbipd.8 b/tools/usb/usbip/doc/usbipd.8
index ac4635db3f03..fb62a756893b 100644
--- a/tools/usb/usbip/doc/usbipd.8
+++ b/tools/usb/usbip/doc/usbipd.8
@@ -73,7 +73,7 @@ USB/IP client can connect and use exported devices.
.SH EXAMPLES
- server:# modprobe usbip
+ server:# modprobe usbip-host
server:# usbipd -D
- Start usbip daemon.
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
index d1d8ba2a4a40..ca78aa368476 100644
--- a/tools/usb/usbip/libsrc/usbip_host_common.c
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -23,7 +23,7 @@
#include "list.h"
#include "sysfs_utils.h"
-struct udev *udev_context;
+extern struct udev *udev_context;
static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
{
diff --git a/tools/usb/usbip/vudc/vudc_server_example.sh b/tools/usb/usbip/vudc/vudc_server_example.sh
index 2736be64f203..fed53f51ee01 100755
--- a/tools/usb/usbip/vudc/vudc_server_example.sh
+++ b/tools/usb/usbip/vudc/vudc_server_example.sh
@@ -24,7 +24,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
-# For more information, please refer to <http://unlicense.org/>
+# For more information, please refer to <https://unlicense.org/>
################################################################################
################################################################################
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index dbf14c1e2188..f2640e505c4e 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
(__virtio_test_bit((dev), feature))
/**
- * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * virtio_has_dma_quirk - determine whether this device has the DMA quirk
* @vdev: the device
*/
-static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
{
/*
* Note the reverse polarity of the quirk feature (compared to most
* other features), this is for compatibility with legacy systems.
*/
- return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
}
static inline bool virtio_is_little_endian(struct virtio_device *vdev)