Compare commits

...

30 Commits

Author SHA1 Message Date
Ihor Solodrai
f5dcbae736 sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   d7988720ef3ea5926f1b886b27eddf08abbadba0
Checkpoint bpf-next commit: ca0f39a369c5f927c3d004e63a5a778b08a9df94
Baseline bpf commit:        593fffb8bcfdacc2111a9951afe1fae77988aa4a
Checkpoint bpf commit:      e06e6b8001233241eb5b2e2791162f0585f50f4b

Andrey Grodzovsky (1):
  libbpf: Optimize kprobe.session attachment for exact function names

Ihor Solodrai (1):
  libbpf: Remove extern declaration of bpf_stream_vprintk()

Jakub Kicinski (1):
  bpftool: Fix truncated netlink dumps

Jiri Olsa (2):
  libbpf: Add uprobe syscall feature detection
  libbpf: Add support to detect nop,nop5 instructions combo for usdt
    probe

Josef Bacik (1):
  libbpf: Support appending split BTF in btf__add_btf()

 src/bpf_helpers.h     |  3 ---
 src/btf.c             | 27 ++++++++++++++++---------
 src/features.c        | 24 ++++++++++++++++++++++
 src/libbpf.c          | 19 ++++++++++++++++-
 src/libbpf_internal.h |  2 ++
 src/netlink.c         |  4 +++-
 src/usdt.c            | 47 +++++++++++++++++++++++++++++++++++++++----
 7 files changed, 108 insertions(+), 18 deletions(-)

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-12 13:02:22 -07:00
Ihor Solodrai
8bf68faba8 sync: update .mailmap
Update .mailmap based on libbpf's list of contributors and on the latest
.mailmap version in the upstream repository.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-12 13:02:22 -07:00
Andrey Grodzovsky
e08da3014b libbpf: Optimize kprobe.session attachment for exact function names
Detect exact function names (no wildcards) in
bpf_program__attach_kprobe_multi_opts() and bypass kallsyms parsing,
passing the symbol directly to the kernel via syms[] array.  This
benefits all callers, not just kprobe.session.

When the pattern contains no '*' or '?' characters, set syms to point
directly at the pattern string and cnt to 1, skipping the expensive
/proc/kallsyms or available_filter_functions parsing (~150ms per
function).

Error code normalization: the fast path returns ESRCH from kernel's
ftrace_lookup_symbols(), while the slow path returns ENOENT from
userspace kallsyms parsing.  Convert ESRCH to ENOENT in the
bpf_link_create error path to maintain API consistency - both paths
now return identical error codes for "symbol not found".

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20260302200837.317907-2-andrey.grodzovsky@crowdstrike.com
2026-03-12 13:02:22 -07:00
Josef Bacik
ddb0c14f1b libbpf: Support appending split BTF in btf__add_btf()
btf__add_btf() currently rejects split BTF sources with -ENOTSUP.
This prevents merging types from multiple kernel module BTFs that
are all split against the same vmlinux base.

Extend btf__add_btf() to handle split BTF sources by:

- Replacing the blanket -ENOTSUP with a validation that src and dst
  share the same base BTF pointer when both are split, returning
  -EOPNOTSUPP on mismatch.

- Computing src_start_id from the source's base to distinguish base
  type ID references (which must remain unchanged) from split type
  IDs (which must be remapped to new positions in the destination).

- Using src_btf->nr_types instead of btf__type_cnt()-1 for the type
  count, which is correct for both split and non-split sources.

- Skipping base string offsets (< start_str_off) during the string
  rewrite loop, mirroring the type ID skip pattern.  Since src and
  dst share the same base BTF, base string offsets are already valid
  and need no remapping.

For non-split sources the behavior is identical: src_start_id is 1,
the type_id < 1 guard is never true (VOID is already skipped), and
the remapping formula reduces to the original.  start_str_off is 0
so no string offsets are skipped.

Assisted-by: Claude:claude-opus-4-6

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/c00216ed48cf7897078d9645679059d5ebf42738.1772657690.git.josef@toxicpanda.com
2026-03-12 13:02:22 -07:00
Jiri Olsa
67cce78af9 libbpf: Add support to detect nop,nop5 instructions combo for usdt probe
Adding support to detect nop,nop5 instructions combo for usdt probe
by checking on probe's following nop5 instruction.

When the nop,nop5 combo is detected together with uprobe syscall,
we can place the probe on top of nop5 and get it optimized.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20260224103915.1369690-3-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-12 13:02:22 -07:00
Jiri Olsa
8cbfbe1415 libbpf: Add uprobe syscall feature detection
Adding uprobe syscall feature detection that will be used
in following changes.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20260224103915.1369690-2-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-12 13:02:22 -07:00
Ihor Solodrai
cc7760ce9a libbpf: Remove extern declaration of bpf_stream_vprintk()
An issue was reported that building BPF program which includes both
vmlinux.h and bpf_helpers.h from libbpf fails due to conflicting
declarations of bpf_stream_vprintk().

Remove the extern declaration from bpf_helpers.h to address this.

In order to use bpf_stream_printk() macro, BPF programs are expected
to either include vmlinux.h of the kernel they are targeting, or add
their own extern declaration.

Reported-by: Luca Boccassi <luca.boccassi@gmail.com>
Closes: https://github.com/libbpf/libbpf/issues/947
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Link: https://lore.kernel.org/r/20260218215651.2057673-3-ihor.solodrai@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-12 13:02:22 -07:00
Jakub Kicinski
7155e8e234 bpftool: Fix truncated netlink dumps
Netlink requires that the recv buffer used during dumps is at least
min(PAGE_SIZE, 8k) (see the man page). Otherwise the messages will
get truncated. Make sure bpftool follows this requirement, avoid
missing information on systems with large pages.

Acked-by: Quentin Monnet <qmo@kernel.org>
Fixes: 7084566a236f ("tools/bpftool: Remove libbpf_internal.h usage in bpftool")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20260217194150.734701-1-kuba@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2026-03-12 13:02:22 -07:00
Ihor Solodrai
6314de34ef Update checkpoint commit due to upstream rebase
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-12 13:02:22 -07:00
Ihor Solodrai
bacf439175 ci/diffs: Add a patch fixing up bpftool_helpers.c
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-12 13:02:22 -07:00
Ihor Solodrai
eca706a5e1 ci: fix concurrency group for push and schedule triggers
github.head_ref is empty for push and schedule events, so all those
runs shared the same concurrency group and cancelled each other. Fall
back to github.run_id to give each non-PR run a unique group.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-11 18:35:55 -07:00
Ihor Solodrai
ac0952761f ci: run vmtest job inside kbuilder-debian container
Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-11 18:35:55 -07:00
Ihor Solodrai
6b9b5e34b1 ci: remove LATEST kernel handling from vmtest
The libbpf CI always builds the kernel from source at CHECKPOINT-COMMIT;
there is no prebuilt-kernel matrix entry. Remove the `kernel` input and
the conditional build-vs-download logic, simplifying the workflow.

The run-vmtest action defaults KERNEL to "LATEST" internally when the
env var is unset, so DENYLIST-LATEST is still picked up. The vmlinuz
path is auto-discovered via `make -s image_name` when not passed.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-11 18:35:55 -07:00
Ihor Solodrai
09ae947cd8 ci: consolidate and clean up denylists
Merge DENYLIST-LATEST into DENYLIST and remove the per-kernel denylist
files. With LATEST being the only kernel mode, there's no need for
separate files. Also remove the s390x denylist (libbpf CI only tests
x86_64) and drop stale entries fixed upstream.

Add map_kptr, test_profiler (kprobes not available in VM kernel), and
sockmap udp multi channels (flaky) based on CI run results.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-11 18:35:55 -07:00
Ihor Solodrai
b57c0a1b38 ci: update libbpf/ci action references from v3 to v4
Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-11 18:35:55 -07:00
Ihor Solodrai
f0b6d8cd49 ci: remove stale patches from ci/diffs
All 4 patches either fail to apply (context mismatch) or are already
applied upstream. They produce noise in CI logs.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2026-03-11 18:35:55 -07:00
Andrii Nakryiko
6ddc03d4fe sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   dc855b77719fe452d670cae2cf64da1eb51f16cc
Checkpoint bpf-next commit: 4c51f90d45dca71e7974ed5a7c40b9c04a6c6762
Baseline bpf commit:        1a7eb7a3d74031e6c173f0822023f354c2870354
Checkpoint bpf commit:      593fffb8bcfdacc2111a9951afe1fae77988aa4a

Amery Hung (1):
  libbpf: Fix invalid write loop logic in bpf_linker__add_buf()

Daniel Borkmann (2):
  net: Add queue-create operation
  netkit: Add single device mode for netkit

Emil Tsalapatis (3):
  libbpf: Add gating for arena globals relocation feature
  libbpf: Do not use PROG_TYPE_TRACEPOINT program for feature gating
  libbpf: Delay feature gate check until object prepare time

Jakub Kicinski (1):
  Revert "Merge branch
    'netkit-support-for-io_uring-zero-copy-and-af_xdp'"

Jonas Köppeler (1):
  net/sched: sch_cake: share shaper state across sub-instances of
    cake_mq

Paolo Abeni (1):
  geneve: add netlink support for GRO hint

 include/uapi/linux/if_link.h   |  1 +
 include/uapi/linux/pkt_sched.h |  1 +
 src/features.c                 | 65 ++++++++++++++++++++++++++++++++++
 src/libbpf.c                   | 17 ++++++---
 src/libbpf_internal.h          |  2 ++
 src/linker.c                   |  2 +-
 6 files changed, 83 insertions(+), 5 deletions(-)

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2026-02-17 20:15:42 -08:00
Emil Tsalapatis
93c0673751 libbpf: Delay feature gate check until object prepare time
Commit 728ff167910e ("libbpf: Add gating for arena globals relocation feature")
adds a feature gate check that loads a map and BPF program to
test the running kernel supports large direct offsets for LDIMM64
instructions. This check is currently used to calculate arena symbol
offsets during bpf_object__collect_relos, itself called by
bpf_object_open.

However, the program calling bpf_object_open may not have the permissions to
load maps and programs. This is the case with the BPF selftests, where
bpftool is invoked at compilation time during skeleton generation. This
causes errors as the feature gate unexpectedly fails with -EPERM.

Avoid this by moving all the use of the FEAT_LDIMM64_FULL_RANGE_OFF feature gate
to BPF object preparation time instead.

Fixes: 728ff167910e ("libbpf: Add gating for arena globals relocation feature")
Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20260217204345.548648-3-emil@etsalapatis.com
2026-02-17 20:15:42 -08:00
Emil Tsalapatis
8a596848c7 libbpf: Do not use PROG_TYPE_TRACEPOINT program for feature gating
Commit 728ff167910e uses a PROG_TYPE_TRACEPOINT BPF test program to
check whether the running kernel supports large LDIMM64 offsets. The
feature gate incorrectly assumes that the program will fail at
verification time with one of two messages, depending on whether the
feature is supported by the running kernel. However,
PROG_TYPE_TRACEPOINT programs may fail to load before verification even
starts, e.g., if the shell does not have the appropriate capabilities.
Use a BPF_PROG_TYPE_SOCKET_FILTER program for the feature gate instead.

Also fix two minor issues. First, ensure the log buffer for the test is
initialized: Failing program load before verification led to libbpf dumping
uninitialized data to stdout. Also, ensure that close() is only called
for program_fd in the probe if the program load actually succeeded. The
call was currently failing silently with -EBADF most of the time.

Fixes: 728ff167910e ("libbpf: Add gating for arena globals relocation feature")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20260217204345.548648-2-emil@etsalapatis.com
2026-02-17 20:15:42 -08:00
Paolo Abeni
a61d018702 geneve: add netlink support for GRO hint
Allow configuring and dumping the new device option, and cache its value
into the geneve socket itself.
The new option is not tie to it any code yet.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://patch.msgid.link/2295d4e4d1e919a3189425141bbc71c7850a2de0.1769011015.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-17 20:15:42 -08:00
Jakub Kicinski
13ae2d1841 Revert "Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'"
This reverts commit 77b9c4a438fc66e2ab004c411056b3fb71a54f2c, reversing
changes made to 4515ec4ad58a37e70a9e1256c0b993958c9b7497:

 931420a2fc36 ("selftests/net: Add netkit container tests")
 ab771c938d9a ("selftests/net: Make NetDrvContEnv support queue leasing")
 6be87fbb2776 ("selftests/net: Add env for container based tests")
 61d99ce3dfc2 ("selftests/net: Add bpf skb forwarding program")
 920da3634194 ("netkit: Add xsk support for af_xdp applications")
 eef51113f8af ("netkit: Add netkit notifier to check for unregistering devices")
 b5ef109d22d4 ("netkit: Implement rtnl_link_ops->alloc and ndo_queue_create")
 b5c3fa4a0b16 ("netkit: Add single device mode for netkit")
 0073d2fd679d ("xsk: Proxy pool management for leased queues")
 1ecea95dd3b5 ("xsk: Extend xsk_rcv_check validation")
 804bf334d08a ("net: Proxy netdev_queue_get_dma_dev for leased queues")
 0caa9a8ddec3 ("net: Proxy net_mp_{open,close}_rxq for leased queues")
 ff8889ff9107 ("net, ethtool: Disallow leased real rxqs to be resized")
 9e2103f36110 ("net: Add lease info to queue-get response")
 31127deddef4 ("net: Implement netdev_nl_queue_create_doit")
 a5546e18f77c ("net: Add queue-create operation")

The series will conflict with io_uring work, and the code needs more
polish.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-02-17 20:15:42 -08:00
Daniel Borkmann
05121e3fd9 netkit: Add single device mode for netkit
Add a single device mode for netkit instead of netkit pairs. The primary
target for the paired devices is to connect network namespaces, of course,
and support has been implemented in projects like Cilium [0]. For the rxq
leasing the plan is to support two main scenarios related to single device
mode:

* For the use-case of io_uring zero-copy, the control plane can either
  set up a netkit pair where the peer device can perform rxq leasing which
  is then tied to the lifetime of the peer device, or the control plane
  can use a regular netkit pair to connect the hostns to a Pod/container
  and dynamically add/remove rxq leasing through a single device without
  having to interrupt the device pair. In the case of io_uring, the memory
  pool is used as skb non-linear pages, and thus the skb will go its way
  through the regular stack into netkit. Things like the netkit policy when
  no BPF is attached or skb scrubbing etc apply as-is in case the paired
  devices are used, or if the backend memory is tied to the single device
  and traffic goes through a paired device.

* For the use-case of AF_XDP, the control plane needs to use netkit in the
  single device mode. The single device mode currently enforces only a
  pass policy when no BPF is attached, and does not yet support BPF link
  attachments for AF_XDP. skbs sent to that device get dropped at the
  moment. Given AF_XDP operates at a lower layer of the stack tying this
  to the netkit pair did not make sense. In future, the plan is to allow
  BPF at the XDP layer which can: i) process traffic coming from the AF_XDP
  application (e.g. QEMU with AF_XDP backend) to filter egress traffic or
  to push selected egress traffic up to the single netkit device to the
  local stack (e.g. DHCP requests), and ii) vice-versa skbs sent to the
  single netkit into the AF_XDP application (e.g. DHCP replies). Also,
  the control-plane can dynamically manage rxq leasing for the single
  netkit device without having to interrupt (e.g. down/up cycle) the main
  netkit pair for the Pod which has traffic going in and out.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Co-developed-by: David Wei <dw@davidwei.uk>
Signed-off-by: David Wei <dw@davidwei.uk>
Reviewed-by: Jordan Rife <jordan@jrife.io>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://docs.cilium.io/en/stable/operations/performance/tuning/#netkit-device-mode [0]
Link: https://patch.msgid.link/20260115082603.219152-10-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-17 20:15:42 -08:00
Daniel Borkmann
a623828cdc net: Add queue-create operation
Add a ynl netdev family operation called queue-create that creates a
new queue on a netdevice:

      name: queue-create
      attribute-set: queue
      flags: [admin-perm]
      do:
        request:
          attributes:
            - ifindex
            - type
            - lease
        reply: &queue-create-op
          attributes:
            - id

This is a generic operation such that it can be extended for various
use cases in future. Right now it is mandatory to specify ifindex,
the queue type which is enforced to rx and a lease. The newly created
queue id is returned to the caller.

A queue from a virtual device can have a lease which refers to another
queue from a physical device. This is useful for memory providers
and AF_XDP operations which take an ifindex and queue id to allow
applications to bind against virtual devices in containers. The lease
couples both queues together and allows to proxy the operations from
a virtual device in a container to the physical device.

In future, the nested lease attribute can be lifted and made optional
for other use-cases such as dynamic queue creation for physical
netdevs. The lack of lease and the specification of the physical
device as an ifindex will imply that we need a real queue to be
allocated. Similarly, the queue type enforcement to rx can then be
lifted as well to support tx.

An early implementation had only driver-specific integration [0], but
in order for other virtual devices to reuse, it makes sense to have
this as a generic API in core net.

For leasing queues, the virtual netdev must have real_num_rx_queue
less than num_rx_queues at the time of calling queue-create. The
queue-type must be rx as only rx queues are supported for leasing
for now. We also enforce that the queue-create ifindex must point
to a virtual device, and that the nested lease attribute's ifindex
must point to a physical device. The nested lease attribute set
contains a netns-id attribute which is currently only intended for
dumping as part of the queue-get operation. Also, it is modeled as
an s32 type similarly as done elsewhere in the stack.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Co-developed-by: David Wei <dw@davidwei.uk>
Signed-off-by: David Wei <dw@davidwei.uk>
Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0]
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260115082603.219152-2-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-17 20:15:42 -08:00
Jonas Köppeler
4f8bdd9ce3 net/sched: sch_cake: share shaper state across sub-instances of cake_mq
This commit adds shared shaper state across the cake instances beneath a
cake_mq qdisc. It works by periodically tracking the number of active
instances, and scaling the configured rate by the number of active
queues.

The scan is lockless and simply reads the qlen and the last_active state
variable of each of the instances configured beneath the parent cake_mq
instance. Locking is not required since the values are only updated by
the owning instance, and eventual consistency is sufficient for the
purpose of estimating the number of active queues.

The interval for scanning the number of active queues is set to 200 us.
We found this to be a good tradeoff between overhead and response time.
For a detailed analysis of this aspect see the Netdevconf talk:

https://netdevconf.info/0x19/docs/netdev-0x19-paper16-talk-paper.pdf

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20260109-mq-cake-sub-qdisc-v8-5-8d613fece5d8@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2026-02-17 20:15:42 -08:00
Andrii Nakryiko
fa0bbf147e sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   08a7491843224f8b96518fbe70d9e48163046054
Checkpoint bpf-next commit: dc855b77719fe452d670cae2cf64da1eb51f16cc
Baseline bpf commit:        22cc16c04b7893d8fc22810599f49a305d600b9e
Checkpoint bpf commit:      1a7eb7a3d74031e6c173f0822023f354c2870354

Amery Hung (1):
  libbpf: Fix invalid write loop logic in bpf_linker__add_buf()

Bill Wendling (1):
  compiler_types.h: Attributes: Add __counted_by_ptr macro

Dapeng Mi (1):
  perf/x86/intel: Add support for PEBS memory auxiliary info field in
    DMR

Emil Tsalapatis (1):
  libbpf: Add gating for arena globals relocation feature

 include/uapi/linux/perf_event.h | 27 ++++++++++++--
 include/uapi/linux/stddef.h     |  4 +++
 src/features.c                  | 64 +++++++++++++++++++++++++++++++++
 src/libbpf.c                    |  7 ++--
 src/libbpf_internal.h           |  2 ++
 src/linker.c                    |  2 +-
 6 files changed, 100 insertions(+), 6 deletions(-)

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2026-02-11 12:04:56 -08:00
Andrii Nakryiko
4c4f39f873 sync: update .mailmap
Update .mailmap based on libbpf's list of contributors and on the latest
.mailmap version in the upstream repository.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2026-02-11 12:04:56 -08:00
Amery Hung
fc735dab54 libbpf: Fix invalid write loop logic in bpf_linker__add_buf()
Fix bpf_linker__add_buf()'s logic of copying data from memory buffer into
memfd. In the event of short write not writing entire buf_sz bytes into memfd
file, we'll append bytes from the beginning of buf *again* (corrupting ELF
file contents) instead of correctly appending the rest of not-yet-read buf
contents.

Closes: https://github.com/libbpf/libbpf/issues/945
Fixes: 6d5e5e5d7ce1 ("libbpf: Extend linker API to support in-memory ELF files")
Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20260209230134.3530521-1-ameryhung@gmail.com
2026-02-11 12:04:56 -08:00
Emil Tsalapatis
429aaef6a3 libbpf: Add gating for arena globals relocation feature
Add feature gating for the arena globals relocation introduced in
commit c1f61171d44b. The commit depends on a previous commit in the
same patchset that is absent from older kernels
(12a1fe6e12db "bpf/verifier: Do not limit maximum direct offset into arena map").

Without this commit, arena globals relocation with arenas >= 512MiB
fails to load and breaks libbpf's backwards compatibility.

Introduce a libbpf feature to check whether the running kernel allows for
full range ldimm64 offset, and only relocate arena globals if it does.

Fixes: c1f61171d44b ("libbpf: Move arena globals to the end of the arena")
Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20260210184532.255475-1-emil@etsalapatis.com
2026-02-11 12:04:56 -08:00
Dapeng Mi
ad0d0e5112 perf/x86/intel: Add support for PEBS memory auxiliary info field in DMR
With the introduction of the OMR feature, the PEBS memory auxiliary info
field for load and store latency events has been restructured for DMR.

The memory auxiliary info field's bit[8] indicates whether a L2 cache
miss occurred for a memory load or store instruction. If bit[8] is 0,
it signifies no L2 cache miss, and bits[7:0] specify the exact cache data
source (up to the L2 cache level). If bit[8] is 1, bits[7:0] represent
the OMR encoding, indicating the specific L3 cache or memory region
involved in the memory access. A significant enhancement is OMR encoding
provides up to 8 fine-grained memory regions besides the cache region.

A significant enhancement for OMR encoding is the ability to provide
up to 8 fine-grained memory regions in addition to the cache region,
offering more detailed insights into memory access regions.

For detailed information on the memory auxiliary info encoding, please
refer to section 16.2 "PEBS LOAD LATENCY AND STORE LATENCY FACILITY" in
the ISE documentation.

This patch ensures that the PEBS memory auxiliary info field is correctly
interpreted and utilized in DMR.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260114011750.350569-3-dapeng1.mi@linux.intel.com
2026-02-11 12:04:56 -08:00
Bill Wendling
1317132162 compiler_types.h: Attributes: Add __counted_by_ptr macro
Introduce __counted_by_ptr(), which works like __counted_by(), but for
pointer struct members.

struct foo {
	int a, b, c;
	char *buffer __counted_by_ptr(bytes);
	short nr_bars;
	struct bar *bars __counted_by_ptr(nr_bars);
	size_t bytes;
};

Because "counted_by" can only be applied to pointer members in very
recent compiler versions, its application ends up needing to be distinct
from flexibe array "counted_by" annotations, hence a separate macro.

This is a reworking of Kees' previous patch [1].

Link: https://lore.kernel.org/all/20251020220118.1226740-1-kees@kernel.org/ [1]
Co-developed-by: Kees Cook <kees@kernel.org>
Signed-off-by: Bill Wendling <morbo@google.com>
Link: https://patch.msgid.link/20260116005838.2419118-1-morbo@google.com
Signed-off-by: Kees Cook <kees@kernel.org>
2026-02-11 12:04:56 -08:00
26 changed files with 322 additions and 395 deletions

View File

@@ -7,7 +7,7 @@ on:
- cron: '0 18 * * *'
concurrency:
group: ci-build-${{ github.head_ref }}
group: ci-build-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:

View File

@@ -7,7 +7,7 @@ on:
- cron: '0 18 * * *'
concurrency:
group: ci-test-${{ github.head_ref }}
group: ci-test-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
@@ -16,16 +16,14 @@ jobs:
fail-fast: false
matrix:
include:
- kernel: 'LATEST'
runs_on: 'ubuntu-24.04'
- runs_on: 'ubuntu-24.04'
arch: 'x86_64'
llvm-version: '21'
pahole: 'master'
name: Linux ${{ matrix.kernel }} llvm-${{ matrix.llvm-version }}
name: llvm-${{ matrix.llvm-version }}
uses: ./.github/workflows/vmtest.yml
with:
runs_on: ${{ matrix.runs_on }}
kernel: ${{ matrix.kernel }}
arch: ${{ matrix.arch }}
llvm-version: ${{ matrix.llvm-version }}
pahole: ${{ matrix.pahole }}

View File

@@ -12,11 +12,6 @@ on:
required: true
default: 'x86_64'
type: string
kernel:
description: 'kernel version or LATEST'
required: true
default: 'LATEST'
type: string
pahole:
description: 'pahole rev or branch'
required: false
@@ -31,12 +26,15 @@ jobs:
vmtest:
name: pahole@${{ inputs.pahole }}
runs-on: ${{ inputs.runs_on }}
container:
image: ghcr.io/kernel-patches/runner:kbuilder-debian-x86_64
options: --privileged
steps:
- uses: actions/checkout@v4
- name: Setup environment
uses: libbpf/ci/setup-build-env@v3
uses: libbpf/ci/setup-build-env@v4
with:
pahole: ${{ inputs.pahole }}
arch: ${{ inputs.arch }}
@@ -49,14 +47,14 @@ jobs:
echo "CHECKPOINT=$(cat CHECKPOINT-COMMIT)" >> $GITHUB_ENV
- name: Get kernel source at checkpoint
uses: libbpf/ci/get-linux-source@v3
uses: libbpf/ci/get-linux-source@v4
with:
repo: 'https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git'
rev: ${{ env.CHECKPOINT }}
dest: '${{ github.workspace }}/.kernel'
- name: Patch kernel source
uses: libbpf/ci/patch-kernel@v3
uses: libbpf/ci/patch-kernel@v4
with:
patches-root: '${{ github.workspace }}/ci/diffs'
repo-root: '.kernel'
@@ -73,27 +71,18 @@ jobs:
cd -
- name: Build kernel image
if: ${{ inputs.kernel == 'LATEST' }}
shell: bash
run: |
cd .kernel
make -j $((4*$(nproc))) all
cp vmlinux ${{ github.workspace }}
cp vmlinux $GITHUB_WORKSPACE
cd -
- name: Download prebuilt kernel
if: ${{ inputs.kernel != 'LATEST' }}
uses: libbpf/ci/download-vmlinux@v3
with:
kernel: ${{ inputs.kernel }}
arch: ${{ inputs.arch }}
- name: Build selftests/bpf
uses: libbpf/ci/build-selftests@v3
uses: libbpf/ci/build-selftests@v4
env:
MAX_MAKE_JOBS: 32
VMLINUX_BTF: ${{ github.workspace }}/vmlinux
VMLINUX_H: ${{ inputs.kernel != 'LATEST' && format('{0}/.github/actions/build-selftests/vmlinux.h', github.workspace) || '' }}
with:
arch: ${{ inputs.arch }}
kernel-root: ${{ github.workspace }}/.kernel
@@ -103,15 +92,13 @@ jobs:
env:
ALLOWLIST_FILE: /tmp/allowlist
DENYLIST_FILE: /tmp/denylist
KERNEL: ${{ inputs.kernel }}
VMLINUX: ${{ github.workspace }}/vmlinux
LLVM_VERSION: ${{ inputs.llvm-version }}
SELFTESTS_BPF: ${{ github.workspace }}/.kernel/tools/testing/selftests/bpf
VMTEST_CONFIGS: ${{ github.workspace }}/ci/vmtest/configs
uses: libbpf/ci/run-vmtest@v3
uses: libbpf/ci/run-vmtest@v4
with:
arch: ${{ inputs.arch }}
kbuild-output: ${{ github.workspace }}/.kernel
kernel-root: ${{ github.workspace }}/.kernel
vmlinuz: ${{ inputs.arch }}/vmlinuz-${{ inputs.kernel }}

View File

@@ -8,8 +8,10 @@ Dan Carpenter <error27@gmail.com> <dan.carpenter@oracle.com>
Geliang Tang <geliang@kernel.org> <geliang.tang@suse.com>
Herbert Xu <herbert@gondor.apana.org.au>
Jakub Kicinski <kuba@kernel.org> <jakub.kicinski@netronome.com>
Jason Xing <kerneljasonxing@gmail.com> <kernelxing@tencent.com>
Jean-Philippe Brucker <jpb@kernel.org> <jean-philippe@linaro.org>
Jesper Dangaard Brouer <hawk@kernel.org> <brouer@redhat.com>
Joe Damato <joe@dama.to> <jdamato@fastly.com>
Kees Cook <kees@kernel.org> <keescook@chromium.org>
Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.co.jp>
Leo Yan <leo.yan@linux.dev> <leo.yan@linaro.org>
@@ -22,3 +24,4 @@ Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com>
Stanislav Fomichev <sdf@fomichev.me> <sdf@google.com>
Vadim Fedorenko <vadim.fedorenko@linux.dev> <vadfed@meta.com>
Vadim Fedorenko <vadim.fedorenko@linux.dev> <vfedorenko@novek.ru>
Yixun Lan <dlan@kernel.org> <dlan@gentoo.org>

View File

@@ -1 +1 @@
22cc16c04b7893d8fc22810599f49a305d600b9e
e06e6b8001233241eb5b2e2791162f0585f50f4b

View File

@@ -1 +1 @@
08a7491843224f8b96518fbe70d9e48163046054
ca0f39a369c5f927c3d004e63a5a778b08a9df94

View File

@@ -1,85 +0,0 @@
From e3a4f5092e847ec00e2b66c060f2cef52b8d0177 Mon Sep 17 00:00:00 2001
From: Ihor Solodrai <ihor.solodrai@pm.me>
Date: Thu, 14 Nov 2024 12:49:34 -0800
Subject: [PATCH bpf-next] selftests/bpf: set test path for
token/obj_priv_implicit_token_envvar
token/obj_priv_implicit_token_envvar test may fail in an environment
where the process executing tests can not write to the root path.
Example:
https://github.com/libbpf/libbpf/actions/runs/11844507007/job/33007897936
Change default path used by the test to /tmp/bpf-token-fs, and make it
runtime configurable via an environment variable.
Signed-off-by: Ihor Solodrai <ihor.solodrai@pm.me>
---
tools/testing/selftests/bpf/prog_tests/token.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
index fe86e4fdb89c..39f5414b674b 100644
--- a/tools/testing/selftests/bpf/prog_tests/token.c
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -828,8 +828,11 @@ static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
return validate_struct_ops_load(mnt_fd, true /* should succeed */);
}
+static const char* token_bpffs_custom_dir() {
+ return getenv("BPF_SELFTESTS_BPF_TOKEN_DIR") ? : "/tmp/bpf-token-fs";
+}
+
#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
-#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
{
@@ -892,6 +895,7 @@ static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel
static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
{
+ const char *custom_dir = token_bpffs_custom_dir();
LIBBPF_OPTS(bpf_object_open_opts, opts);
struct dummy_st_ops_success *skel;
int err;
@@ -909,10 +913,10 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l
* BPF token implicitly, unless pointed to it through
* LIBBPF_BPF_TOKEN_PATH envvar
*/
- rmdir(TOKEN_BPFFS_CUSTOM);
- if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
+ rmdir(custom_dir);
+ if (!ASSERT_OK(mkdir(custom_dir, 0777), "mkdir_bpffs_custom"))
goto err_out;
- err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, custom_dir, MOVE_MOUNT_F_EMPTY_PATH);
if (!ASSERT_OK(err, "move_mount_bpffs"))
goto err_out;
@@ -925,7 +929,7 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l
goto err_out;
}
- err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
+ err = setenv(TOKEN_ENVVAR, custom_dir, 1 /*overwrite*/);
if (!ASSERT_OK(err, "setenv_token_path"))
goto err_out;
@@ -951,11 +955,11 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l
if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
goto err_out;
- rmdir(TOKEN_BPFFS_CUSTOM);
+ rmdir(custom_dir);
unsetenv(TOKEN_ENVVAR);
return 0;
err_out:
- rmdir(TOKEN_BPFFS_CUSTOM);
+ rmdir(custom_dir);
unsetenv(TOKEN_ENVVAR);
return -EINVAL;
}
--
2.47.0

View File

@@ -0,0 +1,72 @@
From 8b6db30a1ca47875070ce6b282316a5148a5c4c0 Mon Sep 17 00:00:00 2001
From: Ihor Solodrai <ihor.solodrai@linux.dev>
Date: Thu, 12 Mar 2026 11:13:14 -0700
Subject: [PATCH] selftests/bpf: Bump path and command buffer sizes in
bpftool_helpers.c
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
---
tools/testing/selftests/bpf/bpftool_helpers.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c
index 929fc257f431..6a5e99b5e542 100644
--- a/tools/testing/selftests/bpf/bpftool_helpers.c
+++ b/tools/testing/selftests/bpf/bpftool_helpers.c
@@ -2,18 +2,18 @@
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
+#include <limits.h>
#include "bpf_util.h"
#include "bpftool_helpers.h"
-#define BPFTOOL_PATH_MAX_LEN 64
-#define BPFTOOL_FULL_CMD_MAX_LEN 512
+#define BPFTOOL_FULL_CMD_MAX_LEN (PATH_MAX * 2)
#define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool"
static int detect_bpftool_path(char *buffer, size_t size)
{
- char tmp[BPFTOOL_PATH_MAX_LEN];
+ char tmp[PATH_MAX];
const char *env_path;
/* First, check if BPFTOOL environment variable is set */
@@ -29,7 +29,7 @@ static int detect_bpftool_path(char *buffer, size_t size)
/* Check default bpftool location (will work if we are running the
* default flavor of test_progs)
*/
- snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH);
+ snprintf(tmp, PATH_MAX, "./%s", BPFTOOL_DEFAULT_PATH);
if (access(tmp, X_OK) == 0) {
strscpy(buffer, tmp, size);
return 0;
@@ -38,7 +38,7 @@ static int detect_bpftool_path(char *buffer, size_t size)
/* Check alternate bpftool location (will work if we are running a
* specific flavor of test_progs, e.g. cpuv4 or no_alu32)
*/
- snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH);
+ snprintf(tmp, PATH_MAX, "../%s", BPFTOOL_DEFAULT_PATH);
if (access(tmp, X_OK) == 0) {
strscpy(buffer, tmp, size);
return 0;
@@ -50,7 +50,7 @@ static int detect_bpftool_path(char *buffer, size_t size)
static int run_command(char *args, char *output_buf, size_t output_max_len)
{
- static char bpftool_path[BPFTOOL_PATH_MAX_LEN] = {0};
+ static char bpftool_path[PATH_MAX] = {0};
bool suppress_output = !(output_buf && output_max_len);
char command[BPFTOOL_FULL_CMD_MAX_LEN];
FILE *f;
@@ -84,4 +84,3 @@ int get_bpftool_command_output(char *args, char *output_buf, size_t output_max_l
{
return run_command(args, output_buf, output_max_len);
}
-
--
2.53.0

View File

@@ -1,69 +0,0 @@
From bd06a13f44e15e2e83561ea165061c445a15bd9e Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Thu, 27 Mar 2025 11:55:28 -0700
Subject: [PATCH 4000/4002] selftests/bpf: Fix tests after fields reorder in
struct file
The change in struct file [1] moved f_ref to the 3rd cache line.
It made *(u64 *)file dereference invalid from the verifier point of view,
because btf_struct_walk() walks into f_lock field, which is 4-byte long.
Fix the selftests to deference the file pointer as a 4-byte access.
[1] commit e249056c91a2 ("fs: place f_ref to 3rd cache line in struct file to resolve false sharing")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250327185528.1740787-1-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
tools/testing/selftests/bpf/progs/test_module_attach.c | 2 +-
tools/testing/selftests/bpf/progs/test_subprogs_extable.c | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index fb07f5773888..7f3c233943b3 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -117,7 +117,7 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
bpf_probe_read_kernel(&buf, 8, ret);
bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
- *(volatile long long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
index e2a21fbd4e44..dcac69f5928a 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -21,7 +21,7 @@ static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -31,7 +31,7 @@ int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -41,7 +41,7 @@ int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
--
2.49.0

View File

@@ -1,71 +0,0 @@
From 8be3a12f9f266aaf3f06f0cfe0e90cfe4d956f3d Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Fri, 28 Mar 2025 12:31:24 -0700
Subject: [PATCH 4001/4002] selftests/bpf: Fix verifier_bpf_fastcall test
Commit [1] moves percpu data on x86 from address 0x000... to address
0xfff...
Before [1]:
159020: 0000000000030700 0 OBJECT GLOBAL DEFAULT 23 pcpu_hot
After [1]:
152602: ffffffff83a3e034 4 OBJECT GLOBAL DEFAULT 35 pcpu_hot
As a result, verifier_bpf_fastcall tests should now expect a negative
value for pcpu_hot, IOW, the disassemble should show "r=" instead of
"w=".
Fix this in the test.
Note that, a later change created a new variable "cpu_number" for
bpf_get_smp_processor_id() [2]. The inlining logic is updated properly
as part of this change, so there is no need to fix anything on the
kernel side.
[1] commit 9d7de2aa8b41 ("x86/percpu/64: Use relative percpu offsets")
[2] commit 01c7bc5198e9 ("x86/smp: Move cpu number to percpu hot section")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250328193124.808784-1-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index a9be6ae49454..c258b0722e04 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -12,7 +12,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 8")
__xlated("4: r5 = 5")
-__xlated("5: w0 = ")
+__xlated("5: r0 = ")
__xlated("6: r0 = &(void __percpu *)(r0)")
__xlated("7: r0 = *(u32 *)(r0 +0)")
__xlated("8: exit")
@@ -704,7 +704,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 32+0")
__xlated("2: r1 = 1")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* bpf_loop params setup */
@@ -753,7 +753,7 @@ __arch_x86_64
__log_level(4) __msg("stack depth 40+0")
/* call bpf_get_smp_processor_id */
__xlated("2: r1 = 42")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* call bpf_get_prandom_u32 */
--
2.49.0

View File

@@ -1,71 +0,0 @@
From 07be1f644ff9eeb842fd0490ddd824df0828cb0e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Sun, 30 Mar 2025 20:38:28 -0700
Subject: [PATCH 4002/4002] selftests/bpf: Fix verifier_private_stack test
failure
Several verifier_private_stack tests failed with latest bpf-next.
For example, for 'Private stack, single prog' subtest, the
jitted code:
func #0:
0: f3 0f 1e fa endbr64
4: 0f 1f 44 00 00 nopl (%rax,%rax)
9: 0f 1f 00 nopl (%rax)
c: 55 pushq %rbp
d: 48 89 e5 movq %rsp, %rbp
10: f3 0f 1e fa endbr64
14: 49 b9 58 74 8a 8f 7d 60 00 00 movabsq $0x607d8f8a7458, %r9
1e: 65 4c 03 0c 25 28 c0 48 87 addq %gs:-0x78b73fd8, %r9
27: bf 2a 00 00 00 movl $0x2a, %edi
2c: 49 89 b9 00 ff ff ff movq %rdi, -0x100(%r9)
33: 31 c0 xorl %eax, %eax
35: c9 leave
36: e9 20 5d 0f e1 jmp 0xffffffffe10f5d5b
The insn 'addq %gs:-0x78b73fd8, %r9' does not match the expected
regex 'addq %gs:0x{{.*}}, %r9' and this caused test failure.
Fix it by changing '%gs:0x{{.*}}' to '%gs:{{.*}}' to accommodate the
possible negative offset. A few other subtests are fixed in a similar way.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20250331033828.365077-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
tools/testing/selftests/bpf/progs/verifier_private_stack.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index b1fbdf119553..fc91b414364e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -27,7 +27,7 @@ __description("Private stack, single prog")
__success
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x100(%r9)")
__naked void private_stack_single_prog(void)
@@ -74,7 +74,7 @@ __success
__arch_x86_64
/* private stack fp for the main prog */
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
@@ -122,7 +122,7 @@ __jited(" pushq %rbp")
__jited(" movq %rsp, %rbp")
__jited(" endbr64")
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
--
2.49.0

View File

@@ -1,17 +1,17 @@
# TEMPORARY
btf_dump/btf_dump: syntax
bpf_cookie/perf_event
kprobe_multi_bench_attach
core_reloc/enum64val
core_reloc/size___diff_sz
core_reloc/type_based___diff_sz
test_ima # All of CI is broken on it following 6.3-rc1 merge
lwt_reroute # crashes kernel after netnext merge from 2ab1efad60ad "net/sched: cls_api: complement tcf_tfilter_dump_policy"
tc_links_ingress # started failing after net-next merge from 2ab1efad60ad "net/sched: cls_api: complement tcf_tfilter_dump_policy"
xdp_bonding/xdp_bonding_features # started failing after net merge from 359e54a93ab4 "l2tp: pass correct message length to ip6_append_data"
tc_redirect/tc_redirect_dtime # uapi breakage after net-next commit 885c36e59f46 ("net: Re-use and set mono_delivery_time bit for userspace tstamp packets")
migrate_reuseport/IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler # flaky, under investigation
migrate_reuseport/IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler # flaky, under investigation
verify_pkcs7_sig # keeps failing
verif_scale_pyperf600 # fails on newer Clangs
decap_sanity # weird failure with decap_sanity_ns netns already existing, TBD
empty_skb # waiting the fix in bpf tree to make it to bpf-next
bpf_nf/tc-bpf-ct # test consistently failing on x86: https://github.com/libbpf/libbpf/pull/698#issuecomment-1590341200
bpf_nf/xdp-ct # test consistently failing on x86: https://github.com/libbpf/libbpf/pull/698#issuecomment-1590341200
find_vma # test consistently fails on latest kernel, see https://github.com/libbpf/libbpf/issues/754 for details
send_signal/send_signal_nmi
send_signal/send_signal_nmi_thread
lwt_reroute # crashes kernel, fix pending upstream
tc_links_ingress # fails, same fix is pending upstream
tc_redirect # enough is enough, banned for life for flakiness
map_kptr # kprobe rcu_tasks_trace_postgp not available in VM kernel
test_profiler # kretprobe do_filp_open not available in VM kernel
sockmap_basic/sockmap udp multi channels # flaky in CI

View File

@@ -1,13 +0,0 @@
decap_sanity # weird failure with decap_sanity_ns netns already existing, TBD
empty_skb # waiting the fix in bpf tree to make it to bpf-next
bpf_nf/tc-bpf-ct # test consistently failing on x86: https://github.com/libbpf/libbpf/pull/698#issuecomment-1590341200
bpf_nf/xdp-ct # test consistently failing on x86: https://github.com/libbpf/libbpf/pull/698#issuecomment-1590341200
kprobe_multi_bench_attach # suspected to cause crashes in CI
find_vma # test consistently fails on latest kernel, see https://github.com/libbpf/libbpf/issues/754 for details
bpf_cookie/perf_event
send_signal/send_signal_nmi
send_signal/send_signal_nmi_thread
lwt_reroute # crashes kernel, fix pending upstream
tc_links_ingress # fails, same fix is pending upstream
tc_redirect # enough is enough, banned for life for flakiness

View File

@@ -1,17 +0,0 @@
# TEMPORARY
sockmap_listen/sockhash VSOCK test_vsock_redir
usdt/basic # failing verifier due to bounds check after LLVM update
usdt/multispec # same as above
deny_namespace # not yet in bpf denylist
tc_redirect/tc_redirect_dtime # very flaky
lru_bug # not yet in bpf-next denylist
# Disabled temporarily for a crash.
# https://lore.kernel.org/bpf/c9923c1d-971d-4022-8dc8-1364e929d34c@gmail.com/
dummy_st_ops/dummy_init_ptr_arg
fexit_bpf2bpf
tailcalls
trace_ext
xdp_bpf2bpf
xdp_metadata

View File

@@ -1443,6 +1443,7 @@ enum {
IFLA_GENEVE_DF,
IFLA_GENEVE_INNER_PROTO_INHERIT,
IFLA_GENEVE_PORT_RANGE,
IFLA_GENEVE_GRO_HINT,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)

View File

@@ -1330,14 +1330,16 @@ union perf_mem_data_src {
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
mem_rsvd : 18;
mem_region : 5, /* cache/memory regions */
mem_rsvd : 13;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
__u64 mem_rsvd : 18,
__u64 mem_rsvd : 13,
mem_region : 5, /* cache/memory regions */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
@@ -1394,7 +1396,7 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
/* 0x007 available */
#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */
#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
@@ -1447,6 +1449,25 @@ union perf_mem_data_src {
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
/* Cache/Memory region */
#define PERF_MEM_REGION_NA 0x0 /* Invalid */
#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */
#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */
#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */
#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */
#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */
#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */
#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */
#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */
#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */
#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */
#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */
#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */
#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */
#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */
#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */
#define PERF_MEM_REGION_SHIFT 46
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)

View File

@@ -1036,6 +1036,7 @@ enum {
TCA_CAKE_STATS_DROP_NEXT_US,
TCA_CAKE_STATS_P_DROP,
TCA_CAKE_STATS_BLUE_TIMER_US,
TCA_CAKE_STATS_ACTIVE_QUEUES,
__TCA_CAKE_STATS_MAX
};
#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)

View File

@@ -72,6 +72,10 @@
#define __counted_by_be(m)
#endif
#ifndef __counted_by_ptr
#define __counted_by_ptr(m)
#endif
#ifdef __KERNEL__
#define __kernel_nonstring __nonstring
#else

View File

@@ -315,9 +315,6 @@ enum libbpf_tristate {
___param, sizeof(___param)); \
})
extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args,
__u32 len__sz) __weak __ksym;
#define bpf_stream_printk(stream_id, fmt, args...) \
({ \
static const char ___fmt[] = fmt; \

View File

@@ -2004,12 +2004,18 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
{
struct btf_pipe p = { .src = src_btf, .dst = btf };
int data_sz, sz, cnt, i, err, old_strs_len;
__u32 src_start_id;
__u32 *off;
void *t;
/* appending split BTF isn't supported yet */
if (src_btf->base_btf)
return libbpf_err(-ENOTSUP);
/*
* When appending split BTF, the destination must share the same base
* BTF so that base type ID references remain valid.
*/
if (src_btf->base_btf && src_btf->base_btf != btf->base_btf)
return libbpf_err(-EOPNOTSUPP);
src_start_id = src_btf->base_btf ? btf__type_cnt(src_btf->base_btf) : 1;
/* deconstruct BTF, if necessary, and invalidate raw_data */
if (btf_ensure_modifiable(btf))
@@ -2021,7 +2027,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
old_strs_len = btf->hdr->str_len;
data_sz = src_btf->hdr->type_len;
cnt = btf__type_cnt(src_btf) - 1;
cnt = src_btf->nr_types;
/* pre-allocate enough memory for new types */
t = btf_add_type_mem(btf, data_sz);
@@ -2060,6 +2066,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
if (err)
goto err_out;
while ((str_off = btf_field_iter_next(&it))) {
/* don't remap strings from shared base BTF */
if (*str_off < src_btf->start_str_off)
continue;
err = btf_rewrite_str(&p, str_off);
if (err)
goto err_out;
@@ -2074,11 +2083,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
if (!*type_id) /* nothing to do for VOID references */
continue;
/* we haven't updated btf's type count yet, so
* btf->start_id + btf->nr_types - 1 is the type ID offset we should
* add to all newly added BTF types
*/
*type_id += btf->start_id + btf->nr_types - 1;
/* don't remap types from shared base BTF */
if (*type_id < src_start_id)
continue;
*type_id += btf->start_id + btf->nr_types - src_start_id;
}
/* go to next type data and type offset index entry */

View File

@@ -506,6 +506,89 @@ static int probe_kern_arg_ctx_tag(int token_fd)
return probe_fd(prog_fd);
}
static int probe_ldimm64_full_range_off(int token_fd)
{
char log_buf[1024];
int prog_fd, map_fd;
int ret;
LIBBPF_OPTS(bpf_map_create_opts, map_opts,
.token_fd = token_fd,
.map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
);
LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
.token_fd = token_fd,
.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
.log_buf = log_buf,
.log_size = sizeof(log_buf),
);
struct bpf_insn insns[] = {
BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1UL << 30),
BPF_EXIT_INSN(),
};
int insn_cnt = ARRAY_SIZE(insns);
map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr", sizeof(int), 1, 1, &map_opts);
if (map_fd < 0) {
ret = -errno;
pr_warn("Error in %s(): %s. Couldn't create simple array map.\n",
__func__, errstr(ret));
return ret;
}
insns[0].imm = map_fd;
log_buf[0] = '\0';
prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "global_reloc", "GPL", insns, insn_cnt, &prog_opts);
ret = -errno;
close(map_fd);
if (prog_fd >= 0) {
pr_warn("Error in %s(): Program loading unexpectedly succeeded.\n", __func__);
close(prog_fd);
return -EINVAL;
}
/*
* Feature is allowed if we're not failing with the error message
* "direct value offset of %u is not allowed" removed in
* 12a1fe6e12db ("bpf/verifier: Do not limit maximum direct offset into arena map").
* We should instead fail with "invalid access to map value pointer".
* Ensure we match with one of the two and we're not failing with a
* different, unexpected message.
*/
if (strstr(log_buf, "direct value offset of"))
return 0;
if (!strstr(log_buf, "invalid access to map value pointer")) {
pr_warn("Error in %s(): Program unexpectedly failed with message: %s.\n",
__func__, log_buf);
return ret;
}
return 1;
}
#ifdef __x86_64__
#ifndef __NR_uprobe
#define __NR_uprobe 336
#endif
static int probe_uprobe_syscall(int token_fd)
{
/*
* If kernel supports uprobe() syscall, it will return -ENXIO when called
* from the outside of a kernel-generated uprobe trampoline.
*/
return syscall(__NR_uprobe) < 0 && errno == ENXIO;
}
#else
static int probe_uprobe_syscall(int token_fd)
{
return 0;
}
#endif
typedef int (*feature_probe_fn)(int /* token_fd */);
static struct kern_feature_cache feature_cache;
@@ -581,6 +664,12 @@ static struct kern_feature_desc {
[FEAT_BTF_QMARK_DATASEC] = {
"BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
},
[FEAT_LDIMM64_FULL_RANGE_OFF] = {
"full range LDIMM64 support", probe_ldimm64_full_range_off,
},
[FEAT_UPROBE_SYSCALL] = {
"kernel supports uprobe syscall", probe_uprobe_syscall,
},
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)

View File

@@ -3009,9 +3009,6 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
memcpy(obj->arena_data, data, data_sz);
obj->arena_data_sz = data_sz;
/* place globals at the end of the arena */
obj->arena_data_off = mmap_sz - data_alloc_sz;
/* make bpf_map__init_value() work for ARENA maps */
map->mmaped = obj->arena_data;
@@ -4669,7 +4666,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
reloc_desc->type = RELO_DATA;
reloc_desc->insn_idx = insn_idx;
reloc_desc->map_idx = obj->arena_map_idx;
reloc_desc->sym_off = sym->st_value + obj->arena_data_off;
reloc_desc->sym_off = sym->st_value;
map = &obj->maps[obj->arena_map_idx];
pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
@@ -6383,6 +6380,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_DATA:
map = &obj->maps[relo->map_idx];
insn[1].imm = insn[0].imm + relo->sym_off;
if (relo->map_idx == obj->arena_map_idx)
insn[1].imm += obj->arena_data_off;
if (obj->gen_loader) {
insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
insn[0].imm = relo->map_idx;
@@ -7384,6 +7385,14 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat
bpf_object__sort_relos(obj);
}
/* place globals at the end of the arena (if supported) */
if (obj->arena_map_idx >= 0 && kernel_supports(obj, FEAT_LDIMM64_FULL_RANGE_OFF)) {
struct bpf_map *arena_map = &obj->maps[obj->arena_map_idx];
obj->arena_data_off = bpf_map_mmap_sz(arena_map) -
roundup(obj->arena_data_sz, sysconf(_SC_PAGE_SIZE));
}
/* Before relocating calls pre-process relocations and mark
* few ld_imm64 instructions that points to subprogs.
* Otherwise bpf_object__reloc_code() later would have to consider
@@ -12032,7 +12041,16 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
if (addrs && syms)
return libbpf_err_ptr(-EINVAL);
if (pattern) {
/*
* Exact function name (no wildcards) without unique_match:
* bypass kallsyms parsing and pass the symbol directly to the
* kernel via syms[] array. When unique_match is set, fall
* through to the slow path which detects duplicate symbols.
*/
if (pattern && !strpbrk(pattern, "*?") && !unique_match) {
syms = &pattern;
cnt = 1;
} else if (pattern) {
if (has_available_filter_functions_addrs())
err = libbpf_available_kprobes_parse(&res);
else
@@ -12075,6 +12093,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
if (link_fd < 0) {
err = -errno;
/*
* Normalize error code: when exact name bypasses kallsyms
* parsing, kernel returns ESRCH from ftrace_lookup_symbols().
* Convert to ENOENT for API consistency with the pattern
* matching path which returns ENOENT from userspace.
*/
if (err == -ESRCH)
err = -ENOENT;
pr_warn("prog '%s': failed to attach: %s\n",
prog->name, errstr(err));
goto error;

View File

@@ -392,6 +392,10 @@ enum kern_feature_id {
FEAT_ARG_CTX_TAG,
/* Kernel supports '?' at the front of datasec names */
FEAT_BTF_QMARK_DATASEC,
/* Kernel supports LDIMM64 imm offsets past 512 MiB. */
FEAT_LDIMM64_FULL_RANGE_OFF,
/* Kernel supports uprobe syscall */
FEAT_UPROBE_SYSCALL,
__FEAT_CNT,
};

View File

@@ -581,7 +581,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
written = 0;
while (written < buf_sz) {
ret = write(fd, buf, buf_sz);
ret = write(fd, buf + written, buf_sz - written);
if (ret < 0) {
ret = -errno;
pr_warn("failed to write '%s': %s\n", filename, errstr(ret));

View File

@@ -143,7 +143,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
struct nlmsghdr *nh;
int len, ret;
ret = alloc_iov(&iov, 4096);
ret = alloc_iov(&iov, 8192);
if (ret)
goto done;
@@ -212,6 +212,8 @@ start:
}
}
}
if (len)
pr_warn("Invalid message or trailing data in Netlink response: %d bytes left\n", len);
}
ret = 0;
done:

View File

@@ -262,6 +262,7 @@ struct usdt_manager {
bool has_bpf_cookie;
bool has_sema_refcnt;
bool has_uprobe_multi;
bool has_uprobe_syscall;
};
struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
@@ -301,6 +302,13 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
* usdt probes.
*/
man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK);
/*
* Detect kernel support for uprobe() syscall, it's presence means we can
* take advantage of faster nop5 uprobe handling.
* Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe")
*/
man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL);
return man;
}
@@ -585,13 +593,34 @@ static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
struct usdt_target **out_targets, size_t *out_target_cnt)
#if defined(__x86_64__)
static bool has_nop_combo(int fd, long off)
{
unsigned char nop_combo[6] = {
0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
};
unsigned char buf[6];
if (pread(fd, buf, 6, off) != 6)
return false;
return memcmp(buf, nop_combo, 6) == 0;
}
#else
static bool has_nop_combo(int fd, long off)
{
return false;
}
#endif
static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char *path,
pid_t pid, const char *usdt_provider, const char *usdt_name,
__u64 usdt_cookie, struct usdt_target **out_targets,
size_t *out_target_cnt)
{
size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
struct elf_seg *segs = NULL, *vma_segs = NULL;
struct usdt_target *targets = NULL, *target;
Elf *elf = elf_fd->elf;
long base_addr = 0;
Elf_Scn *notes_scn, *base_scn;
GElf_Shdr base_shdr, notes_shdr;
@@ -784,6 +813,16 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
target = &targets[target_cnt];
memset(target, 0, sizeof(*target));
/*
* We have uprobe syscall and usdt with nop,nop5 instructions combo,
* so we can place the uprobe directly on nop5 (+1) and get this probe
* optimized.
*/
if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) {
usdt_abs_ip++;
usdt_rel_ip++;
}
target->abs_ip = usdt_abs_ip;
target->rel_ip = usdt_rel_ip;
target->sema_off = usdt_sema_off;
@@ -998,7 +1037,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
/* discover USDT in given binary, optionally limiting
* activations to a given PID, if pid > 0
*/
err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name,
err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name,
usdt_cookie, &targets, &target_cnt);
if (err <= 0) {
err = (err == 0) ? -ENOENT : err;