Merge branch 'libbpf:master' into master

thiagoftsm
2022-09-29 21:37:39 +00:00
committed by GitHub
39 changed files with 73141 additions and 72348 deletions

View File

@@ -18,9 +18,10 @@ runs:
steps:
- shell: bash
run: |
echo "::group::Setup Env"
source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
foldable start "Setup Env"
sudo apt-get install -y qemu-kvm zstd binutils-dev elfutils libcap-dev libelf-dev libdw-dev python3-docutils
echo "::endgroup::"
foldable end
- shell: bash
run: |
export KERNEL=${{ inputs.kernel }}

File diff suppressed because it is too large.

View File

@@ -41,24 +41,26 @@ runs:
- name: Prepare to build BPF selftests
shell: bash
run: |
echo "::group::Prepare building selftest"
source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
foldable start "Prepare building selftest"
cd .kernel
cat tools/testing/selftests/bpf/config \
tools/testing/selftests/bpf/config.${{ inputs.arch }} > .config
make olddefconfig && make prepare
cd -
echo "::endgroup::"
foldable end
# 2. if kernel == LATEST, build kernel image from tree
- name: Build kernel image
if: ${{ inputs.kernel == 'LATEST' }}
shell: bash
run: |
echo "::group::Build Kernel Image"
source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
foldable start "Build Kernel Image"
cd .kernel
make -j $((4*$(nproc))) all > /dev/null
cp vmlinux ${{ github.workspace }}
cd -
echo "::endgroup::"
foldable end
# else, just download prebuilt kernel image
- name: Download prebuilt kernel
if: ${{ inputs.kernel != 'LATEST' }}
@@ -76,14 +78,19 @@ runs:
- name: prepare rootfs
uses: libbpf/ci/prepare-rootfs@master
with:
kernel: ${{ inputs.kernel }}
project-name: 'libbpf'
arch: ${{ inputs.arch }}
kernel: ${{ inputs.kernel }}
kernel-root: '.kernel'
image-output: '/tmp/root.img'
# 5. run selftest in QEMU
- name: Run selftests
env:
KERNEL: ${{ inputs.kernel }}
REPO_ROOT: ${{ github.workspace }}
uses: libbpf/ci/run-qemu@master
with:
arch: ${{ inputs.arch }}
img: '/tmp/root.img'
vmlinuz: 'vmlinuz'
arch: ${{ inputs.arch }}
kernel-root: '.kernel'

View File

@@ -15,12 +15,13 @@ jobs:
- uses: ./.github/actions/setup
- name: Run coverity
run: |
echo ::group::Setup CI env
source "${GITHUB_WORKSPACE}"/ci/vmtest/helpers.sh
foldable start "Setup CI env"
source /tmp/ci_setup
export COVERITY_SCAN_NOTIFICATION_EMAIL="${AUTHOR_EMAIL}"
export COVERITY_SCAN_BRANCH_PATTERN=${GITHUB_REF##refs/*/}
export TRAVIS_BRANCH=${COVERITY_SCAN_BRANCH_PATTERN}
echo ::endgroup::
foldable end
scripts/coverity.sh
env:
COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }}

View File

@@ -1 +1 @@
14b20b784f59bdd95f6f1cfb112c9818bcec4d84
60240bc26114543fcbfcd8a28466e67e77b20388

View File

@@ -1 +1 @@
e34cfee65ec891a319ce79797dda18083af33a76
87dbdc230d162bf9ee1ac77c8ade178b6b1e199e

View File

@@ -1,4 +1,7 @@
<img src="https://user-images.githubusercontent.com/508075/185997470-2f427d3d-f040-4eef-afc5-ae4f766615b2.png" width="40%" >
<picture>
<source media="(prefers-color-scheme: dark)" srcset="assets/libbpf-logo-sideways-darkbg.png" width="40%">
<img src="assets/libbpf-logo-sideways.png" width="40%">
</picture>
libbpf
[![Github Actions Builds & Tests](https://github.com/libbpf/libbpf/actions/workflows/test.yml/badge.svg)](https://github.com/libbpf/libbpf/actions/workflows/test.yml)

Binary files not shown (9 image assets added; sizes: 262 KiB, 128 KiB, 116 KiB, 284 KiB, 142 KiB, 140 KiB, 352 KiB, 206 KiB, 236 KiB).

View File

@@ -36,6 +36,7 @@ $ sudo systemctl daemon-reload
$ sudo tee /etc/actions-runner-libbpf
repo=<owner>/<name>
access_token=<ghp_***>
runner_name=<hostname>
```
Access token should have the repo scope, consult

View File

@@ -24,7 +24,9 @@ RUN apt-get update && apt-get -y install \
rsync \
software-properties-common \
sudo \
tree
tree \
iproute2 \
iputils-ping
# amd64 dependencies.
COPY --from=ld-prefix / /usr/x86_64-linux-gnu/
@@ -33,7 +35,7 @@ RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/
ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu
# amd64 Github Actions Runner.
ARG version=2.285.0
ARG version=2.296.0
RUN useradd -m actions-runner
RUN echo "actions-runner ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers
RUN echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >>/etc/sudoers

View File

@@ -7,7 +7,7 @@
#
# - repo=<owner>/<name>
# - access_token=<ghp_***>
#
# - runner_name=<hostname>
set -e -u
@@ -34,6 +34,7 @@ registration_token=$(jq --raw-output .token "$token_file")
--url "https://github.com/$repo" \
--token "$registration_token" \
--labels z15 \
--name "$runner_name" \
--ephemeral
# Run one job.

View File

@@ -1,5 +1,5 @@
# This file is not used and is there for historic purposes only.
# See WHITELIST-5.5.0 instead.
# See ALLOWLIST-5.5.0 instead.
# PERMANENTLY DISABLED
align # verifier output format changed

View File

@@ -1,6 +0,0 @@
# TEMPORARY
get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
stacktrace_build_id_nmi
stacktrace_build_id
task_fd_query_rawtp
varlen

View File

@@ -1,68 +1,6 @@
# TEMPORARY until bpf-next -> bpf merge
lru_bug # prog 'printk': failed to auto-attach: -524
# TEMPORARY
atomics # attach(add): actual -524 <= expected 0 (trampoline)
bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc)
bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
bpf_tcp_ca # JIT does not support calling kernel function (kfunc)
bpf_loop # attaches to __x64_sys_nanosleep
bpf_mod_race # BPF trampoline
bpf_nf # JIT does not support calling kernel function
core_read_macros # unknown func bpf_probe_read#4 (overlapping)
d_path # failed to auto-attach program 'prog_stat': -524 (trampoline)
dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline)
fentry_fexit # fentry attach failed: -524 (trampoline)
fentry_test # fentry_first_attach unexpected error: -524 (trampoline)
fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline)
fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline)
fexit_test # fexit_first_attach unexpected error: -524 (trampoline)
get_func_args_test # trampoline
get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
kfree_skb # attach fentry unexpected error: -524 (trampoline)
kfunc_call # 'bpf_prog_active': not found in kernel BTF (?)
ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?)
modify_return # modify_return attach failed: -524 (trampoline)
module_attach # skel_attach skeleton attach failed: -524 (trampoline)
mptcp
kprobe_multi_test # relies on fentry
netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?)
probe_user # check_kprobe_res wrong kprobe res from probe read (?)
recursion # skel_attach unexpected error: -524 (trampoline)
ringbuf # skel_load skeleton load failed (?)
sk_assign # Can't read on server: Invalid argument (?)
sk_lookup # endianness problem
sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline)
skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline)
socket_cookie # prog_attach unexpected error: -524 (trampoline)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?)
task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline)
test_bpffs # bpffs test failed 255 (iterator)
test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline)
test_ima # failed to auto-attach program 'ima': -524 (trampoline)
test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline)
test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?)
test_overhead # attach_fentry unexpected error: -524 (trampoline)
test_profiler # unknown func bpf_probe_read_str#45 (overlapping)
timer # failed to auto-attach program 'test1': -524 (trampoline)
timer_crash # trampoline
timer_mim # failed to auto-attach program 'test1': -524 (trampoline)
trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline)
trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?)
trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?)
trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline)
xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?)
xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
map_kptr # failed to open_and_load program: -524 (trampoline)
bpf_cookie # failed to open_and_load program: -524 (trampoline)
xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
send_signal # intermittently fails to receive signal
select_reuseport # intermittently fails on new s390x setup
xdp_synproxy # JIT does not support calling kernel function (kfunc)
unpriv_bpf_disabled # fentry
lru_bug
usdt/basic # failing verifier due to bounds check after LLVM update
usdt/multispec # same as above

View File

@@ -28,7 +28,7 @@ test_progs() {
fi
}
test_progs_noalu() {
test_progs_no_alu32() {
foldable start test_progs-no_alu32 "Testing test_progs-no_alu32"
./test_progs-no_alu32 ${DENYLIST:+-d$DENYLIST} ${ALLOWLIST:+-a$ALLOWLIST} && true
echo "test_progs-no_alu32:$?" >> "${STATUS_FILE}"
@@ -55,9 +55,20 @@ test_verifier() {
foldable end vm_init
configs_path=${PROJECT_NAME}/vmtest/configs
DENYLIST=$(read_lists "$configs_path/DENYLIST-${KERNEL}" "$configs_path/DENYLIST-${KERNEL}.${ARCH}")
ALLOWLIST=$(read_lists "$configs_path/ALLOWLIST-${KERNEL}" "$configs_path/ALLOWLIST-${KERNEL}.${ARCH}")
configs_path=/${PROJECT_NAME}/selftests/bpf
local_configs_path=${PROJECT_NAME}/vmtest/configs
DENYLIST=$(read_lists \
"$configs_path/DENYLIST" \
"$configs_path/DENYLIST.${ARCH}" \
"$local_configs_path/DENYLIST-${KERNEL}" \
"$local_configs_path/DENYLIST-${KERNEL}.${ARCH}" \
)
ALLOWLIST=$(read_lists \
"$configs_path/ALLOWLIST" \
"$configs_path/ALLOWLIST.${ARCH}" \
"$local_configs_path/ALLOWLIST-${KERNEL}" \
"$local_configs_path/ALLOWLIST-${KERNEL}.${ARCH}" \
)
echo "DENYLIST: ${DENYLIST}"
echo "ALLOWLIST: ${ALLOWLIST}"
@@ -66,7 +77,7 @@ cd ${PROJECT_NAME}/selftests/bpf
if [ $# -eq 0 ]; then
test_progs
test_progs_noalu
test_progs_no_alu32
test_maps
test_verifier
else

View File

@@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
enum bpf_cgroup_iter_order {
BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */
BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */
BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */
BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */
};
union bpf_iter_link_info {
struct {
__u32 map_fd;
} map;
struct {
enum bpf_cgroup_iter_order order;
/* At most one of cgroup_fd and cgroup_id can be non-zero. If
* both are zero, the walk starts from the default cgroup v2
* root. For walking v1 hierarchy, one should always explicitly
* specify cgroup_fd.
*/
__u32 cgroup_fd;
__u64 cgroup_id;
} cgroup;
};
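For orientation, a minimal user-space sketch of filling these fields when attaching a cgroup iterator through libbpf; the cgroup path and the 'prog' handle are hypothetical, not part of this diff:

    /* Illustrative sketch; assumes <bpf/libbpf.h> and <fcntl.h>, and that
     * 'prog' is an already-loaded "iter/cgroup" program.
     */
    union bpf_iter_link_info linfo = {};
    LIBBPF_OPTS(bpf_iter_attach_opts, opts);
    int cgroup_fd = open("/sys/fs/cgroup/test", O_RDONLY); /* example path */

    linfo.cgroup.order = BPF_CGROUP_ITER_DESCENDANTS_PRE;
    linfo.cgroup.cgroup_fd = cgroup_fd; /* cgroup_id left 0: at most one may be set */
    opts.link_info = &linfo;
    opts.link_info_len = sizeof(linfo);
    struct bpf_link *link = bpf_program__attach_iter(prog, &opts);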
/* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -909,6 +928,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_INODE_STORAGE,
BPF_MAP_TYPE_TASK_STORAGE,
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
};
/* Note that tracing related programs such as
@@ -1233,7 +1253,7 @@ enum {
/* Query effective (directly attached + inherited from ancestor cgroups)
* programs that will be executed for events within a cgroup.
* attach_flags with this flag are returned only for directly attached programs.
* attach_flags with this flag are always returned 0.
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
@@ -1432,7 +1452,10 @@ union bpf_attr {
__u32 attach_flags;
__aligned_u64 prog_ids;
__u32 prog_cnt;
__aligned_u64 prog_attach_flags; /* output: per-program attach_flags */
/* output: per-program attach_flags.
* not allowed to be set during effective query.
*/
__aligned_u64 prog_attach_flags;
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -4437,7 +4460,7 @@ union bpf_attr {
*
* **-EEXIST** if the option already exists.
*
* **-EFAULT** on failrue to parse the existing header options.
* **-EFAULT** on failure to parse the existing header options.
*
* **-EPERM** if the helper cannot be used under the current
* *skops*\ **->op**.
@@ -4646,7 +4669,7 @@ union bpf_attr {
* a *map* with *task* as the **key**. From this
* perspective, the usage is not much different from
* **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
* helper enforces the key must be an task_struct and the map must also
* helper enforces the key must be a task_struct and the map must also
* be a **BPF_MAP_TYPE_TASK_STORAGE**.
*
* Underneath, the value is stored locally at *task* instead of
@@ -4704,7 +4727,7 @@ union bpf_attr {
*
* long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
* Description
* Returns the stored IMA hash of the *inode* (if it's avaialable).
* Returns the stored IMA hash of the *inode* (if it's available).
* If the hash is larger than *size*, then only *size*
* bytes will be copied to *dst*
* Return
@@ -4728,12 +4751,12 @@ union bpf_attr {
*
* The argument *len_diff* can be used for querying with a planned
* size change. This allows to check MTU prior to changing packet
* ctx. Providing an *len_diff* adjustment that is larger than the
* ctx. Providing a *len_diff* adjustment that is larger than the
* actual packet size (resulting in negative packet size) will in
* principle not exceed the MTU, why it is not considered a
* failure. Other BPF-helpers are needed for performing the
* planned size change, why the responsability for catch a negative
* packet size belong in those helpers.
* principle not exceed the MTU, which is why it is not considered
* a failure. Other BPF helpers are needed for performing the
* planned size change; therefore the responsibility for catching
* a negative packet size belongs in those helpers.
*
* Specifying *ifindex* zero means the MTU check is performed
* against the current net device. This is practical if this isn't
@@ -4931,6 +4954,7 @@ union bpf_attr {
* Get address of the traced function (for tracing and kprobe programs).
* Return
* Address of the traced function.
* 0 for kprobes placed within the function (not at the entry).
*
* u64 bpf_get_attach_cookie(void *ctx)
* Description
@@ -5060,12 +5084,12 @@ union bpf_attr {
*
* long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
* Description
* Get **n**-th argument (zero based) of the traced function (for tracing programs)
* Get **n**-th argument register (zero based) of the traced function (for tracing programs)
* returned in **value**.
*
* Return
* 0 on success.
* **-EINVAL** if n >= arguments count of traced function.
* **-EINVAL** if n >= argument register count of traced function.
*
* long bpf_get_func_ret(void *ctx, u64 *value)
* Description
@@ -5078,24 +5102,37 @@ union bpf_attr {
*
* long bpf_get_func_arg_cnt(void *ctx)
* Description
* Get number of arguments of the traced function (for tracing programs).
* Get number of registers of the traced function (for tracing programs) where
* function arguments are stored in these registers.
*
* Return
* The number of arguments of the traced function.
* The number of argument registers of the traced function.
*
* int bpf_get_retval(void)
* Description
* Get the syscall's return value that will be returned to userspace.
* Get the BPF program's return value that will be returned to the upper layers.
*
* This helper is currently supported by cgroup programs only.
* This helper is currently supported by cgroup programs and only by the hooks
* where BPF program's return value is returned to the userspace via errno.
* Return
* The syscall's return value.
* The BPF program's return value.
*
* int bpf_set_retval(int retval)
* Description
* Set the syscall's return value that will be returned to userspace.
* Set the BPF program's return value that will be returned to the upper layers.
*
* This helper is currently supported by cgroup programs and only by the hooks
* where BPF program's return value is returned to the userspace via errno.
*
* Note that there is the following corner case where the program exports an error
* via bpf_set_retval but signals success via 'return 1':
*
* bpf_set_retval(-EPERM);
* return 1;
*
* In this case, the BPF program's return value will use helper's -EPERM. This
* still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
*
* This helper is currently supported by cgroup programs only.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -5355,6 +5392,43 @@ union bpf_attr {
* Return
* Current *ktime*.
*
* long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
* Description
* Drain samples from the specified user ring buffer, and invoke
* the provided callback for each such sample:
*
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
*
* If **callback_fn** returns 0, the helper will continue to try
* and drain the next sample, up to a maximum of
* BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
* the helper will skip the rest of the samples and return. Other
* return values are not used now, and will be rejected by the
* verifier.
* Return
* The number of drained samples if no error was encountered while
* draining samples, or 0 if no samples were present in the ring
* buffer. If a user-space producer was epoll-waiting on this map,
* and at least one sample was drained, they will receive an event
* notification notifying them of available space in the ring
* buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
* function, no wakeup notification will be sent. If the
* BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
* be sent even if no sample was drained.
*
* On failure, the returned value is one of the following:
*
* **-EBUSY** if the ring buffer is contended, and another calling
* context was concurrently draining the ring buffer.
*
* **-EINVAL** if user-space is not properly tracking the ring
* buffer due to the producer position not being aligned to 8
* bytes, a sample not being aligned to 8 bytes, or the producer
* position not matching the advertised length of a sample.
*
* **-E2BIG** if user-space has tried to publish a sample which is
* larger than the size of the ring buffer, or which cannot fit
* within a struct bpf_dynptr.
*/
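For reference, a minimal BPF-side sketch of this helper in use; the map size, section name, and callback body are illustrative assumptions, not part of this diff:

    /* Illustrative sketch; assumes <vmlinux.h> and <bpf/bpf_helpers.h>. */
    struct {
        __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
        __uint(max_entries, 256 * 1024); /* example size */
    } user_ringbuf SEC(".maps");

    static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
    {
        /* Read the sample via bpf_dynptr_read()/bpf_dynptr_data(). */
        return 0; /* 0: keep draining; 1: stop after this sample */
    }

    SEC("tp/syscalls/sys_enter_getpgid") /* example attach point */
    int drain_samples(void *ctx)
    {
        long n = bpf_user_ringbuf_drain(&user_ringbuf, drain_cb, NULL, 0);
        /* n is the number of drained samples, or a negative error */
        return 0;
    }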
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5566,6 +5640,7 @@ union bpf_attr {
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
FN(ktime_get_tai_ns), \
FN(user_ringbuf_drain), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5628,6 +5703,11 @@ enum {
BPF_F_SEQ_NUMBER = (1ULL << 3),
};
/* BPF_FUNC_skb_get_tunnel_key flags. */
enum {
BPF_F_TUNINFO_FLAGS = (1ULL << 4),
};
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
*/
@@ -5817,7 +5897,10 @@ struct bpf_tunnel_key {
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
__u16 tunnel_ext; /* Padding, future use. */
union {
__u16 tunnel_ext; /* compat */
__be16 tunnel_flags;
};
__u32 tunnel_label;
union {
__u32 local_ipv4;
@@ -5861,6 +5944,11 @@ enum bpf_ret_code {
* represented by BPF_REDIRECT above).
*/
BPF_LWT_REROUTE = 128,
/* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR
* to indicate that no custom dissection was performed, and
* fallback to standard dissector is requested.
*/
BPF_FLOW_DISSECTOR_CONTINUE = 129,
};
struct bpf_sock {
@@ -6159,11 +6247,22 @@ struct bpf_link_info {
struct {
__aligned_u64 target_name; /* in/out: target_name buffer ptr */
__u32 target_name_len; /* in/out: target_name buffer len */
/* If the iter specific field is 32 bits, it can be put
* in the first or second union. Otherwise it should be
* put in the second union.
*/
union {
struct {
__u32 map_id;
} map;
};
union {
struct {
__u64 cgroup_id;
__u32 order;
} cgroup;
};
} iter;
struct {
__u32 netns_ino;

View File

@@ -9,7 +9,7 @@ else
endif
LIBBPF_MAJOR_VERSION := 1
LIBBPF_MINOR_VERSION := 0
LIBBPF_MINOR_VERSION := 1
LIBBPF_PATCH_VERSION := 0
LIBBPF_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).$(LIBBPF_PATCH_VERSION)
LIBBPF_MAJMIN_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).0

View File

@@ -3426,7 +3426,7 @@ static long (*bpf_load_hdr_opt)(struct bpf_sock_ops *skops, void *searchby_res,
*
* **-EEXIST** if the option already exists.
*
* **-EFAULT** on failrue to parse the existing header options.
* **-EFAULT** on failure to parse the existing header options.
*
* **-EPERM** if the helper cannot be used under the current
* *skops*\ **->op**.
@@ -3686,7 +3686,7 @@ static long (*bpf_redirect_peer)(__u32 ifindex, __u64 flags) = (void *) 155;
* a *map* with *task* as the **key**. From this
* perspective, the usage is not much different from
* **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
* helper enforces the key must be an task_struct and the map must also
* helper enforces the key must be a task_struct and the map must also
* be a **BPF_MAP_TYPE_TASK_STORAGE**.
*
* Underneath, the value is stored locally at *task* instead of
@@ -3764,7 +3764,7 @@ static __u64 (*bpf_ktime_get_coarse_ns)(void) = (void *) 160;
/*
* bpf_ima_inode_hash
*
* Returns the stored IMA hash of the *inode* (if it's avaialable).
* Returns the stored IMA hash of the *inode* (if it's available).
* If the hash is larger than *size*, then only *size*
* bytes will be copied to *dst*
*
@@ -3796,12 +3796,12 @@ static struct socket *(*bpf_sock_from_file)(struct file *file) = (void *) 162;
*
* The argument *len_diff* can be used for querying with a planned
* size change. This allows to check MTU prior to changing packet
* ctx. Providing an *len_diff* adjustment that is larger than the
* ctx. Providing a *len_diff* adjustment that is larger than the
* actual packet size (resulting in negative packet size) will in
* principle not exceed the MTU, why it is not considered a
* failure. Other BPF-helpers are needed for performing the
* planned size change, why the responsability for catch a negative
* packet size belong in those helpers.
* principle not exceed the MTU, which is why it is not considered
* a failure. Other BPF helpers are needed for performing the
* planned size change; therefore the responsibility for catching
* a negative packet size belongs in those helpers.
*
* Specifying *ifindex* zero means the MTU check is performed
* against the current net device. This is practical if this isn't
@@ -4040,6 +4040,7 @@ static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *) 172;
*
* Returns
* Address of the traced function.
* 0 for kprobes placed within the function (not at the entry).
*/
static __u64 (*bpf_get_func_ip)(void *ctx) = (void *) 173;
@@ -4208,13 +4209,13 @@ static long (*bpf_strncmp)(const char *s1, __u32 s1_sz, const char *s2) = (void
/*
* bpf_get_func_arg
*
* Get **n**-th argument (zero based) of the traced function (for tracing programs)
* Get **n**-th argument register (zero based) of the traced function (for tracing programs)
* returned in **value**.
*
*
* Returns
* 0 on success.
* **-EINVAL** if n >= arguments count of traced function.
* **-EINVAL** if n >= argument register count of traced function.
*/
static long (*bpf_get_func_arg)(void *ctx, __u32 n, __u64 *value) = (void *) 183;
@@ -4234,32 +4235,45 @@ static long (*bpf_get_func_ret)(void *ctx, __u64 *value) = (void *) 184;
/*
* bpf_get_func_arg_cnt
*
* Get number of arguments of the traced function (for tracing programs).
* Get number of registers of the traced function (for tracing programs) where
* function arguments are stored in these registers.
*
*
* Returns
* The number of arguments of the traced function.
* The number of argument registers of the traced function.
*/
static long (*bpf_get_func_arg_cnt)(void *ctx) = (void *) 185;
/*
* bpf_get_retval
*
* Get the syscall's return value that will be returned to userspace.
* Get the BPF program's return value that will be returned to the upper layers.
*
* This helper is currently supported by cgroup programs only.
* This helper is currently supported by cgroup programs and only by the hooks
* where BPF program's return value is returned to the userspace via errno.
*
* Returns
* The syscall's return value.
* The BPF program's return value.
*/
static int (*bpf_get_retval)(void) = (void *) 186;
/*
* bpf_set_retval
*
* Set the syscall's return value that will be returned to userspace.
* Set the BPF program's return value that will be returned to the upper layers.
*
* This helper is currently supported by cgroup programs and only by the hooks
* where BPF program's return value is returned to the userspace via errno.
*
* Note that there is the following corner case where the program exports an error
* via bpf_set_retval but signals success via 'return 1':
*
* bpf_set_retval(-EPERM);
* return 1;
*
* In this case, the BPF program's return value will use helper's -EPERM. This
* still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
*
* This helper is currently supported by cgroup programs only.
*
* Returns
* 0 on success, or a negative error in case of failure.
@@ -4606,4 +4620,46 @@ static long (*bpf_tcp_raw_check_syncookie_ipv6)(struct ipv6hdr *iph, struct tcph
*/
static __u64 (*bpf_ktime_get_tai_ns)(void) = (void *) 208;
/*
* bpf_user_ringbuf_drain
*
* Drain samples from the specified user ring buffer, and invoke
* the provided callback for each such sample:
*
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
*
* If **callback_fn** returns 0, the helper will continue to try
* and drain the next sample, up to a maximum of
* BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
* the helper will skip the rest of the samples and return. Other
* return values are not used now, and will be rejected by the
* verifier.
*
* Returns
* The number of drained samples if no error was encountered while
* draining samples, or 0 if no samples were present in the ring
* buffer. If a user-space producer was epoll-waiting on this map,
* and at least one sample was drained, they will receive an event
* notification notifying them of available space in the ring
* buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
* function, no wakeup notification will be sent. If the
* BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
* be sent even if no sample was drained.
*
* On failure, the returned value is one of the following:
*
* **-EBUSY** if the ring buffer is contended, and another calling
* context was concurrently draining the ring buffer.
*
* **-EINVAL** if user-space is not properly tracking the ring
* buffer due to the producer position not being aligned to 8
* bytes, a sample not being aligned to 8 bytes, or the producer
* position not matching the advertised length of a sample.
*
* **-E2BIG** if user-space has tried to publish a sample which is
* larger than the size of the ring buffer, or which cannot fit
* within a struct bpf_dynptr.
*/
static long (*bpf_user_ringbuf_drain)(void *map, void *callback_fn, void *ctx, __u64 flags) = (void *) 209;

View File

@@ -160,18 +160,6 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
}
#endif
/*
* Helper structure used by eBPF C program
* to describe BPF map attributes to libbpf loader
*/
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
unsigned int map_flags;
} __attribute__((deprecated("use BTF-defined maps in .maps section")));
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */

View File

@@ -438,6 +438,113 @@ typeof(name(0)) name(unsigned long long *ctx) \
static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args)
#ifndef ___bpf_nth2
#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
_14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
#endif
#ifndef ___bpf_narg2
#define ___bpf_narg2(...) \
___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, \
6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
#endif
#define ___bpf_treg_cnt(t) \
__builtin_choose_expr(sizeof(t) == 1, 1, \
__builtin_choose_expr(sizeof(t) == 2, 1, \
__builtin_choose_expr(sizeof(t) == 4, 1, \
__builtin_choose_expr(sizeof(t) == 8, 1, \
__builtin_choose_expr(sizeof(t) == 16, 2, \
(void)0)))))
#define ___bpf_reg_cnt0() (0)
#define ___bpf_reg_cnt1(t, x) (___bpf_reg_cnt0() + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt2(t, x, args...) (___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt3(t, x, args...) (___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt4(t, x, args...) (___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt5(t, x, args...) (___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt6(t, x, args...) (___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt7(t, x, args...) (___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt8(t, x, args...) (___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt9(t, x, args...) (___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt10(t, x, args...) (___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt11(t, x, args...) (___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt12(t, x, args...) (___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t))
#define ___bpf_reg_cnt(args...) ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args)
#define ___bpf_union_arg(t, x, n) \
__builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \
__builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
__builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
__builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \
__builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \
(void)0)))))
#define ___bpf_ctx_arg0(n, args...)
#define ___bpf_ctx_arg1(n, t, x) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x))
#define ___bpf_ctx_arg2(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args)
#define ___bpf_ctx_arg3(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args)
#define ___bpf_ctx_arg4(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args)
#define ___bpf_ctx_arg5(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args)
#define ___bpf_ctx_arg6(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args)
#define ___bpf_ctx_arg7(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args)
#define ___bpf_ctx_arg8(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args)
#define ___bpf_ctx_arg9(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args)
#define ___bpf_ctx_arg10(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args)
#define ___bpf_ctx_arg11(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args)
#define ___bpf_ctx_arg12(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args)
#define ___bpf_ctx_arg(args...) ___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args)
#define ___bpf_ctx_decl0()
#define ___bpf_ctx_decl1(t, x) , t x
#define ___bpf_ctx_decl2(t, x, args...) , t x ___bpf_ctx_decl1(args)
#define ___bpf_ctx_decl3(t, x, args...) , t x ___bpf_ctx_decl2(args)
#define ___bpf_ctx_decl4(t, x, args...) , t x ___bpf_ctx_decl3(args)
#define ___bpf_ctx_decl5(t, x, args...) , t x ___bpf_ctx_decl4(args)
#define ___bpf_ctx_decl6(t, x, args...) , t x ___bpf_ctx_decl5(args)
#define ___bpf_ctx_decl7(t, x, args...) , t x ___bpf_ctx_decl6(args)
#define ___bpf_ctx_decl8(t, x, args...) , t x ___bpf_ctx_decl7(args)
#define ___bpf_ctx_decl9(t, x, args...) , t x ___bpf_ctx_decl8(args)
#define ___bpf_ctx_decl10(t, x, args...) , t x ___bpf_ctx_decl9(args)
#define ___bpf_ctx_decl11(t, x, args...) , t x ___bpf_ctx_decl10(args)
#define ___bpf_ctx_decl12(t, x, args...) , t x ___bpf_ctx_decl11(args)
#define ___bpf_ctx_decl(args...) ___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args)
/*
* BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct
* arguments. Since each struct argument might take one or two u64 values
* in the trampoline stack, argument type size is needed to place proper number
* of u64 values for each argument. Therefore, BPF_PROG2 has different
* syntax from BPF_PROG. For example, for the following BPF_PROG syntax:
*
* int BPF_PROG(test2, int a, int b) { ... }
*
* the corresponding BPF_PROG2 syntax is:
*
* int BPF_PROG2(test2, int, a, int, b) { ... }
*
* where type and the corresponding argument name are separated by comma.
*
* Use BPF_PROG2 macro if one of the arguments might be a struct/union larger
* than 8 bytes:
*
* int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b,
* int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
* {
* // access a, b, c, d, e, and ret directly
* ...
* }
*/
#define BPF_PROG2(name, args...) \
name(unsigned long long *ctx); \
static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx ___bpf_ctx_decl(args)); \
typeof(name(0)) name(unsigned long long *ctx) \
{ \
return ____##name(ctx ___bpf_ctx_arg(args)); \
} \
static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx ___bpf_ctx_decl(args))
struct pt_regs;
#define ___bpf_kprobe_args0() ctx

View File

@@ -4642,20 +4642,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
*/
struct btf *btf__load_vmlinux_btf(void)
{
struct {
const char *path_fmt;
bool raw_btf;
} locations[] = {
const char *locations[] = {
/* try canonical vmlinux BTF through sysfs first */
{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
/* fall back to trying to find vmlinux ELF on disk otherwise */
{ "/boot/vmlinux-%1$s" },
{ "/lib/modules/%1$s/vmlinux-%1$s" },
{ "/lib/modules/%1$s/build/vmlinux" },
{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
{ "/usr/lib/debug/boot/vmlinux-%1$s" },
{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
"/sys/kernel/btf/vmlinux",
/* fall back to trying to find vmlinux on disk otherwise */
"/boot/vmlinux-%1$s",
"/lib/modules/%1$s/vmlinux-%1$s",
"/lib/modules/%1$s/build/vmlinux",
"/usr/lib/modules/%1$s/kernel/vmlinux",
"/usr/lib/debug/boot/vmlinux-%1$s",
"/usr/lib/debug/boot/vmlinux-%1$s.debug",
"/usr/lib/debug/lib/modules/%1$s/vmlinux",
};
char path[PATH_MAX + 1];
struct utsname buf;
@@ -4665,15 +4662,12 @@ struct btf *btf__load_vmlinux_btf(void)
uname(&buf);
for (i = 0; i < ARRAY_SIZE(locations); i++) {
snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
snprintf(path, PATH_MAX, locations[i], buf.release);
if (access(path, R_OK))
if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS))
continue;
if (locations[i].raw_btf)
btf = btf__parse_raw(path);
else
btf = btf__parse_elf(path, NULL);
btf = btf__parse(path, NULL);
err = libbpf_get_error(btf);
pr_debug("loading kernel BTF '%s': %d\n", path, err);
if (err)

View File

@@ -486,6 +486,8 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
return (struct btf_enum *)(t + 1);
}
struct btf_enum64;
static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
{
return (struct btf_enum64 *)(t + 1);
@@ -493,7 +495,28 @@ static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
{
return ((__u64)e->val_hi32 << 32) | e->val_lo32;
/* struct btf_enum64 is introduced in Linux 6.0, which is very
* bleeding-edge. Here we are avoiding relying on struct btf_enum64
* definition coming from kernel UAPI headers to support wider range
* of system-wide kernel headers.
*
* Given this header can be also included from C++ applications, that
* further restricts C tricks we can use (like using compatible
* anonymous struct). So just treat struct btf_enum64 as
* a three-element array of u32 and access second (lo32) and third
* (hi32) elements directly.
*
* For reference, here is a struct btf_enum64 definition:
*
* const struct btf_enum64 {
* __u32 name_off;
* __u32 val_lo32;
* __u32 val_hi32;
* };
*/
const __u32 *e64 = (const __u32 *)e;
return ((__u64)e64[2] << 32) | e64[1];
}
static inline struct btf_member *btf_members(const struct btf_type *t)

View File

@@ -2385,7 +2385,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
/* default indent string is a tab */
if (!opts->indent_str)
if (!OPTS_GET(opts, indent_str, NULL))
d->typed_dump->indent_str[0] = '\t';
else
libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,

View File

@@ -163,6 +163,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
};
static const char * const prog_type_name[] = {
@@ -883,7 +884,7 @@ __u32 get_kernel_version(void)
__u32 major, minor, patch;
struct utsname info;
if (access(ubuntu_kver_file, R_OK) == 0) {
if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) == 0) {
FILE *f;
f = fopen(ubuntu_kver_file, "r");
@@ -2096,19 +2097,30 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return true;
}
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
{
int len;
len = snprintf(buf, buf_sz, "%s/%s", path, name);
if (len < 0)
return -EINVAL;
if (len >= buf_sz)
return -ENAMETOOLONG;
return 0;
}
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
char buf[PATH_MAX];
int len;
int err;
if (!path)
path = "/sys/fs/bpf";
len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
if (len < 0)
return -EINVAL;
else if (len >= PATH_MAX)
return -ENAMETOOLONG;
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
return err;
return bpf_map__set_pin_path(map, buf);
}
@@ -2372,6 +2384,12 @@ static size_t adjust_ringbuf_sz(size_t sz)
return sz;
}
static bool map_is_ringbuf(const struct bpf_map *map)
{
return map->def.type == BPF_MAP_TYPE_RINGBUF ||
map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
}
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
{
map->def.type = def->map_type;
@@ -2386,7 +2404,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
map->btf_value_type_id = def->value_type_id;
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
if (map->def.type == BPF_MAP_TYPE_RINGBUF)
if (map_is_ringbuf(map))
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
if (def->parts & MAP_DEF_MAP_TYPE)
@@ -4369,7 +4387,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
map->def.max_entries = max_entries;
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
if (map->def.type == BPF_MAP_TYPE_RINGBUF)
if (map_is_ringbuf(map))
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
return 0;
@@ -7961,17 +7979,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
continue;
if (path) {
int len;
len = snprintf(buf, PATH_MAX, "%s/%s", path,
bpf_map__name(map));
if (len < 0) {
err = -EINVAL;
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
goto err_unpin_maps;
} else if (len >= PATH_MAX) {
err = -ENAMETOOLONG;
goto err_unpin_maps;
}
sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
@@ -8009,14 +8019,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
char buf[PATH_MAX];
if (path) {
int len;
len = snprintf(buf, PATH_MAX, "%s/%s", path,
bpf_map__name(map));
if (len < 0)
return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
return libbpf_err(-ENAMETOOLONG);
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
return libbpf_err(err);
sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
@@ -8034,6 +8039,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
{
struct bpf_program *prog;
char buf[PATH_MAX];
int err;
if (!obj)
@@ -8045,17 +8051,9 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
}
bpf_object__for_each_program(prog, obj) {
char buf[PATH_MAX];
int len;
len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
if (len < 0) {
err = -EINVAL;
err = pathname_concat(buf, sizeof(buf), path, prog->name);
if (err)
goto err_unpin_programs;
} else if (len >= PATH_MAX) {
err = -ENAMETOOLONG;
goto err_unpin_programs;
}
err = bpf_program__pin(prog, buf);
if (err)
@@ -8066,13 +8064,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
err_unpin_programs:
while ((prog = bpf_object__prev_program(obj, prog))) {
char buf[PATH_MAX];
int len;
len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
if (len < 0)
continue;
else if (len >= PATH_MAX)
if (pathname_concat(buf, sizeof(buf), path, prog->name))
continue;
bpf_program__unpin(prog, buf);
@@ -8091,13 +8083,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
bpf_object__for_each_program(prog, obj) {
char buf[PATH_MAX];
int len;
len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
if (len < 0)
return libbpf_err(-EINVAL);
else if (len >= PATH_MAX)
return libbpf_err(-ENAMETOOLONG);
err = pathname_concat(buf, sizeof(buf), path, prog->name);
if (err)
return libbpf_err(err);
err = bpf_program__unpin(prog, buf);
if (err)
@@ -9084,11 +9073,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
int err = 0;
/* BPF program's BTF ID */
if (attach_prog_fd) {
if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
if (!attach_prog_fd) {
pr_warn("prog '%s': attach program FD is not set\n", prog->name);
return -EINVAL;
}
err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
if (err < 0) {
pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
attach_prog_fd, attach_name, err);
pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
prog->name, attach_prog_fd, attach_name, err);
return err;
}
*btf_obj_fd = 0;
@@ -9105,7 +9098,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
}
if (err) {
pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
prog->name, attach_name, err);
return err;
}
return 0;
@@ -9910,7 +9904,7 @@ static bool use_debugfs(void)
static int has_debugfs = -1;
if (has_debugfs < 0)
has_debugfs = access(DEBUGFS, F_OK) == 0;
has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
return has_debugfs == 1;
}
@@ -10727,7 +10721,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
continue;
snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
/* ensure it has required permissions */
if (access(result, perm) < 0)
if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
continue;
pr_debug("resolved '%s' to '%s'\n", file, result);
return 0;

View File

@@ -118,7 +118,9 @@ struct bpf_object_open_opts {
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
*/
const char *pin_root_path;
long :0;
__u32 :32; /* stub out now removed attach_prog_fd */
/* Additional kernel config content that augments and overrides
* system Kconfig for CONFIG_xxx externs.
*/
@@ -1011,6 +1013,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
/* Ring buffer APIs */
struct ring_buffer;
struct user_ring_buffer;
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
@@ -1030,6 +1033,112 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
struct user_ring_buffer_opts {
size_t sz; /* size of this struct, for forward/backward compatibility */
};
#define user_ring_buffer_opts__last_field sz
/* @brief **user_ring_buffer__new()** creates a new instance of a user ring
* buffer.
*
* @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
* @param opts Options for how the ring buffer should be created.
* @return A user ring buffer on success; NULL and errno being set on a
* failure.
*/
LIBBPF_API struct user_ring_buffer *
user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
* user ring buffer.
* @param rb A pointer to a user ring buffer.
* @param size The size of the sample, in bytes.
* @return A pointer to an 8-byte aligned reserved region of the user ring
* buffer; NULL, and errno being set if a sample could not be reserved.
*
* This function is *not* thread safe, and callers must synchronize accessing
* this function if there are multiple producers. If a size is requested that
* is larger than the size of the entire ring buffer, errno will be set to
* E2BIG and NULL is returned. If the ring buffer could accommodate the size,
* but currently does not have enough space, errno is set to ENOSPC and NULL is
* returned.
*
* After initializing the sample, callers must invoke
* **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise,
* the sample must be freed with **user_ring_buffer__discard()**.
*/
LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
* ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
* available.
* @param rb The user ring buffer.
* @param size The size of the sample, in bytes.
* @param timeout_ms The amount of time, in milliseconds, for which the caller
* should block when waiting for a sample. -1 causes the caller to block
* indefinitely.
* @return A pointer to an 8-byte aligned reserved region of the user ring
* buffer; NULL, and errno being set if a sample could not be reserved.
*
* This function is *not* thread safe, and callers must synchronize
* accessing this function if there are multiple producers
*
* If **timeout_ms** is -1, the function will block indefinitely until a sample
* becomes available. Otherwise, **timeout_ms** must be non-negative, or errno
* is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking
* will occur and the function will return immediately after attempting to
* reserve a sample.
*
* If **size** is larger than the size of the entire ring buffer, errno is set
* to E2BIG and NULL is returned. If the ring buffer could accommodate
* **size**, but currently does not have enough space, the caller will block
* until at most **timeout_ms** has elapsed. If insufficient space is available
* at that time, errno is set to ENOSPC, and NULL is returned.
*
* The kernel guarantees that it will wake up this thread to check if
* sufficient space is available in the ring buffer at least once per
* invocation of the **bpf_ringbuf_drain()** helper function, provided that at
* least one sample is consumed, and the BPF program did not invoke the
* function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the
* kernel does not guarantee this. If the helper function is invoked with
* BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is
* consumed.
*
* When a sample of size **size** is found within **timeout_ms**, a pointer to
* the sample is returned. After initializing the sample, callers must invoke
* **user_ring_buffer__submit()** to post the sample to the ring buffer.
* Otherwise, the sample must be freed with **user_ring_buffer__discard()**.
*/
LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
__u32 size,
int timeout_ms);
/* @brief **user_ring_buffer__submit()** submits a previously reserved sample
* into the ring buffer.
* @param rb The user ring buffer.
* @param sample A reserved sample.
*
* It is not necessary to synchronize amongst multiple producers when invoking
* this function.
*/
LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
/* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
* @param rb The user ring buffer.
* @param sample A reserved sample.
*
* It is not necessary to synchronize amongst multiple producers when invoking
* this function.
*/
LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
* created with **user_ring_buffer__new()**.
* @param rb The user ring buffer being freed.
*/
LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb);
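Taken together, a minimal producer-side sketch of this API; the sample struct and the way map_fd is obtained are assumptions for illustration only:

    /* Hypothetical sketch: publish one sample to a BPF_MAP_TYPE_USER_RINGBUF map. */
    struct my_sample { __u64 value; };        /* example payload type */
    struct user_ring_buffer *rb;
    struct my_sample *s;

    rb = user_ring_buffer__new(map_fd, NULL); /* map_fd obtained elsewhere */
    if (!rb)
        return -errno;
    s = user_ring_buffer__reserve(rb, sizeof(*s));
    if (!s) {
        /* errno is E2BIG (sample too large) or ENOSPC (ring currently full) */
        user_ring_buffer__free(rb);
        return -errno;
    }
    s->value = 42;
    user_ring_buffer__submit(rb, s); /* or user_ring_buffer__discard(rb, s) */
    user_ring_buffer__free(rb);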
/* Perf buffer APIs */
struct perf_buffer;

View File

@@ -368,3 +368,13 @@ LIBBPF_1.0.0 {
libbpf_bpf_prog_type_str;
perf_buffer__buffer;
};
LIBBPF_1.1.0 {
global:
user_ring_buffer__discard;
user_ring_buffer__free;
user_ring_buffer__new;
user_ring_buffer__reserve;
user_ring_buffer__reserve_blocking;
user_ring_buffer__submit;
} LIBBPF_1.0.0;

View File

@@ -231,6 +231,7 @@ static int probe_map_create(enum bpf_map_type map_type)
return btf_fd;
break;
case BPF_MAP_TYPE_RINGBUF:
case BPF_MAP_TYPE_USER_RINGBUF:
key_size = 0;
value_size = 0;
max_entries = 4096;

View File

@@ -4,6 +4,6 @@
#define __LIBBPF_VERSION_H
#define LIBBPF_MAJOR_VERSION 1
#define LIBBPF_MINOR_VERSION 0
#define LIBBPF_MINOR_VERSION 1
#endif /* __LIBBPF_VERSION_H */

View File

@@ -16,6 +16,7 @@
#include <asm/barrier.h>
#include <sys/mman.h>
#include <sys/epoll.h>
#include <time.h>
#include "libbpf.h"
#include "libbpf_internal.h"
@@ -39,6 +40,23 @@ struct ring_buffer {
int ring_cnt;
};
struct user_ring_buffer {
struct epoll_event event;
unsigned long *consumer_pos;
unsigned long *producer_pos;
void *data;
unsigned long mask;
size_t page_size;
int map_fd;
int epoll_fd;
};
/* 8-byte ring buffer header structure */
struct ringbuf_hdr {
__u32 len;
__u32 pad;
};
static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
{
if (r->consumer_pos) {
@@ -300,3 +318,256 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
{
return rb->epoll_fd;
}
static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
{
if (rb->consumer_pos) {
munmap(rb->consumer_pos, rb->page_size);
rb->consumer_pos = NULL;
}
if (rb->producer_pos) {
munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1));
rb->producer_pos = NULL;
}
}
void user_ring_buffer__free(struct user_ring_buffer *rb)
{
if (!rb)
return;
user_ringbuf_unmap_ring(rb);
if (rb->epoll_fd >= 0)
close(rb->epoll_fd);
free(rb);
}
static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
{
struct bpf_map_info info;
__u32 len = sizeof(info);
void *tmp;
struct epoll_event *rb_epoll;
int err;
memset(&info, 0, sizeof(info));
err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
if (err) {
err = -errno;
pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err);
return err;
}
if (info.type != BPF_MAP_TYPE_USER_RINGBUF) {
pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd);
return -EINVAL;
}
rb->map_fd = map_fd;
rb->mask = info.max_entries - 1;
/* Map read-only consumer page */
tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
map_fd, err);
return err;
}
rb->consumer_pos = tmp;
/* Map read-write the producer page and data pages. We map the data
* region as twice the total size of the ring buffer to allow the
* simple reading and writing of samples that wrap around the end of
* the buffer. See the kernel implementation for details.
*/
tmp = mmap(NULL, rb->page_size + 2 * info.max_entries,
PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
map_fd, err);
return err;
}
rb->producer_pos = tmp;
rb->data = tmp + rb->page_size;
rb_epoll = &rb->event;
rb_epoll->events = EPOLLOUT;
if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) {
err = -errno;
pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err);
return err;
}
return 0;
}
struct user_ring_buffer *
user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts)
{
struct user_ring_buffer *rb;
int err;
if (!OPTS_VALID(opts, user_ring_buffer_opts))
return errno = EINVAL, NULL;
rb = calloc(1, sizeof(*rb));
if (!rb)
return errno = ENOMEM, NULL;
rb->page_size = getpagesize();
rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
if (rb->epoll_fd < 0) {
err = -errno;
pr_warn("user ringbuf: failed to create epoll instance: %d\n", err);
goto err_out;
}
err = user_ringbuf_map(rb, map_fd);
if (err)
goto err_out;
return rb;
err_out:
user_ring_buffer__free(rb);
return errno = -err, NULL;
}
static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard)
{
__u32 new_len;
struct ringbuf_hdr *hdr;
uintptr_t hdr_offset;
hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ;
hdr = rb->data + (hdr_offset & rb->mask);
new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT;
if (discard)
new_len |= BPF_RINGBUF_DISCARD_BIT;
/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
* the kernel.
*/
__atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL);
}
void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample)
{
user_ringbuf_commit(rb, sample, true);
}
void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample)
{
user_ringbuf_commit(rb, sample, false);
}
void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
{
__u32 avail_size, total_size, max_size;
/* 64-bit to avoid overflow in case of extreme application behavior */
__u64 cons_pos, prod_pos;
struct ringbuf_hdr *hdr;
/* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
* the kernel.
*/
cons_pos = smp_load_acquire(rb->consumer_pos);
/* Synchronizes with smp_store_release() in user_ringbuf_commit() */
prod_pos = smp_load_acquire(rb->producer_pos);
max_size = rb->mask + 1;
avail_size = max_size - (prod_pos - cons_pos);
/* Round up total size to a multiple of 8. */
total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8;
if (total_size > max_size)
return errno = E2BIG, NULL;
if (avail_size < total_size)
return errno = ENOSPC, NULL;
hdr = rb->data + (prod_pos & rb->mask);
hdr->len = size | BPF_RINGBUF_BUSY_BIT;
hdr->pad = 0;
/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
* the kernel.
*/
smp_store_release(rb->producer_pos, prod_pos + total_size);
return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask);
}
static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end)
{
__u64 start_ns, end_ns, ns_per_s = 1000000000;
start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec;
end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec;
return end_ns - start_ns;
}
void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms)
{
void *sample;
int err, ms_remaining = timeout_ms;
struct timespec start;
if (timeout_ms < 0 && timeout_ms != -1)
return errno = EINVAL, NULL;
if (timeout_ms != -1) {
err = clock_gettime(CLOCK_MONOTONIC, &start);
if (err)
return NULL;
}
do {
int cnt, ms_elapsed;
struct timespec curr;
__u64 ns_per_ms = 1000000;
sample = user_ring_buffer__reserve(rb, size);
if (sample)
return sample;
else if (errno != ENOSPC)
return NULL;
/* The kernel guarantees at least one event notification
* delivery whenever at least one sample is drained from the
* ring buffer in an invocation to bpf_ringbuf_drain(). Other
* additional events may be delivered at any time, but only one
* event is guaranteed per bpf_ringbuf_drain() invocation,
* provided that a sample is drained, and the BPF program did
* not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If
* BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a
* wakeup event will be delivered even if no samples are
* drained.
*/
cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining);
if (cnt < 0)
return NULL;
if (timeout_ms == -1)
continue;
err = clock_gettime(CLOCK_MONOTONIC, &curr);
if (err)
return NULL;
ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms;
ms_remaining = timeout_ms - ms_elapsed;
} while (ms_remaining > 0);
/* Try one more time to reserve a sample after the specified timeout has elapsed. */
return user_ring_buffer__reserve(rb, size);
}

View File

@@ -251,6 +251,29 @@ static inline int skel_map_update_elem(int fd, const void *key,
return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
}
static inline int skel_map_delete_elem(int fd, const void *key)
{
const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
memset(&attr, 0, attr_sz);
attr.map_fd = fd;
attr.key = (long)key;
return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
}
static inline int skel_map_get_fd_by_id(__u32 id)
{
const size_t attr_sz = offsetofend(union bpf_attr, flags);
union bpf_attr attr;
memset(&attr, 0, attr_sz);
attr.map_id = id;
return skel_sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
}
static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
{
const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);

View File

@@ -282,7 +282,7 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
* If this is not supported, USDTs with semaphores will not be supported.
* Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
*/
man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0;
man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0;
return man;
}