Compare commits
44 Commits
libbpf_1_0
...
v1.0.1_net
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dac1c4b6a8 | ||
|
|
1714037104 | ||
|
|
d598cb20c7 | ||
|
|
ce321d6fd4 | ||
|
|
0f5b3a10ae | ||
|
|
5859c59e50 | ||
|
|
85f8b7c4dc | ||
|
|
9da0dcb621 | ||
|
|
82c4054376 | ||
|
|
b3a117773d | ||
|
|
fc2577c54c | ||
|
|
0420f75dbc | ||
|
|
aa25f218b4 | ||
|
|
9e9bf46c92 | ||
|
|
28903eb40e | ||
|
|
8138aa78bd | ||
|
|
8ac9773f52 | ||
|
|
b63791cbde | ||
|
|
0ff6d28aec | ||
|
|
861364fa45 | ||
|
|
21ec5ca723 | ||
|
|
255690da57 | ||
|
|
b1753eaf3b | ||
|
|
eeb2bc4061 | ||
|
|
a11587cc01 | ||
|
|
7fb6138fae | ||
|
|
c918b3e724 | ||
|
|
981001bf46 | ||
|
|
ee7d295f83 | ||
|
|
94d69cc07f | ||
|
|
12a41a80c5 | ||
|
|
10a32130e7 | ||
|
|
fad270918d | ||
|
|
c091b07808 | ||
|
|
efd33720cd | ||
|
|
9aedff8d03 | ||
|
|
51e63f7229 | ||
|
|
c53af98d1a | ||
|
|
2c44349e09 | ||
|
|
58361243ec | ||
|
|
c32e1cf948 | ||
|
|
c4f44c7c11 | ||
|
|
a7a525d47a | ||
|
|
cfbd763ef8 |
5
.github/actions/build-selftests/action.yml
vendored
@@ -18,9 +18,10 @@ runs:
|
||||
steps:
|
||||
- shell: bash
|
||||
run: |
|
||||
echo "::group::Setup Env"
|
||||
source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
|
||||
foldable start "Setup Env"
|
||||
sudo apt-get install -y qemu-kvm zstd binutils-dev elfutils libcap-dev libelf-dev libdw-dev python3-docutils
|
||||
echo "::endgroup::"
|
||||
foldable end
|
||||
- shell: bash
|
||||
run: |
|
||||
export KERNEL=${{ inputs.kernel }}
|
||||
|
||||
144405
.github/actions/build-selftests/vmlinux.h
vendored
19
.github/actions/vmtest/action.yml
vendored
@@ -41,24 +41,26 @@ runs:
|
||||
- name: Prepare to build BPF selftests
|
||||
shell: bash
|
||||
run: |
|
||||
echo "::group::Prepare building selftest"
|
||||
source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
|
||||
foldable start "Prepare building selftest"
|
||||
cd .kernel
|
||||
cat tools/testing/selftests/bpf/config \
|
||||
tools/testing/selftests/bpf/config.${{ inputs.arch }} > .config
|
||||
make olddefconfig && make prepare
|
||||
cd -
|
||||
echo "::endgroup::"
|
||||
foldable end
|
||||
# 2. if kernel == LATEST, build kernel image from tree
|
||||
- name: Build kernel image
|
||||
if: ${{ inputs.kernel == 'LATEST' }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "::group::Build Kernel Image"
|
||||
source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
|
||||
foldable start "Build Kernel Image"
|
||||
cd .kernel
|
||||
make -j $((4*$(nproc))) all > /dev/null
|
||||
cp vmlinux ${{ github.workspace }}
|
||||
cd -
|
||||
echo "::endgroup::"
|
||||
foldable end
|
||||
# else, just download prebuilt kernel image
|
||||
- name: Download prebuilt kernel
|
||||
if: ${{ inputs.kernel != 'LATEST' }}
|
||||
@@ -76,14 +78,19 @@ runs:
|
||||
- name: prepare rootfs
|
||||
uses: libbpf/ci/prepare-rootfs@master
|
||||
with:
|
||||
kernel: ${{ inputs.kernel }}
|
||||
project-name: 'libbpf'
|
||||
arch: ${{ inputs.arch }}
|
||||
kernel: ${{ inputs.kernel }}
|
||||
kernel-root: '.kernel'
|
||||
image-output: '/tmp/root.img'
|
||||
# 5. run selftest in QEMU
|
||||
- name: Run selftests
|
||||
env:
|
||||
KERNEL: ${{ inputs.kernel }}
|
||||
REPO_ROOT: ${{ github.workspace }}
|
||||
uses: libbpf/ci/run-qemu@master
|
||||
with:
|
||||
arch: ${{ inputs.arch }}
|
||||
img: '/tmp/root.img'
|
||||
vmlinuz: 'vmlinuz'
|
||||
arch: ${{ inputs.arch }}
|
||||
kernel-root: '.kernel'
|
||||
|
||||
5
.github/workflows/coverity.yml
vendored
@@ -15,12 +15,13 @@ jobs:
|
||||
- uses: ./.github/actions/setup
|
||||
- name: Run coverity
|
||||
run: |
|
||||
echo ::group::Setup CI env
|
||||
source "${GITHUB_WORKSPACE}"/ci/vmtest/helpers.sh
|
||||
foldable start "Setup CI env"
|
||||
source /tmp/ci_setup
|
||||
export COVERITY_SCAN_NOTIFICATION_EMAIL="${AUTHOR_EMAIL}"
|
||||
export COVERITY_SCAN_BRANCH_PATTERN=${GITHUB_REF##refs/*/}
|
||||
export TRAVIS_BRANCH=${COVERITY_SCAN_BRANCH_PATTERN}
|
||||
echo ::endgroup::
|
||||
foldable end
|
||||
scripts/coverity.sh
|
||||
env:
|
||||
COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }}
|
||||
|
||||
@@ -1 +1 @@
|
||||
14b20b784f59bdd95f6f1cfb112c9818bcec4d84
|
||||
60240bc26114543fcbfcd8a28466e67e77b20388
|
||||
|
||||
@@ -1 +1 @@
|
||||
e34cfee65ec891a319ce79797dda18083af33a76
|
||||
87dbdc230d162bf9ee1ac77c8ade178b6b1e199e
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
<img src="https://user-images.githubusercontent.com/508075/185997470-2f427d3d-f040-4eef-afc5-ae4f766615b2.png" width="40%" >
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="assets/libbpf-logo-sideways-darkbg.png" width="40%">
|
||||
<img src="assets/libbpf-logo-sideways.png" width="40%">
|
||||
</picture>
|
||||
|
||||
libbpf
|
||||
[](https://github.com/libbpf/libbpf/actions/workflows/test.yml)
|
||||
|
||||
BIN
assets/libbpf-logo-compact-darkbg.png
Normal file
|
After Width: | Height: | Size: 262 KiB |
BIN
assets/libbpf-logo-compact-mono.png
Normal file
|
After Width: | Height: | Size: 128 KiB |
BIN
assets/libbpf-logo-compact.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
BIN
assets/libbpf-logo-sideways-darkbg.png
Normal file
|
After Width: | Height: | Size: 284 KiB |
BIN
assets/libbpf-logo-sideways-mono.png
Normal file
|
After Width: | Height: | Size: 142 KiB |
BIN
assets/libbpf-logo-sideways.png
Normal file
|
After Width: | Height: | Size: 140 KiB |
BIN
assets/libbpf-logo-sparse-darkbg.png
Normal file
|
After Width: | Height: | Size: 352 KiB |
BIN
assets/libbpf-logo-sparse-mono.png
Normal file
|
After Width: | Height: | Size: 206 KiB |
BIN
assets/libbpf-logo-sparse.png
Normal file
|
After Width: | Height: | Size: 236 KiB |
@@ -36,6 +36,7 @@ $ sudo systemctl daemon-reload
|
||||
$ sudo tee /etc/actions-runner-libbpf
|
||||
repo=<owner>/<name>
|
||||
access_token=<ghp_***>
|
||||
runner_name=<hostname>
|
||||
```
|
||||
|
||||
Access token should have the repo scope, consult
|
||||
|
||||
@@ -24,7 +24,9 @@ RUN apt-get update && apt-get -y install \
|
||||
rsync \
|
||||
software-properties-common \
|
||||
sudo \
|
||||
tree
|
||||
tree \
|
||||
iproute2 \
|
||||
iputils-ping
|
||||
|
||||
# amd64 dependencies.
|
||||
COPY --from=ld-prefix / /usr/x86_64-linux-gnu/
|
||||
@@ -33,7 +35,7 @@ RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/
|
||||
ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu
|
||||
|
||||
# amd64 Github Actions Runner.
|
||||
ARG version=2.285.0
|
||||
ARG version=2.296.0
|
||||
RUN useradd -m actions-runner
|
||||
RUN echo "actions-runner ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers
|
||||
RUN echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >>/etc/sudoers
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#
|
||||
# - repo=<owner>/<name>
|
||||
# - access_token=<ghp_***>
|
||||
#
|
||||
# - runner_name=<hostname>
|
||||
|
||||
set -e -u
|
||||
|
||||
@@ -34,6 +34,7 @@ registration_token=$(jq --raw-output .token "$token_file")
|
||||
--url "https://github.com/$repo" \
|
||||
--token "$registration_token" \
|
||||
--labels z15 \
|
||||
--name "$runner_name" \
|
||||
--ephemeral
|
||||
|
||||
# Run one job.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# This file is not used and is there for historic purposes only.
|
||||
# See WHITELIST-5.5.0 instead.
|
||||
# See ALLOWLIST-5.5.0 instead.
|
||||
|
||||
# PERMANENTLY DISABLED
|
||||
align # verifier output format changed
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
# TEMPORARY
|
||||
get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
|
||||
stacktrace_build_id_nmi
|
||||
stacktrace_build_id
|
||||
task_fd_query_rawtp
|
||||
varlen
|
||||
|
||||
@@ -1,68 +1,6 @@
|
||||
# TEMPORARY until bpf-next -> bpf merge
|
||||
lru_bug # prog 'printk': failed to auto-attach: -524
|
||||
|
||||
# TEMPORARY
|
||||
atomics # attach(add): actual -524 <= expected 0 (trampoline)
|
||||
bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc)
|
||||
bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
|
||||
bpf_tcp_ca # JIT does not support calling kernel function (kfunc)
|
||||
bpf_loop # attaches to __x64_sys_nanosleep
|
||||
bpf_mod_race # BPF trampoline
|
||||
bpf_nf # JIT does not support calling kernel function
|
||||
core_read_macros # unknown func bpf_probe_read#4 (overlapping)
|
||||
d_path # failed to auto-attach program 'prog_stat': -524 (trampoline)
|
||||
dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline)
|
||||
fentry_fexit # fentry attach failed: -524 (trampoline)
|
||||
fentry_test # fentry_first_attach unexpected error: -524 (trampoline)
|
||||
fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline)
|
||||
fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
|
||||
fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline)
|
||||
fexit_test # fexit_first_attach unexpected error: -524 (trampoline)
|
||||
get_func_args_test # trampoline
|
||||
get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline)
|
||||
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
|
||||
kfree_skb # attach fentry unexpected error: -524 (trampoline)
|
||||
kfunc_call # 'bpf_prog_active': not found in kernel BTF (?)
|
||||
ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
|
||||
ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
|
||||
ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?)
|
||||
modify_return # modify_return attach failed: -524 (trampoline)
|
||||
module_attach # skel_attach skeleton attach failed: -524 (trampoline)
|
||||
mptcp
|
||||
kprobe_multi_test # relies on fentry
|
||||
netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?)
|
||||
probe_user # check_kprobe_res wrong kprobe res from probe read (?)
|
||||
recursion # skel_attach unexpected error: -524 (trampoline)
|
||||
ringbuf # skel_load skeleton load failed (?)
|
||||
sk_assign # Can't read on server: Invalid argument (?)
|
||||
sk_lookup # endianness problem
|
||||
sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline)
|
||||
skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline)
|
||||
socket_cookie # prog_attach unexpected error: -524 (trampoline)
|
||||
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
|
||||
tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?)
|
||||
task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline)
|
||||
test_bpffs # bpffs test failed 255 (iterator)
|
||||
test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline)
|
||||
test_ima # failed to auto-attach program 'ima': -524 (trampoline)
|
||||
test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline)
|
||||
test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?)
|
||||
test_overhead # attach_fentry unexpected error: -524 (trampoline)
|
||||
test_profiler # unknown func bpf_probe_read_str#45 (overlapping)
|
||||
timer # failed to auto-attach program 'test1': -524 (trampoline)
|
||||
timer_crash # trampoline
|
||||
timer_mim # failed to auto-attach program 'test1': -524 (trampoline)
|
||||
trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline)
|
||||
trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?)
|
||||
trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?)
|
||||
trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
|
||||
verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
|
||||
vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline)
|
||||
xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?)
|
||||
xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
|
||||
xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
|
||||
map_kptr # failed to open_and_load program: -524 (trampoline)
|
||||
bpf_cookie # failed to open_and_load program: -524 (trampoline)
|
||||
xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
|
||||
send_signal # intermittently fails to receive signal
|
||||
select_reuseport # intermittently fails on new s390x setup
|
||||
xdp_synproxy # JIT does not support calling kernel function (kfunc)
|
||||
unpriv_bpf_disabled # fentry
|
||||
lru_bug
|
||||
usdt/basic # failing verifier due to bounds check after LLVM update
|
||||
usdt/multispec # same as above
|
||||
|
||||
@@ -28,7 +28,7 @@ test_progs() {
|
||||
fi
|
||||
}
|
||||
|
||||
test_progs_noalu() {
|
||||
test_progs_no_alu32() {
|
||||
foldable start test_progs-no_alu32 "Testing test_progs-no_alu32"
|
||||
./test_progs-no_alu32 ${DENYLIST:+-d$DENYLIST} ${ALLOWLIST:+-a$ALLOWLIST} && true
|
||||
echo "test_progs-no_alu32:$?" >> "${STATUS_FILE}"
|
||||
@@ -55,9 +55,20 @@ test_verifier() {
|
||||
|
||||
foldable end vm_init
|
||||
|
||||
configs_path=${PROJECT_NAME}/vmtest/configs
|
||||
DENYLIST=$(read_lists "$configs_path/DENYLIST-${KERNEL}" "$configs_path/DENYLIST-${KERNEL}.${ARCH}")
|
||||
ALLOWLIST=$(read_lists "$configs_path/ALLOWLIST-${KERNEL}" "$configs_path/ALLOWLIST-${KERNEL}.${ARCH}")
|
||||
configs_path=/${PROJECT_NAME}/selftests/bpf
|
||||
local_configs_path=${PROJECT_NAME}/vmtest/configs
|
||||
DENYLIST=$(read_lists \
|
||||
"$configs_path/DENYLIST" \
|
||||
"$configs_path/DENYLIST.${ARCH}" \
|
||||
"$local_configs_path/DENYLIST-${KERNEL}" \
|
||||
"$local_configs_path/DENYLIST-${KERNEL}.${ARCH}" \
|
||||
)
|
||||
ALLOWLIST=$(read_lists \
|
||||
"$configs_path/ALLOWLIST" \
|
||||
"$configs_path/ALLOWLIST.${ARCH}" \
|
||||
"$local_configs_path/ALLOWLIST-${KERNEL}" \
|
||||
"$local_configs_path/ALLOWLIST-${KERNEL}.${ARCH}" \
|
||||
)
|
||||
|
||||
echo "DENYLIST: ${DENYLIST}"
|
||||
echo "ALLOWLIST: ${ALLOWLIST}"
|
||||
@@ -66,7 +77,7 @@ cd ${PROJECT_NAME}/selftests/bpf
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
test_progs
|
||||
test_progs_noalu
|
||||
test_progs_no_alu32
|
||||
test_maps
|
||||
test_verifier
|
||||
else
|
||||
|
||||
@@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key {
|
||||
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
|
||||
};
|
||||
|
||||
enum bpf_cgroup_iter_order {
|
||||
BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
|
||||
BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */
|
||||
BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */
|
||||
BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */
|
||||
BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */
|
||||
};
|
||||
|
||||
union bpf_iter_link_info {
|
||||
struct {
|
||||
__u32 map_fd;
|
||||
} map;
|
||||
struct {
|
||||
enum bpf_cgroup_iter_order order;
|
||||
|
||||
/* At most one of cgroup_fd and cgroup_id can be non-zero. If
|
||||
* both are zero, the walk starts from the default cgroup v2
|
||||
* root. For walking v1 hierarchy, one should always explicitly
|
||||
* specify cgroup_fd.
|
||||
*/
|
||||
__u32 cgroup_fd;
|
||||
__u64 cgroup_id;
|
||||
} cgroup;
|
||||
};
|
||||
|
||||
/* BPF syscall commands, see bpf(2) man-page for more details. */
|
||||
@@ -909,6 +928,7 @@ enum bpf_map_type {
|
||||
BPF_MAP_TYPE_INODE_STORAGE,
|
||||
BPF_MAP_TYPE_TASK_STORAGE,
|
||||
BPF_MAP_TYPE_BLOOM_FILTER,
|
||||
BPF_MAP_TYPE_USER_RINGBUF,
|
||||
};
|
||||
|
||||
/* Note that tracing related programs such as
|
||||
@@ -1233,7 +1253,7 @@ enum {
|
||||
|
||||
/* Query effective (directly attached + inherited from ancestor cgroups)
|
||||
* programs that will be executed for events within a cgroup.
|
||||
* attach_flags with this flag are returned only for directly attached programs.
|
||||
* attach_flags with this flag are always returned 0.
|
||||
*/
|
||||
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
||||
|
||||
@@ -1432,7 +1452,10 @@ union bpf_attr {
|
||||
__u32 attach_flags;
|
||||
__aligned_u64 prog_ids;
|
||||
__u32 prog_cnt;
|
||||
__aligned_u64 prog_attach_flags; /* output: per-program attach_flags */
|
||||
/* output: per-program attach_flags.
|
||||
* not allowed to be set during effective query.
|
||||
*/
|
||||
__aligned_u64 prog_attach_flags;
|
||||
} query;
|
||||
|
||||
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
|
||||
@@ -4437,7 +4460,7 @@ union bpf_attr {
|
||||
*
|
||||
* **-EEXIST** if the option already exists.
|
||||
*
|
||||
* **-EFAULT** on failrue to parse the existing header options.
|
||||
* **-EFAULT** on failure to parse the existing header options.
|
||||
*
|
||||
* **-EPERM** if the helper cannot be used under the current
|
||||
* *skops*\ **->op**.
|
||||
@@ -4646,7 +4669,7 @@ union bpf_attr {
|
||||
* a *map* with *task* as the **key**. From this
|
||||
* perspective, the usage is not much different from
|
||||
* **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
|
||||
* helper enforces the key must be an task_struct and the map must also
|
||||
* helper enforces the key must be a task_struct and the map must also
|
||||
* be a **BPF_MAP_TYPE_TASK_STORAGE**.
|
||||
*
|
||||
* Underneath, the value is stored locally at *task* instead of
|
||||
@@ -4704,7 +4727,7 @@ union bpf_attr {
|
||||
*
|
||||
* long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
|
||||
* Description
|
||||
* Returns the stored IMA hash of the *inode* (if it's avaialable).
|
||||
* Returns the stored IMA hash of the *inode* (if it's available).
|
||||
* If the hash is larger than *size*, then only *size*
|
||||
* bytes will be copied to *dst*
|
||||
* Return
|
||||
@@ -4728,12 +4751,12 @@ union bpf_attr {
|
||||
*
|
||||
* The argument *len_diff* can be used for querying with a planned
|
||||
* size change. This allows to check MTU prior to changing packet
|
||||
* ctx. Providing an *len_diff* adjustment that is larger than the
|
||||
* ctx. Providing a *len_diff* adjustment that is larger than the
|
||||
* actual packet size (resulting in negative packet size) will in
|
||||
* principle not exceed the MTU, why it is not considered a
|
||||
* failure. Other BPF-helpers are needed for performing the
|
||||
* planned size change, why the responsability for catch a negative
|
||||
* packet size belong in those helpers.
|
||||
* principle not exceed the MTU, which is why it is not considered
|
||||
* a failure. Other BPF helpers are needed for performing the
|
||||
* planned size change; therefore the responsibility for catching
|
||||
* a negative packet size belongs in those helpers.
|
||||
*
|
||||
* Specifying *ifindex* zero means the MTU check is performed
|
||||
* against the current net device. This is practical if this isn't
|
||||
@@ -4931,6 +4954,7 @@ union bpf_attr {
|
||||
* Get address of the traced function (for tracing and kprobe programs).
|
||||
* Return
|
||||
* Address of the traced function.
|
||||
* 0 for kprobes placed within the function (not at the entry).
|
||||
*
|
||||
* u64 bpf_get_attach_cookie(void *ctx)
|
||||
* Description
|
||||
@@ -5060,12 +5084,12 @@ union bpf_attr {
|
||||
*
|
||||
* long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
|
||||
* Description
|
||||
* Get **n**-th argument (zero based) of the traced function (for tracing programs)
|
||||
* Get **n**-th argument register (zero based) of the traced function (for tracing programs)
|
||||
* returned in **value**.
|
||||
*
|
||||
* Return
|
||||
* 0 on success.
|
||||
* **-EINVAL** if n >= arguments count of traced function.
|
||||
* **-EINVAL** if n >= argument register count of traced function.
|
||||
*
|
||||
* long bpf_get_func_ret(void *ctx, u64 *value)
|
||||
* Description
|
||||
@@ -5078,24 +5102,37 @@ union bpf_attr {
|
||||
*
|
||||
* long bpf_get_func_arg_cnt(void *ctx)
|
||||
* Description
|
||||
* Get number of arguments of the traced function (for tracing programs).
|
||||
* Get number of registers of the traced function (for tracing programs) where
|
||||
* function arguments are stored in these registers.
|
||||
*
|
||||
* Return
|
||||
* The number of arguments of the traced function.
|
||||
* The number of argument registers of the traced function.
|
||||
*
|
||||
* int bpf_get_retval(void)
|
||||
* Description
|
||||
* Get the syscall's return value that will be returned to userspace.
|
||||
* Get the BPF program's return value that will be returned to the upper layers.
|
||||
*
|
||||
* This helper is currently supported by cgroup programs only.
|
||||
* This helper is currently supported by cgroup programs and only by the hooks
|
||||
* where BPF program's return value is returned to the userspace via errno.
|
||||
* Return
|
||||
* The syscall's return value.
|
||||
* The BPF program's return value.
|
||||
*
|
||||
* int bpf_set_retval(int retval)
|
||||
* Description
|
||||
* Set the syscall's return value that will be returned to userspace.
|
||||
* Set the BPF program's return value that will be returned to the upper layers.
|
||||
*
|
||||
* This helper is currently supported by cgroup programs and only by the hooks
|
||||
* where BPF program's return value is returned to the userspace via errno.
|
||||
*
|
||||
* Note that there is the following corner case where the program exports an error
|
||||
* via bpf_set_retval but signals success via 'return 1':
|
||||
*
|
||||
* bpf_set_retval(-EPERM);
|
||||
* return 1;
|
||||
*
|
||||
* In this case, the BPF program's return value will use helper's -EPERM. This
|
||||
* still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
|
||||
*
|
||||
* This helper is currently supported by cgroup programs only.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
@@ -5355,6 +5392,43 @@ union bpf_attr {
|
||||
* Return
|
||||
* Current *ktime*.
|
||||
*
|
||||
* long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
|
||||
* Description
|
||||
* Drain samples from the specified user ring buffer, and invoke
|
||||
* the provided callback for each such sample:
|
||||
*
|
||||
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
|
||||
*
|
||||
* If **callback_fn** returns 0, the helper will continue to try
|
||||
* and drain the next sample, up to a maximum of
|
||||
* BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
|
||||
* the helper will skip the rest of the samples and return. Other
|
||||
* return values are not used now, and will be rejected by the
|
||||
* verifier.
|
||||
* Return
|
||||
* The number of drained samples if no error was encountered while
|
||||
* draining samples, or 0 if no samples were present in the ring
|
||||
* buffer. If a user-space producer was epoll-waiting on this map,
|
||||
* and at least one sample was drained, they will receive an event
|
||||
* notification notifying them of available space in the ring
|
||||
* buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
|
||||
* function, no wakeup notification will be sent. If the
|
||||
* BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
|
||||
* be sent even if no sample was drained.
|
||||
*
|
||||
* On failure, the returned value is one of the following:
|
||||
*
|
||||
* **-EBUSY** if the ring buffer is contended, and another calling
|
||||
* context was concurrently draining the ring buffer.
|
||||
*
|
||||
* **-EINVAL** if user-space is not properly tracking the ring
|
||||
* buffer due to the producer position not being aligned to 8
|
||||
* bytes, a sample not being aligned to 8 bytes, or the producer
|
||||
* position not matching the advertised length of a sample.
|
||||
*
|
||||
* **-E2BIG** if user-space has tried to publish a sample which is
|
||||
* larger than the size of the ring buffer, or which cannot fit
|
||||
* within a struct bpf_dynptr.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@@ -5566,6 +5640,7 @@ union bpf_attr {
|
||||
FN(tcp_raw_check_syncookie_ipv4), \
|
||||
FN(tcp_raw_check_syncookie_ipv6), \
|
||||
FN(ktime_get_tai_ns), \
|
||||
FN(user_ringbuf_drain), \
|
||||
/* */
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
@@ -5628,6 +5703,11 @@ enum {
|
||||
BPF_F_SEQ_NUMBER = (1ULL << 3),
|
||||
};
|
||||
|
||||
/* BPF_FUNC_skb_get_tunnel_key flags. */
|
||||
enum {
|
||||
BPF_F_TUNINFO_FLAGS = (1ULL << 4),
|
||||
};
|
||||
|
||||
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
|
||||
* BPF_FUNC_perf_event_read_value flags.
|
||||
*/
|
||||
@@ -5817,7 +5897,10 @@ struct bpf_tunnel_key {
|
||||
};
|
||||
__u8 tunnel_tos;
|
||||
__u8 tunnel_ttl;
|
||||
__u16 tunnel_ext; /* Padding, future use. */
|
||||
union {
|
||||
__u16 tunnel_ext; /* compat */
|
||||
__be16 tunnel_flags;
|
||||
};
|
||||
__u32 tunnel_label;
|
||||
union {
|
||||
__u32 local_ipv4;
|
||||
@@ -5861,6 +5944,11 @@ enum bpf_ret_code {
|
||||
* represented by BPF_REDIRECT above).
|
||||
*/
|
||||
BPF_LWT_REROUTE = 128,
|
||||
/* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR
|
||||
* to indicate that no custom dissection was performed, and
|
||||
* fallback to standard dissector is requested.
|
||||
*/
|
||||
BPF_FLOW_DISSECTOR_CONTINUE = 129,
|
||||
};
|
||||
|
||||
struct bpf_sock {
|
||||
@@ -6159,11 +6247,22 @@ struct bpf_link_info {
|
||||
struct {
|
||||
__aligned_u64 target_name; /* in/out: target_name buffer ptr */
|
||||
__u32 target_name_len; /* in/out: target_name buffer len */
|
||||
|
||||
/* If the iter specific field is 32 bits, it can be put
|
||||
* in the first or second union. Otherwise it should be
|
||||
* put in the second union.
|
||||
*/
|
||||
union {
|
||||
struct {
|
||||
__u32 map_id;
|
||||
} map;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
__u64 cgroup_id;
|
||||
__u32 order;
|
||||
} cgroup;
|
||||
};
|
||||
} iter;
|
||||
struct {
|
||||
__u32 netns_ino;
|
||||
|
||||
@@ -9,7 +9,7 @@ else
|
||||
endif
|
||||
|
||||
LIBBPF_MAJOR_VERSION := 1
|
||||
LIBBPF_MINOR_VERSION := 0
|
||||
LIBBPF_MINOR_VERSION := 1
|
||||
LIBBPF_PATCH_VERSION := 0
|
||||
LIBBPF_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).$(LIBBPF_PATCH_VERSION)
|
||||
LIBBPF_MAJMIN_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).0
|
||||
|
||||
@@ -3426,7 +3426,7 @@ static long (*bpf_load_hdr_opt)(struct bpf_sock_ops *skops, void *searchby_res,
|
||||
*
|
||||
* **-EEXIST** if the option already exists.
|
||||
*
|
||||
* **-EFAULT** on failrue to parse the existing header options.
|
||||
* **-EFAULT** on failure to parse the existing header options.
|
||||
*
|
||||
* **-EPERM** if the helper cannot be used under the current
|
||||
* *skops*\ **->op**.
|
||||
@@ -3686,7 +3686,7 @@ static long (*bpf_redirect_peer)(__u32 ifindex, __u64 flags) = (void *) 155;
|
||||
* a *map* with *task* as the **key**. From this
|
||||
* perspective, the usage is not much different from
|
||||
* **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
|
||||
* helper enforces the key must be an task_struct and the map must also
|
||||
* helper enforces the key must be a task_struct and the map must also
|
||||
* be a **BPF_MAP_TYPE_TASK_STORAGE**.
|
||||
*
|
||||
* Underneath, the value is stored locally at *task* instead of
|
||||
@@ -3764,7 +3764,7 @@ static __u64 (*bpf_ktime_get_coarse_ns)(void) = (void *) 160;
|
||||
/*
|
||||
* bpf_ima_inode_hash
|
||||
*
|
||||
* Returns the stored IMA hash of the *inode* (if it's avaialable).
|
||||
* Returns the stored IMA hash of the *inode* (if it's available).
|
||||
* If the hash is larger than *size*, then only *size*
|
||||
* bytes will be copied to *dst*
|
||||
*
|
||||
@@ -3796,12 +3796,12 @@ static struct socket *(*bpf_sock_from_file)(struct file *file) = (void *) 162;
|
||||
*
|
||||
* The argument *len_diff* can be used for querying with a planned
|
||||
* size change. This allows to check MTU prior to changing packet
|
||||
* ctx. Providing an *len_diff* adjustment that is larger than the
|
||||
* ctx. Providing a *len_diff* adjustment that is larger than the
|
||||
* actual packet size (resulting in negative packet size) will in
|
||||
* principle not exceed the MTU, why it is not considered a
|
||||
* failure. Other BPF-helpers are needed for performing the
|
||||
* planned size change, why the responsability for catch a negative
|
||||
* packet size belong in those helpers.
|
||||
* principle not exceed the MTU, which is why it is not considered
|
||||
* a failure. Other BPF helpers are needed for performing the
|
||||
* planned size change; therefore the responsibility for catching
|
||||
* a negative packet size belongs in those helpers.
|
||||
*
|
||||
* Specifying *ifindex* zero means the MTU check is performed
|
||||
* against the current net device. This is practical if this isn't
|
||||
@@ -4040,6 +4040,7 @@ static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *) 172;
|
||||
*
|
||||
* Returns
|
||||
* Address of the traced function.
|
||||
* 0 for kprobes placed within the function (not at the entry).
|
||||
*/
|
||||
static __u64 (*bpf_get_func_ip)(void *ctx) = (void *) 173;
|
||||
|
||||
@@ -4208,13 +4209,13 @@ static long (*bpf_strncmp)(const char *s1, __u32 s1_sz, const char *s2) = (void
|
||||
/*
|
||||
* bpf_get_func_arg
|
||||
*
|
||||
* Get **n**-th argument (zero based) of the traced function (for tracing programs)
|
||||
* Get **n**-th argument register (zero based) of the traced function (for tracing programs)
|
||||
* returned in **value**.
|
||||
*
|
||||
*
|
||||
* Returns
|
||||
* 0 on success.
|
||||
* **-EINVAL** if n >= arguments count of traced function.
|
||||
* **-EINVAL** if n >= argument register count of traced function.
|
||||
*/
|
||||
static long (*bpf_get_func_arg)(void *ctx, __u32 n, __u64 *value) = (void *) 183;
|
||||
|
||||
@@ -4234,32 +4235,45 @@ static long (*bpf_get_func_ret)(void *ctx, __u64 *value) = (void *) 184;
|
||||
/*
|
||||
* bpf_get_func_arg_cnt
|
||||
*
|
||||
* Get number of arguments of the traced function (for tracing programs).
|
||||
* Get number of registers of the traced function (for tracing programs) where
|
||||
* function arguments are stored in these registers.
|
||||
*
|
||||
*
|
||||
* Returns
|
||||
* The number of arguments of the traced function.
|
||||
* The number of argument registers of the traced function.
|
||||
*/
|
||||
static long (*bpf_get_func_arg_cnt)(void *ctx) = (void *) 185;
|
||||
|
||||
/*
|
||||
* bpf_get_retval
|
||||
*
|
||||
* Get the syscall's return value that will be returned to userspace.
|
||||
* Get the BPF program's return value that will be returned to the upper layers.
|
||||
*
|
||||
* This helper is currently supported by cgroup programs only.
|
||||
* This helper is currently supported by cgroup programs and only by the hooks
|
||||
* where BPF program's return value is returned to the userspace via errno.
|
||||
*
|
||||
* Returns
|
||||
* The syscall's return value.
|
||||
* The BPF program's return value.
|
||||
*/
|
||||
static int (*bpf_get_retval)(void) = (void *) 186;
|
||||
|
||||
/*
|
||||
* bpf_set_retval
|
||||
*
|
||||
* Set the syscall's return value that will be returned to userspace.
|
||||
* Set the BPF program's return value that will be returned to the upper layers.
|
||||
*
|
||||
* This helper is currently supported by cgroup programs and only by the hooks
|
||||
* where BPF program's return value is returned to the userspace via errno.
|
||||
*
|
||||
* Note that there is the following corner case where the program exports an error
|
||||
* via bpf_set_retval but signals success via 'return 1':
|
||||
*
|
||||
* bpf_set_retval(-EPERM);
|
||||
* return 1;
|
||||
*
|
||||
* In this case, the BPF program's return value will use helper's -EPERM. This
|
||||
* still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
|
||||
*
|
||||
* This helper is currently supported by cgroup programs only.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
@@ -4606,4 +4620,46 @@ static long (*bpf_tcp_raw_check_syncookie_ipv6)(struct ipv6hdr *iph, struct tcph
|
||||
*/
|
||||
static __u64 (*bpf_ktime_get_tai_ns)(void) = (void *) 208;
|
||||
|
||||
/*
|
||||
* bpf_user_ringbuf_drain
|
||||
*
|
||||
* Drain samples from the specified user ring buffer, and invoke
|
||||
* the provided callback for each such sample:
|
||||
*
|
||||
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
|
||||
*
|
||||
* If **callback_fn** returns 0, the helper will continue to try
|
||||
* and drain the next sample, up to a maximum of
|
||||
* BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
|
||||
* the helper will skip the rest of the samples and return. Other
|
||||
* return values are not used now, and will be rejected by the
|
||||
* verifier.
|
||||
*
|
||||
* Returns
|
||||
* The number of drained samples if no error was encountered while
|
||||
* draining samples, or 0 if no samples were present in the ring
|
||||
* buffer. If a user-space producer was epoll-waiting on this map,
|
||||
* and at least one sample was drained, they will receive an event
|
||||
* notification notifying them of available space in the ring
|
||||
* buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
|
||||
* function, no wakeup notification will be sent. If the
|
||||
* BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
|
||||
* be sent even if no sample was drained.
|
||||
*
|
||||
* On failure, the returned value is one of the following:
|
||||
*
|
||||
* **-EBUSY** if the ring buffer is contended, and another calling
|
||||
* context was concurrently draining the ring buffer.
|
||||
*
|
||||
* **-EINVAL** if user-space is not properly tracking the ring
|
||||
* buffer due to the producer position not being aligned to 8
|
||||
* bytes, a sample not being aligned to 8 bytes, or the producer
|
||||
* position not matching the advertised length of a sample.
|
||||
*
|
||||
* **-E2BIG** if user-space has tried to publish a sample which is
|
||||
* larger than the size of the ring buffer, or which cannot fit
|
||||
* within a struct bpf_dynptr.
|
||||
*/
|
||||
static long (*bpf_user_ringbuf_drain)(void *map, void *callback_fn, void *ctx, __u64 flags) = (void *) 209;
|
||||
|
||||
|
||||
|
||||
@@ -160,18 +160,6 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Helper structure used by eBPF C program
|
||||
* to describe BPF map attributes to libbpf loader
|
||||
*/
|
||||
struct bpf_map_def {
|
||||
unsigned int type;
|
||||
unsigned int key_size;
|
||||
unsigned int value_size;
|
||||
unsigned int max_entries;
|
||||
unsigned int map_flags;
|
||||
} __attribute__((deprecated("use BTF-defined maps in .maps section")));
|
||||
|
||||
enum libbpf_pin_type {
|
||||
LIBBPF_PIN_NONE,
|
||||
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
|
||||
|
||||
@@ -438,6 +438,113 @@ typeof(name(0)) name(unsigned long long *ctx) \
|
||||
static __always_inline typeof(name(0)) \
|
||||
____##name(unsigned long long *ctx, ##args)
|
||||
|
||||
#ifndef ___bpf_nth2
|
||||
#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
|
||||
_14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
|
||||
#endif
|
||||
#ifndef ___bpf_narg2
|
||||
#define ___bpf_narg2(...) \
|
||||
___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, \
|
||||
6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
|
||||
#endif
|
||||
|
||||
#define ___bpf_treg_cnt(t) \
|
||||
__builtin_choose_expr(sizeof(t) == 1, 1, \
|
||||
__builtin_choose_expr(sizeof(t) == 2, 1, \
|
||||
__builtin_choose_expr(sizeof(t) == 4, 1, \
|
||||
__builtin_choose_expr(sizeof(t) == 8, 1, \
|
||||
__builtin_choose_expr(sizeof(t) == 16, 2, \
|
||||
(void)0)))))
|
||||
|
||||
#define ___bpf_reg_cnt0() (0)
|
||||
#define ___bpf_reg_cnt1(t, x) (___bpf_reg_cnt0() + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt2(t, x, args...) (___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt3(t, x, args...) (___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt4(t, x, args...) (___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt5(t, x, args...) (___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt6(t, x, args...) (___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt7(t, x, args...) (___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt8(t, x, args...) (___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt9(t, x, args...) (___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt10(t, x, args...) (___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt11(t, x, args...) (___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt12(t, x, args...) (___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t))
|
||||
#define ___bpf_reg_cnt(args...) ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args)
|
||||
|
||||
#define ___bpf_union_arg(t, x, n) \
|
||||
__builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \
|
||||
__builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
|
||||
__builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
|
||||
__builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \
|
||||
__builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \
|
||||
(void)0)))))
|
||||
|
||||
#define ___bpf_ctx_arg0(n, args...)
|
||||
#define ___bpf_ctx_arg1(n, t, x) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x))
|
||||
#define ___bpf_ctx_arg2(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args)
|
||||
#define ___bpf_ctx_arg3(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args)
|
||||
#define ___bpf_ctx_arg4(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args)
|
||||
#define ___bpf_ctx_arg5(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args)
|
||||
#define ___bpf_ctx_arg6(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args)
|
||||
#define ___bpf_ctx_arg7(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args)
|
||||
#define ___bpf_ctx_arg8(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args)
|
||||
#define ___bpf_ctx_arg9(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args)
|
||||
#define ___bpf_ctx_arg10(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args)
|
||||
#define ___bpf_ctx_arg11(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args)
|
||||
#define ___bpf_ctx_arg12(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args)
|
||||
#define ___bpf_ctx_arg(args...) ___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args)
|
||||
|
||||
#define ___bpf_ctx_decl0()
|
||||
#define ___bpf_ctx_decl1(t, x) , t x
|
||||
#define ___bpf_ctx_decl2(t, x, args...) , t x ___bpf_ctx_decl1(args)
|
||||
#define ___bpf_ctx_decl3(t, x, args...) , t x ___bpf_ctx_decl2(args)
|
||||
#define ___bpf_ctx_decl4(t, x, args...) , t x ___bpf_ctx_decl3(args)
|
||||
#define ___bpf_ctx_decl5(t, x, args...) , t x ___bpf_ctx_decl4(args)
|
||||
#define ___bpf_ctx_decl6(t, x, args...) , t x ___bpf_ctx_decl5(args)
|
||||
#define ___bpf_ctx_decl7(t, x, args...) , t x ___bpf_ctx_decl6(args)
|
||||
#define ___bpf_ctx_decl8(t, x, args...) , t x ___bpf_ctx_decl7(args)
|
||||
#define ___bpf_ctx_decl9(t, x, args...) , t x ___bpf_ctx_decl8(args)
|
||||
#define ___bpf_ctx_decl10(t, x, args...) , t x ___bpf_ctx_decl9(args)
|
||||
#define ___bpf_ctx_decl11(t, x, args...) , t x ___bpf_ctx_decl10(args)
|
||||
#define ___bpf_ctx_decl12(t, x, args...) , t x ___bpf_ctx_decl11(args)
|
||||
#define ___bpf_ctx_decl(args...) ___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args)
|
||||
|
||||
/*
|
||||
* BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct
|
||||
* arguments. Since each struct argument might take one or two u64 values
|
||||
* in the trampoline stack, argument type size is needed to place proper number
|
||||
* of u64 values for each argument. Therefore, BPF_PROG2 has different
|
||||
* syntax from BPF_PROG. For example, for the following BPF_PROG syntax:
|
||||
*
|
||||
* int BPF_PROG(test2, int a, int b) { ... }
|
||||
*
|
||||
* the corresponding BPF_PROG2 syntax is:
|
||||
*
|
||||
* int BPF_PROG2(test2, int, a, int, b) { ... }
|
||||
*
|
||||
* where type and the corresponding argument name are separated by comma.
|
||||
*
|
||||
* Use BPF_PROG2 macro if one of the arguments might be a struct/union larger
|
||||
* than 8 bytes:
|
||||
*
|
||||
* int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b,
|
||||
* int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
|
||||
* {
|
||||
* // access a, b, c, d, e, and ret directly
|
||||
* ...
|
||||
* }
|
||||
*/
|
||||
#define BPF_PROG2(name, args...) \
|
||||
name(unsigned long long *ctx); \
|
||||
static __always_inline typeof(name(0)) \
|
||||
____##name(unsigned long long *ctx ___bpf_ctx_decl(args)); \
|
||||
typeof(name(0)) name(unsigned long long *ctx) \
|
||||
{ \
|
||||
return ____##name(ctx ___bpf_ctx_arg(args)); \
|
||||
} \
|
||||
static __always_inline typeof(name(0)) \
|
||||
____##name(unsigned long long *ctx ___bpf_ctx_decl(args))
|
||||
|
||||
struct pt_regs;
|
||||
|
||||
#define ___bpf_kprobe_args0() ctx
|
||||
|
||||
32
src/btf.c
@@ -4642,20 +4642,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
|
||||
*/
|
||||
struct btf *btf__load_vmlinux_btf(void)
|
||||
{
|
||||
struct {
|
||||
const char *path_fmt;
|
||||
bool raw_btf;
|
||||
} locations[] = {
|
||||
const char *locations[] = {
|
||||
/* try canonical vmlinux BTF through sysfs first */
|
||||
{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
|
||||
/* fall back to trying to find vmlinux ELF on disk otherwise */
|
||||
{ "/boot/vmlinux-%1$s" },
|
||||
{ "/lib/modules/%1$s/vmlinux-%1$s" },
|
||||
{ "/lib/modules/%1$s/build/vmlinux" },
|
||||
{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
|
||||
{ "/usr/lib/debug/boot/vmlinux-%1$s" },
|
||||
{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
|
||||
{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
|
||||
"/sys/kernel/btf/vmlinux",
|
||||
/* fall back to trying to find vmlinux on disk otherwise */
|
||||
"/boot/vmlinux-%1$s",
|
||||
"/lib/modules/%1$s/vmlinux-%1$s",
|
||||
"/lib/modules/%1$s/build/vmlinux",
|
||||
"/usr/lib/modules/%1$s/kernel/vmlinux",
|
||||
"/usr/lib/debug/boot/vmlinux-%1$s",
|
||||
"/usr/lib/debug/boot/vmlinux-%1$s.debug",
|
||||
"/usr/lib/debug/lib/modules/%1$s/vmlinux",
|
||||
};
|
||||
char path[PATH_MAX + 1];
|
||||
struct utsname buf;
|
||||
@@ -4665,15 +4662,12 @@ struct btf *btf__load_vmlinux_btf(void)
|
||||
uname(&buf);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(locations); i++) {
|
||||
snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
|
||||
snprintf(path, PATH_MAX, locations[i], buf.release);
|
||||
|
||||
if (access(path, R_OK))
|
||||
if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS))
|
||||
continue;
|
||||
|
||||
if (locations[i].raw_btf)
|
||||
btf = btf__parse_raw(path);
|
||||
else
|
||||
btf = btf__parse_elf(path, NULL);
|
||||
btf = btf__parse(path, NULL);
|
||||
err = libbpf_get_error(btf);
|
||||
pr_debug("loading kernel BTF '%s': %d\n", path, err);
|
||||
if (err)
|
||||
|
||||
25
src/btf.h
@@ -486,6 +486,8 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
|
||||
return (struct btf_enum *)(t + 1);
|
||||
}
|
||||
|
||||
struct btf_enum64;
|
||||
|
||||
static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
|
||||
{
|
||||
return (struct btf_enum64 *)(t + 1);
|
||||
@@ -493,7 +495,28 @@ static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
|
||||
|
||||
static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
|
||||
{
|
||||
return ((__u64)e->val_hi32 << 32) | e->val_lo32;
|
||||
/* struct btf_enum64 is introduced in Linux 6.0, which is very
|
||||
* bleeding-edge. Here we are avoiding relying on struct btf_enum64
|
||||
* definition coming from kernel UAPI headers to support wider range
|
||||
* of system-wide kernel headers.
|
||||
*
|
||||
* Given this header can be also included from C++ applications, that
|
||||
* further restricts C tricks we can use (like using compatible
|
||||
* anonymous struct). So just treat struct btf_enum64 as
|
||||
* a three-element array of u32 and access second (lo32) and third
|
||||
* (hi32) elements directly.
|
||||
*
|
||||
* For reference, here is a struct btf_enum64 definition:
|
||||
*
|
||||
* const struct btf_enum64 {
|
||||
* __u32 name_off;
|
||||
* __u32 val_lo32;
|
||||
* __u32 val_hi32;
|
||||
* };
|
||||
*/
|
||||
const __u32 *e64 = (const __u32 *)e;
|
||||
|
||||
return ((__u64)e64[2] << 32) | e64[1];
|
||||
}
|
||||
|
||||
static inline struct btf_member *btf_members(const struct btf_type *t)
|
||||
|
||||
@@ -2385,7 +2385,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
|
||||
d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
|
||||
|
||||
/* default indent string is a tab */
|
||||
if (!opts->indent_str)
|
||||
if (!OPTS_GET(opts, indent_str, NULL))
|
||||
d->typed_dump->indent_str[0] = '\t';
|
||||
else
|
||||
libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
|
||||
|
||||
106
src/libbpf.c
@@ -163,6 +163,7 @@ static const char * const map_type_name[] = {
|
||||
[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
|
||||
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
|
||||
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
|
||||
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
|
||||
};
|
||||
|
||||
static const char * const prog_type_name[] = {
|
||||
@@ -883,7 +884,7 @@ __u32 get_kernel_version(void)
|
||||
__u32 major, minor, patch;
|
||||
struct utsname info;
|
||||
|
||||
if (access(ubuntu_kver_file, R_OK) == 0) {
|
||||
if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) == 0) {
|
||||
FILE *f;
|
||||
|
||||
f = fopen(ubuntu_kver_file, "r");
|
||||
@@ -2096,19 +2097,30 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
|
||||
return true;
|
||||
}
|
||||
|
||||
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, buf_sz, "%s/%s", path, name);
|
||||
if (len < 0)
|
||||
return -EINVAL;
|
||||
if (len >= buf_sz)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int build_map_pin_path(struct bpf_map *map, const char *path)
|
||||
{
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
int err;
|
||||
|
||||
if (!path)
|
||||
path = "/sys/fs/bpf";
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
|
||||
if (len < 0)
|
||||
return -EINVAL;
|
||||
else if (len >= PATH_MAX)
|
||||
return -ENAMETOOLONG;
|
||||
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return bpf_map__set_pin_path(map, buf);
|
||||
}
|
||||
@@ -2372,6 +2384,12 @@ static size_t adjust_ringbuf_sz(size_t sz)
|
||||
return sz;
|
||||
}
|
||||
|
||||
static bool map_is_ringbuf(const struct bpf_map *map)
|
||||
{
|
||||
return map->def.type == BPF_MAP_TYPE_RINGBUF ||
|
||||
map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
|
||||
}
|
||||
|
||||
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
|
||||
{
|
||||
map->def.type = def->map_type;
|
||||
@@ -2386,7 +2404,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
|
||||
map->btf_value_type_id = def->value_type_id;
|
||||
|
||||
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
|
||||
if (map->def.type == BPF_MAP_TYPE_RINGBUF)
|
||||
if (map_is_ringbuf(map))
|
||||
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
|
||||
|
||||
if (def->parts & MAP_DEF_MAP_TYPE)
|
||||
@@ -4369,7 +4387,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
|
||||
map->def.max_entries = max_entries;
|
||||
|
||||
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
|
||||
if (map->def.type == BPF_MAP_TYPE_RINGBUF)
|
||||
if (map_is_ringbuf(map))
|
||||
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
|
||||
|
||||
return 0;
|
||||
@@ -7961,17 +7979,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
|
||||
continue;
|
||||
|
||||
if (path) {
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0) {
|
||||
err = -EINVAL;
|
||||
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
|
||||
if (err)
|
||||
goto err_unpin_maps;
|
||||
} else if (len >= PATH_MAX) {
|
||||
err = -ENAMETOOLONG;
|
||||
goto err_unpin_maps;
|
||||
}
|
||||
sanitize_pin_path(buf);
|
||||
pin_path = buf;
|
||||
} else if (!map->pin_path) {
|
||||
@@ -8009,14 +8019,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
|
||||
char buf[PATH_MAX];
|
||||
|
||||
if (path) {
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path,
|
||||
bpf_map__name(map));
|
||||
if (len < 0)
|
||||
return libbpf_err(-EINVAL);
|
||||
else if (len >= PATH_MAX)
|
||||
return libbpf_err(-ENAMETOOLONG);
|
||||
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
|
||||
if (err)
|
||||
return libbpf_err(err);
|
||||
sanitize_pin_path(buf);
|
||||
pin_path = buf;
|
||||
} else if (!map->pin_path) {
|
||||
@@ -8034,6 +8039,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
|
||||
int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
|
||||
{
|
||||
struct bpf_program *prog;
|
||||
char buf[PATH_MAX];
|
||||
int err;
|
||||
|
||||
if (!obj)
|
||||
@@ -8045,17 +8051,9 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
|
||||
}
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
|
||||
if (len < 0) {
|
||||
err = -EINVAL;
|
||||
err = pathname_concat(buf, sizeof(buf), path, prog->name);
|
||||
if (err)
|
||||
goto err_unpin_programs;
|
||||
} else if (len >= PATH_MAX) {
|
||||
err = -ENAMETOOLONG;
|
||||
goto err_unpin_programs;
|
||||
}
|
||||
|
||||
err = bpf_program__pin(prog, buf);
|
||||
if (err)
|
||||
@@ -8066,13 +8064,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
|
||||
|
||||
err_unpin_programs:
|
||||
while ((prog = bpf_object__prev_program(obj, prog))) {
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
|
||||
if (len < 0)
|
||||
continue;
|
||||
else if (len >= PATH_MAX)
|
||||
if (pathname_concat(buf, sizeof(buf), path, prog->name))
|
||||
continue;
|
||||
|
||||
bpf_program__unpin(prog, buf);
|
||||
@@ -8091,13 +8083,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
char buf[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
|
||||
if (len < 0)
|
||||
return libbpf_err(-EINVAL);
|
||||
else if (len >= PATH_MAX)
|
||||
return libbpf_err(-ENAMETOOLONG);
|
||||
err = pathname_concat(buf, sizeof(buf), path, prog->name);
|
||||
if (err)
|
||||
return libbpf_err(err);
|
||||
|
||||
err = bpf_program__unpin(prog, buf);
|
||||
if (err)
|
||||
@@ -9084,11 +9073,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
|
||||
int err = 0;
|
||||
|
||||
/* BPF program's BTF ID */
|
||||
if (attach_prog_fd) {
|
||||
if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
|
||||
if (!attach_prog_fd) {
|
||||
pr_warn("prog '%s': attach program FD is not set\n", prog->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
|
||||
if (err < 0) {
|
||||
pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
|
||||
attach_prog_fd, attach_name, err);
|
||||
pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
|
||||
prog->name, attach_prog_fd, attach_name, err);
|
||||
return err;
|
||||
}
|
||||
*btf_obj_fd = 0;
|
||||
@@ -9105,7 +9098,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
|
||||
err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
|
||||
}
|
||||
if (err) {
|
||||
pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
|
||||
pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
|
||||
prog->name, attach_name, err);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
@@ -9910,7 +9904,7 @@ static bool use_debugfs(void)
|
||||
static int has_debugfs = -1;
|
||||
|
||||
if (has_debugfs < 0)
|
||||
has_debugfs = access(DEBUGFS, F_OK) == 0;
|
||||
has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
|
||||
|
||||
return has_debugfs == 1;
|
||||
}
|
||||
@@ -10727,7 +10721,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
|
||||
continue;
|
||||
snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
|
||||
/* ensure it has required permissions */
|
||||
if (access(result, perm) < 0)
|
||||
if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
|
||||
continue;
|
||||
pr_debug("resolved '%s' to '%s'\n", file, result);
|
||||
return 0;
|
||||
|
||||
111
src/libbpf.h
@@ -118,7 +118,9 @@ struct bpf_object_open_opts {
|
||||
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
|
||||
*/
|
||||
const char *pin_root_path;
|
||||
long :0;
|
||||
|
||||
__u32 :32; /* stub out now removed attach_prog_fd */
|
||||
|
||||
/* Additional kernel config content that augments and overrides
|
||||
* system Kconfig for CONFIG_xxx externs.
|
||||
*/
|
||||
@@ -1011,6 +1013,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
|
||||
|
||||
/* Ring buffer APIs */
|
||||
struct ring_buffer;
|
||||
struct user_ring_buffer;
|
||||
|
||||
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
|
||||
|
||||
@@ -1030,6 +1033,112 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
|
||||
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
|
||||
LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
|
||||
|
||||
struct user_ring_buffer_opts {
|
||||
size_t sz; /* size of this struct, for forward/backward compatibility */
|
||||
};
|
||||
|
||||
#define user_ring_buffer_opts__last_field sz
|
||||
|
||||
/* @brief **user_ring_buffer__new()** creates a new instance of a user ring
|
||||
* buffer.
|
||||
*
|
||||
* @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
|
||||
* @param opts Options for how the ring buffer should be created.
|
||||
* @return A user ring buffer on success; NULL and errno being set on a
|
||||
* failure.
|
||||
*/
|
||||
LIBBPF_API struct user_ring_buffer *
|
||||
user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
|
||||
|
||||
/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
|
||||
* user ring buffer.
|
||||
* @param rb A pointer to a user ring buffer.
|
||||
* @param size The size of the sample, in bytes.
|
||||
* @return A pointer to an 8-byte aligned reserved region of the user ring
|
||||
* buffer; NULL, and errno being set if a sample could not be reserved.
|
||||
*
|
||||
* This function is *not* thread safe, and callers must synchronize accessing
|
||||
* this function if there are multiple producers. If a size is requested that
|
||||
* is larger than the size of the entire ring buffer, errno will be set to
|
||||
* E2BIG and NULL is returned. If the ring buffer could accommodate the size,
|
||||
* but currently does not have enough space, errno is set to ENOSPC and NULL is
|
||||
* returned.
|
||||
*
|
||||
* After initializing the sample, callers must invoke
|
||||
* **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise,
|
||||
* the sample must be freed with **user_ring_buffer__discard()**.
|
||||
*/
|
||||
LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
|
||||
|
||||
/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
|
||||
* ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
|
||||
* available.
|
||||
* @param rb The user ring buffer.
|
||||
* @param size The size of the sample, in bytes.
|
||||
* @param timeout_ms The amount of time, in milliseconds, for which the caller
|
||||
* should block when waiting for a sample. -1 causes the caller to block
|
||||
* indefinitely.
|
||||
* @return A pointer to an 8-byte aligned reserved region of the user ring
|
||||
* buffer; NULL, and errno being set if a sample could not be reserved.
|
||||
*
|
||||
* This function is *not* thread safe, and callers must synchronize
|
||||
* accessing this function if there are multiple producers
|
||||
*
|
||||
* If **timeout_ms** is -1, the function will block indefinitely until a sample
|
||||
* becomes available. Otherwise, **timeout_ms** must be non-negative, or errno
|
||||
* is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking
|
||||
* will occur and the function will return immediately after attempting to
|
||||
* reserve a sample.
|
||||
*
|
||||
* If **size** is larger than the size of the entire ring buffer, errno is set
|
||||
* to E2BIG and NULL is returned. If the ring buffer could accommodate
|
||||
* **size**, but currently does not have enough space, the caller will block
|
||||
* until at most **timeout_ms** has elapsed. If insufficient space is available
|
||||
* at that time, errno is set to ENOSPC, and NULL is returned.
|
||||
*
|
||||
* The kernel guarantees that it will wake up this thread to check if
|
||||
* sufficient space is available in the ring buffer at least once per
|
||||
* invocation of the **bpf_ringbuf_drain()** helper function, provided that at
|
||||
* least one sample is consumed, and the BPF program did not invoke the
|
||||
* function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the
|
||||
* kernel does not guarantee this. If the helper function is invoked with
|
||||
* BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is
|
||||
* consumed.
|
||||
*
|
||||
* When a sample of size **size** is found within **timeout_ms**, a pointer to
|
||||
* the sample is returned. After initializing the sample, callers must invoke
|
||||
* **user_ring_buffer__submit()** to post the sample to the ring buffer.
|
||||
* Otherwise, the sample must be freed with **user_ring_buffer__discard()**.
|
||||
*/
|
||||
LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
|
||||
__u32 size,
|
||||
int timeout_ms);
|
||||
|
||||
/* @brief **user_ring_buffer__submit()** submits a previously reserved sample
|
||||
* into the ring buffer.
|
||||
* @param rb The user ring buffer.
|
||||
* @param sample A reserved sample.
|
||||
*
|
||||
* It is not necessary to synchronize amongst multiple producers when invoking
|
||||
* this function.
|
||||
*/
|
||||
LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
|
||||
|
||||
/* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
|
||||
* @param rb The user ring buffer.
|
||||
* @param sample A reserved sample.
|
||||
*
|
||||
* It is not necessary to synchronize amongst multiple producers when invoking
|
||||
* this function.
|
||||
*/
|
||||
LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
|
||||
|
||||
/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
|
||||
* created with **user_ring_buffer__new()**.
|
||||
* @param rb The user ring buffer being freed.
|
||||
*/
|
||||
LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb);
|
||||
|
||||
/* Perf buffer APIs */
|
||||
struct perf_buffer;
|
||||
|
||||
|
||||
@@ -368,3 +368,13 @@ LIBBPF_1.0.0 {
|
||||
libbpf_bpf_prog_type_str;
|
||||
perf_buffer__buffer;
|
||||
};
|
||||
|
||||
LIBBPF_1.1.0 {
|
||||
global:
|
||||
user_ring_buffer__discard;
|
||||
user_ring_buffer__free;
|
||||
user_ring_buffer__new;
|
||||
user_ring_buffer__reserve;
|
||||
user_ring_buffer__reserve_blocking;
|
||||
user_ring_buffer__submit;
|
||||
} LIBBPF_1.0.0;
|
||||
|
||||
@@ -231,6 +231,7 @@ static int probe_map_create(enum bpf_map_type map_type)
|
||||
return btf_fd;
|
||||
break;
|
||||
case BPF_MAP_TYPE_RINGBUF:
|
||||
case BPF_MAP_TYPE_USER_RINGBUF:
|
||||
key_size = 0;
|
||||
value_size = 0;
|
||||
max_entries = 4096;
|
||||
|
||||
@@ -4,6 +4,6 @@
|
||||
#define __LIBBPF_VERSION_H
|
||||
|
||||
#define LIBBPF_MAJOR_VERSION 1
|
||||
#define LIBBPF_MINOR_VERSION 0
|
||||
#define LIBBPF_MINOR_VERSION 1
|
||||
|
||||
#endif /* __LIBBPF_VERSION_H */
|
||||
|
||||
271
src/ringbuf.c
@@ -16,6 +16,7 @@
|
||||
#include <asm/barrier.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "libbpf.h"
|
||||
#include "libbpf_internal.h"
|
||||
@@ -39,6 +40,23 @@ struct ring_buffer {
|
||||
int ring_cnt;
|
||||
};
|
||||
|
||||
struct user_ring_buffer {
|
||||
struct epoll_event event;
|
||||
unsigned long *consumer_pos;
|
||||
unsigned long *producer_pos;
|
||||
void *data;
|
||||
unsigned long mask;
|
||||
size_t page_size;
|
||||
int map_fd;
|
||||
int epoll_fd;
|
||||
};
|
||||
|
||||
/* 8-byte ring buffer header structure */
|
||||
struct ringbuf_hdr {
|
||||
__u32 len;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
|
||||
{
|
||||
if (r->consumer_pos) {
|
||||
@@ -300,3 +318,256 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
|
||||
{
|
||||
return rb->epoll_fd;
|
||||
}
|
||||
|
||||
static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
|
||||
{
|
||||
if (rb->consumer_pos) {
|
||||
munmap(rb->consumer_pos, rb->page_size);
|
||||
rb->consumer_pos = NULL;
|
||||
}
|
||||
if (rb->producer_pos) {
|
||||
munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1));
|
||||
rb->producer_pos = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void user_ring_buffer__free(struct user_ring_buffer *rb)
|
||||
{
|
||||
if (!rb)
|
||||
return;
|
||||
|
||||
user_ringbuf_unmap_ring(rb);
|
||||
|
||||
if (rb->epoll_fd >= 0)
|
||||
close(rb->epoll_fd);
|
||||
|
||||
free(rb);
|
||||
}
|
||||
|
||||
static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
|
||||
{
|
||||
struct bpf_map_info info;
|
||||
__u32 len = sizeof(info);
|
||||
void *tmp;
|
||||
struct epoll_event *rb_epoll;
|
||||
int err;
|
||||
|
||||
memset(&info, 0, sizeof(info));
|
||||
|
||||
err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (info.type != BPF_MAP_TYPE_USER_RINGBUF) {
|
||||
pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rb->map_fd = map_fd;
|
||||
rb->mask = info.max_entries - 1;
|
||||
|
||||
/* Map read-only consumer page */
|
||||
tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0);
|
||||
if (tmp == MAP_FAILED) {
|
||||
err = -errno;
|
||||
pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
|
||||
map_fd, err);
|
||||
return err;
|
||||
}
|
||||
rb->consumer_pos = tmp;
|
||||
|
||||
/* Map read-write the producer page and data pages. We map the data
|
||||
* region as twice the total size of the ring buffer to allow the
|
||||
* simple reading and writing of samples that wrap around the end of
|
||||
* the buffer. See the kernel implementation for details.
|
||||
*/
|
||||
tmp = mmap(NULL, rb->page_size + 2 * info.max_entries,
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size);
|
||||
if (tmp == MAP_FAILED) {
|
||||
err = -errno;
|
||||
pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
|
||||
map_fd, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
rb->producer_pos = tmp;
|
||||
rb->data = tmp + rb->page_size;
|
||||
|
||||
rb_epoll = &rb->event;
|
||||
rb_epoll->events = EPOLLOUT;
|
||||
if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) {
|
||||
err = -errno;
|
||||
pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct user_ring_buffer *
|
||||
user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts)
|
||||
{
|
||||
struct user_ring_buffer *rb;
|
||||
int err;
|
||||
|
||||
if (!OPTS_VALID(opts, user_ring_buffer_opts))
|
||||
return errno = EINVAL, NULL;
|
||||
|
||||
rb = calloc(1, sizeof(*rb));
|
||||
if (!rb)
|
||||
return errno = ENOMEM, NULL;
|
||||
|
||||
rb->page_size = getpagesize();
|
||||
|
||||
rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
|
||||
if (rb->epoll_fd < 0) {
|
||||
err = -errno;
|
||||
pr_warn("user ringbuf: failed to create epoll instance: %d\n", err);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
err = user_ringbuf_map(rb, map_fd);
|
||||
if (err)
|
||||
goto err_out;
|
||||
|
||||
return rb;
|
||||
|
||||
err_out:
|
||||
user_ring_buffer__free(rb);
|
||||
return errno = -err, NULL;
|
||||
}
|
||||
|
||||
static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard)
|
||||
{
|
||||
__u32 new_len;
|
||||
struct ringbuf_hdr *hdr;
|
||||
uintptr_t hdr_offset;
|
||||
|
||||
hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ;
|
||||
hdr = rb->data + (hdr_offset & rb->mask);
|
||||
|
||||
new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT;
|
||||
if (discard)
|
||||
new_len |= BPF_RINGBUF_DISCARD_BIT;
|
||||
|
||||
/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
|
||||
* the kernel.
|
||||
*/
|
||||
__atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL);
|
||||
}
|
||||
|
||||
void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample)
|
||||
{
|
||||
user_ringbuf_commit(rb, sample, true);
|
||||
}
|
||||
|
||||
void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample)
|
||||
{
|
||||
user_ringbuf_commit(rb, sample, false);
|
||||
}
|
||||
|
||||
/* Reserve @size bytes in the ring buffer and return a pointer to the sample
 * payload, or NULL with errno set: E2BIG if the request can never fit,
 * ENOSPC if the ring is currently full. The returned sample must later be
 * passed to user_ring_buffer__submit() or user_ring_buffer__discard().
 *
 * NOTE(review): no internal locking is visible here — concurrent reservers
 * would race on producer_pos; presumably single-producer use is assumed.
 */
void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
{
	__u32 avail_size, total_size, max_size;
	/* 64-bit to avoid overflow in case of extreme application behavior */
	__u64 cons_pos, prod_pos;
	struct ringbuf_hdr *hdr;

	/* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
	 * the kernel.
	 */
	cons_pos = smp_load_acquire(rb->consumer_pos);
	/* Synchronizes with smp_store_release() in user_ringbuf_commit() */
	prod_pos = smp_load_acquire(rb->producer_pos);

	max_size = rb->mask + 1;
	/* bytes still free between producer and consumer positions */
	avail_size = max_size - (prod_pos - cons_pos);
	/* Round up total size to a multiple of 8. */
	total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8;

	if (total_size > max_size)
		return errno = E2BIG, NULL;

	if (avail_size < total_size)
		return errno = ENOSPC, NULL;

	/* write the header first, marked BUSY until commit */
	hdr = rb->data + (prod_pos & rb->mask);
	hdr->len = size | BPF_RINGBUF_BUSY_BIT;
	hdr->pad = 0;

	/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
	 * the kernel.
	 */
	smp_store_release(rb->producer_pos, prod_pos + total_size);

	/* payload starts right after the 8-byte header */
	return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask);
}
|
||||
|
||||
/* Return the elapsed time between two timespec readings, in nanoseconds.
 * Assumes @end is not earlier than @start (unsigned subtraction otherwise
 * wraps).
 */
static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end)
{
	const __u64 ns_per_s = 1000000000;
	__u64 from_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec;
	__u64 to_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec;

	return to_ns - from_ns;
}
|
||||
|
||||
/* Like user_ring_buffer__reserve(), but when the ring is full, block (via
 * epoll on the map fd) until the kernel drains samples or @timeout_ms
 * milliseconds elapse. @timeout_ms == -1 means wait forever; other negative
 * values are rejected with EINVAL. Returns a reserved sample, or NULL with
 * errno set.
 */
void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms)
{
	void *sample;
	int err, ms_remaining = timeout_ms;
	struct timespec start;

	if (timeout_ms < 0 && timeout_ms != -1)
		return errno = EINVAL, NULL;

	/* only need a start timestamp when there is a finite deadline */
	if (timeout_ms != -1) {
		err = clock_gettime(CLOCK_MONOTONIC, &start);
		if (err)
			return NULL;
	}

	do {
		int cnt, ms_elapsed;
		struct timespec curr;
		__u64 ns_per_ms = 1000000;

		/* fast path: try to reserve without blocking */
		sample = user_ring_buffer__reserve(rb, size);
		if (sample)
			return sample;
		else if (errno != ENOSPC)
			return NULL;

		/* The kernel guarantees at least one event notification
		 * delivery whenever at least one sample is drained from the
		 * ring buffer in an invocation to bpf_ringbuf_drain(). Other
		 * additional events may be delivered at any time, but only one
		 * event is guaranteed per bpf_ringbuf_drain() invocation,
		 * provided that a sample is drained, and the BPF program did
		 * not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If
		 * BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a
		 * wakeup event will be delivered even if no samples are
		 * drained.
		 */
		cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining);
		if (cnt < 0)
			return NULL;

		/* infinite timeout: just retry the reservation */
		if (timeout_ms == -1)
			continue;

		err = clock_gettime(CLOCK_MONOTONIC, &curr);
		if (err)
			return NULL;

		/* shrink the budget for the next epoll_wait() round */
		ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms;
		ms_remaining = timeout_ms - ms_elapsed;
	} while (ms_remaining > 0);

	/* Try one more time to reserve a sample after the specified timeout has elapsed. */
	return user_ring_buffer__reserve(rb, size);
}
|
||||
|
||||
@@ -251,6 +251,29 @@ static inline int skel_map_update_elem(int fd, const void *key,
|
||||
return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
|
||||
}
|
||||
|
||||
static inline int skel_map_delete_elem(int fd, const void *key)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, flags);
|
||||
union bpf_attr attr;
|
||||
|
||||
memset(&attr, 0, attr_sz);
|
||||
attr.map_fd = fd;
|
||||
attr.key = (long)key;
|
||||
|
||||
return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
|
||||
}
|
||||
|
||||
/* Minimal BPF_MAP_GET_FD_BY_ID wrapper for skeleton loaders: returns a new
 * fd for the map identified by @id, or a negative error.
 */
static inline int skel_map_get_fd_by_id(__u32 id)
{
	const size_t attr_sz = offsetofend(union bpf_attr, flags);
	union bpf_attr attr;

	/* only the fields up to attr_sz are passed to the kernel */
	memset(&attr, 0, attr_sz);
	attr.map_id = id;

	return skel_sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
}
|
||||
|
||||
static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);
|
||||
|
||||
@@ -282,7 +282,7 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
|
||||
* If this is not supported, USDTs with semaphores will not be supported.
|
||||
* Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
|
||||
*/
|
||||
man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0;
|
||||
man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0;
|
||||
|
||||
return man;
|
||||
}
|
||||
|
||||