mirror of
https://github.com/netdata/libbpf.git
synced 2026-03-15 13:59:06 +08:00
Compare commits
87 Commits
v0.0.8
...
netdata-pa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
673424c561 | ||
|
|
d2feaff998 | ||
|
|
0d4b75d30e | ||
|
|
d7b2934cf9 | ||
|
|
c83d2166e8 | ||
|
|
fb27968bf1 | ||
|
|
d6ae406429 | ||
|
|
cb174c5b8d | ||
|
|
17f747ed38 | ||
|
|
bf34234885 | ||
|
|
46c272f9b4 | ||
|
|
40e69c9538 | ||
|
|
a975d8ea28 | ||
|
|
45f7113925 | ||
|
|
6816734203 | ||
|
|
11d2a59689 | ||
|
|
8c7527ea88 | ||
|
|
c569e03985 | ||
|
|
1862741fb0 | ||
|
|
6a269cf458 | ||
|
|
6e15a022db | ||
|
|
20d9816471 | ||
|
|
538b3f4ce7 | ||
|
|
f2610ca9cf | ||
|
|
adb5dd203c | ||
|
|
3aadd91e97 | ||
|
|
1206ab0e75 | ||
|
|
70eac9941d | ||
|
|
2fdbf42f98 | ||
|
|
365e4805a1 | ||
|
|
890f25520a | ||
|
|
fbdee96fa1 | ||
|
|
f54c56be0d | ||
|
|
8dc4b38871 | ||
|
|
ed023acd35 | ||
|
|
ff3116bfcb | ||
|
|
65f4b3ba4c | ||
|
|
e1bf7a787e | ||
|
|
17a6d61898 | ||
|
|
ff2322b879 | ||
|
|
ab1b4f3844 | ||
|
|
df9a526f99 | ||
|
|
3b23942542 | ||
|
|
90941cde5f | ||
|
|
97a0d1e7b5 | ||
|
|
d650751a9b | ||
|
|
dcb0c5ac44 | ||
|
|
2c892f1aa1 | ||
|
|
46407182c7 | ||
|
|
a00d463bb9 | ||
|
|
d8fdd1e848 | ||
|
|
b8482d74a1 | ||
|
|
3cd9cac8fb | ||
|
|
70e6075d1d | ||
|
|
d71e9baa8b | ||
|
|
b41c6d34a4 | ||
|
|
9029d18d9b | ||
|
|
f81f504e12 | ||
|
|
021e35fba2 | ||
|
|
7112841ade | ||
|
|
940f4df57b | ||
|
|
46c906b6d1 | ||
|
|
9dc3736a7f | ||
|
|
8b3cbf12a2 | ||
|
|
dfa07417ff | ||
|
|
5c1c96c579 | ||
|
|
83f269b088 | ||
|
|
597d350e4a | ||
|
|
7fc4d5025b | ||
|
|
bd9e2feb2a | ||
|
|
814ed5011f | ||
|
|
f8faf2b33d | ||
|
|
3cb0b3fd52 | ||
|
|
edb1aaa8dc | ||
|
|
f3271942dd | ||
|
|
040f73a7c7 | ||
|
|
35283f89c6 | ||
|
|
1c4c845e79 | ||
|
|
2a374b5df0 | ||
|
|
7878754030 | ||
|
|
da5aa114e2 | ||
|
|
625f64a126 | ||
|
|
ba344d9494 | ||
|
|
976e29343d | ||
|
|
b3da63d59d | ||
|
|
902ba3fd33 | ||
|
|
cf3fc46ea8 |
31
.travis.yml
31
.travis.yml
@@ -35,72 +35,71 @@ stages:
|
||||
|
||||
jobs:
|
||||
include:
|
||||
- stage: Build
|
||||
- stage: Build & Test
|
||||
name: Debian Build
|
||||
language: bash
|
||||
install: $CI_ROOT/managers/debian.sh SETUP
|
||||
script: $CI_ROOT/managers/debian.sh RUN || travis_terminate
|
||||
script: $CI_ROOT/managers/debian.sh RUN || travis_terminate 1
|
||||
after_script: $CI_ROOT/managers/debian.sh CLEANUP
|
||||
|
||||
- name: Debian Build (ASan+UBSan)
|
||||
language: bash
|
||||
install: $CI_ROOT/managers/debian.sh SETUP
|
||||
script: $CI_ROOT/managers/debian.sh RUN_ASAN || travis_terminate
|
||||
script: $CI_ROOT/managers/debian.sh RUN_ASAN || travis_terminate 1
|
||||
after_script: $CI_ROOT/managers/debian.sh CLEANUP
|
||||
|
||||
- name: Debian Build (clang)
|
||||
language: bash
|
||||
install: $CI_ROOT/managers/debian.sh SETUP
|
||||
script: $CI_ROOT/managers/debian.sh RUN_CLANG || travis_terminate
|
||||
script: $CI_ROOT/managers/debian.sh RUN_CLANG || travis_terminate 1
|
||||
after_script: $CI_ROOT/managers/debian.sh CLEANUP
|
||||
|
||||
- name: Debian Build (clang ASan+UBSan)
|
||||
language: bash
|
||||
install: $CI_ROOT/managers/debian.sh SETUP
|
||||
script: $CI_ROOT/managers/debian.sh RUN_CLANG_ASAN || travis_terminate
|
||||
script: $CI_ROOT/managers/debian.sh RUN_CLANG_ASAN || travis_terminate 1
|
||||
after_script: $CI_ROOT/managers/debian.sh CLEANUP
|
||||
|
||||
- name: Debian Build (gcc-8)
|
||||
language: bash
|
||||
install: $CI_ROOT/managers/debian.sh SETUP
|
||||
script: $CI_ROOT/managers/debian.sh RUN_GCC8 || travis_terminate
|
||||
script: $CI_ROOT/managers/debian.sh RUN_GCC8 || travis_terminate 1
|
||||
after_script: $CI_ROOT/managers/debian.sh CLEANUP
|
||||
|
||||
- name: Debian Build (gcc-8 ASan+UBSan)
|
||||
language: bash
|
||||
install: $CI_ROOT/managers/debian.sh SETUP
|
||||
script: $CI_ROOT/managers/debian.sh RUN_GCC8_ASAN || travis_terminate
|
||||
script: $CI_ROOT/managers/debian.sh RUN_GCC8_ASAN || travis_terminate 1
|
||||
after_script: $CI_ROOT/managers/debian.sh CLEANUP
|
||||
|
||||
- name: Ubuntu Bionic Build
|
||||
language: bash
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
|
||||
|
||||
- name: Ubuntu Bionic Build (arm)
|
||||
arch: arm64
|
||||
language: bash
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
|
||||
|
||||
- name: Ubuntu Bionic Build (s390x)
|
||||
arch: s390x
|
||||
language: bash
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
|
||||
|
||||
- name: Ubuntu Bionic Build (ppc64le)
|
||||
arch: ppc64le
|
||||
language: bash
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate
|
||||
script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
|
||||
|
||||
- stage: Build & Test
|
||||
name: Kernel 5.5.0 + selftests
|
||||
- name: Kernel 5.5.0 + selftests
|
||||
language: bash
|
||||
env: KERNEL=5.5.0
|
||||
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate
|
||||
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
|
||||
|
||||
- name: Kernel LATEST + selftests
|
||||
language: bash
|
||||
env: KERNEL=LATEST
|
||||
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate
|
||||
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
|
||||
|
||||
- stage: Coverity
|
||||
language: bash
|
||||
@@ -121,4 +120,4 @@ jobs:
|
||||
- sudo apt-get -y build-dep libelf-dev
|
||||
- sudo apt-get install -y libelf-dev pkg-config
|
||||
script:
|
||||
- scripts/coverity.sh || travis_terminate
|
||||
- scripts/coverity.sh || travis_terminate 1
|
||||
|
||||
@@ -1 +1 @@
|
||||
edadedf1c5b4e4404192a0a4c3c0c05e3b7672ab
|
||||
4e15507fea70c0c312d79610efa46b6853ccf8e0
|
||||
|
||||
@@ -1 +1 @@
|
||||
2fcd80144b93ff90836a44f2054b4d82133d3a85
|
||||
69119673bd50b176ded34032fadd41530fb5af21
|
||||
|
||||
51
README.md
51
README.md
@@ -52,11 +52,12 @@ $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make install
|
||||
```
|
||||
|
||||
Distributions
|
||||
=====
|
||||
=============
|
||||
|
||||
Distributions packaging libbpf from this mirror:
|
||||
- [Fedora](https://src.fedoraproject.org/rpms/libbpf)
|
||||
- [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf)
|
||||
- [Debian](https://packages.debian.org/sid/libbpf-dev)
|
||||
|
||||
Benefits of packaging from the mirror over packaging from kernel sources:
|
||||
- Consistent versioning across distributions.
|
||||
@@ -73,8 +74,54 @@ Package dependencies of libbpf, package names may vary across distros:
|
||||
- zlib
|
||||
- libelf
|
||||
|
||||
BPF CO-RE (Compile Once – Run Everywhere)
|
||||
=========================================
|
||||
|
||||
Libbpf supports building BPF CO-RE-enabled applications, which, in contrast to
|
||||
[BCC](https://github.com/iovisor/bcc/), do not require Clang/LLVM runtime
|
||||
being deployed to target servers and do not rely on kernel-devel headers
|
||||
being available.
|
||||
|
||||
It does rely on the kernel being built with [BTF type
|
||||
information](https://www.kernel.org/doc/html/latest/bpf/btf.html), though.
|
||||
Some major Linux distributions come with kernel BTF already built in:
|
||||
- Fedora 31+
|
||||
- RHEL 8.2+
|
||||
- OpenSUSE Tumbleweed (in the next release, as of 2020-06-04)
|
||||
- Arch Linux (from kernel 5.7.1.arch1-1)
|
||||
|
||||
If your kernel doesn't come with BTF built-in, you'll need to build a custom
|
||||
kernel. You'll need:
|
||||
- `pahole` 1.16+ tool (part of `dwarves` package), which performs DWARF to
|
||||
BTF conversion;
|
||||
- kernel built with `CONFIG_DEBUG_INFO_BTF=y` option;
|
||||
- you can check if your kernel has BTF built-in by looking for
|
||||
`/sys/kernel/btf/vmlinux` file:
|
||||
|
||||
```shell
|
||||
$ ls -la /sys/kernel/btf/vmlinux
|
||||
-r--r--r--. 1 root root 3541561 Jun 2 18:16 /sys/kernel/btf/vmlinux
|
||||
```
|
||||
|
||||
To develop and build BPF programs, you'll need Clang/LLVM 10+. The following
|
||||
distributions have Clang/LLVM 10+ packaged by default:
|
||||
- Fedora 32+
|
||||
- Ubuntu 20.04+
|
||||
- Arch Linux
|
||||
|
||||
Otherwise, please make sure to update it on your system.
|
||||
|
||||
The following resources are useful to understand what BPF CO-RE is and how to
|
||||
use it:
|
||||
- [BPF Portability and CO-RE](https://facebookmicrosites.github.io/bpf/blog/2020/02/19/bpf-portability-and-co-re.html)
|
||||
- [HOWTO: BCC to libbpf conversion](https://facebookmicrosites.github.io/bpf/blog/2020/02/20/bcc-to-libbpf-howto-guide.html)
|
||||
- [libbpf-tools in BCC repo](https://github.com/iovisor/bcc/tree/master/libbpf-tools)
|
||||
contain lots of real-world tools converted from BCC to BPF CO-RE. Consider
|
||||
converting some more to both contribute to the BPF community and gain some
|
||||
more experience with it.
|
||||
|
||||
License
|
||||
=====
|
||||
=======
|
||||
|
||||
This work is dual-licensed under BSD 2-clause license and GNU LGPL v2.1 license.
|
||||
You can choose between one of them if you use this work.
|
||||
|
||||
@@ -73,7 +73,7 @@ struct bpf_insn {
|
||||
/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
|
||||
struct bpf_lpm_trie_key {
|
||||
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
|
||||
__u8 data[]; /* Arbitrary size */
|
||||
__u8 data[0]; /* Arbitrary size */
|
||||
};
|
||||
|
||||
struct bpf_cgroup_storage_key {
|
||||
@@ -113,6 +113,10 @@ enum bpf_cmd {
|
||||
BPF_MAP_DELETE_BATCH,
|
||||
BPF_LINK_CREATE,
|
||||
BPF_LINK_UPDATE,
|
||||
BPF_LINK_GET_FD_BY_ID,
|
||||
BPF_LINK_GET_NEXT_ID,
|
||||
BPF_ENABLE_STATS,
|
||||
BPF_ITER_CREATE,
|
||||
};
|
||||
|
||||
enum bpf_map_type {
|
||||
@@ -143,6 +147,7 @@ enum bpf_map_type {
|
||||
BPF_MAP_TYPE_SK_STORAGE,
|
||||
BPF_MAP_TYPE_DEVMAP_HASH,
|
||||
BPF_MAP_TYPE_STRUCT_OPS,
|
||||
BPF_MAP_TYPE_RINGBUF,
|
||||
};
|
||||
|
||||
/* Note that tracing related programs such as
|
||||
@@ -215,11 +220,28 @@ enum bpf_attach_type {
|
||||
BPF_TRACE_FEXIT,
|
||||
BPF_MODIFY_RETURN,
|
||||
BPF_LSM_MAC,
|
||||
BPF_TRACE_ITER,
|
||||
BPF_CGROUP_INET4_GETPEERNAME,
|
||||
BPF_CGROUP_INET6_GETPEERNAME,
|
||||
BPF_CGROUP_INET4_GETSOCKNAME,
|
||||
BPF_CGROUP_INET6_GETSOCKNAME,
|
||||
BPF_XDP_DEVMAP,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
|
||||
|
||||
enum bpf_link_type {
|
||||
BPF_LINK_TYPE_UNSPEC = 0,
|
||||
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
|
||||
BPF_LINK_TYPE_TRACING = 2,
|
||||
BPF_LINK_TYPE_CGROUP = 3,
|
||||
BPF_LINK_TYPE_ITER = 4,
|
||||
BPF_LINK_TYPE_NETNS = 5,
|
||||
|
||||
MAX_BPF_LINK_TYPE,
|
||||
};
|
||||
|
||||
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
|
||||
*
|
||||
* NONE(default): No further bpf programs allowed in the subtree.
|
||||
@@ -379,6 +401,12 @@ enum {
|
||||
*/
|
||||
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
|
||||
|
||||
/* type for BPF_ENABLE_STATS */
|
||||
enum bpf_stats_type {
|
||||
/* enabled run_time_ns and run_cnt */
|
||||
BPF_STATS_RUN_TIME = 0,
|
||||
};
|
||||
|
||||
enum bpf_stack_build_id_status {
|
||||
/* user space need an empty entry to identify end of a trace */
|
||||
BPF_STACK_BUILD_ID_EMPTY = 0,
|
||||
@@ -523,6 +551,7 @@ union bpf_attr {
|
||||
__u32 prog_id;
|
||||
__u32 map_id;
|
||||
__u32 btf_id;
|
||||
__u32 link_id;
|
||||
};
|
||||
__u32 next_id;
|
||||
__u32 open_flags;
|
||||
@@ -589,6 +618,15 @@ union bpf_attr {
|
||||
__u32 old_prog_fd;
|
||||
} link_update;
|
||||
|
||||
struct { /* struct used by BPF_ENABLE_STATS command */
|
||||
__u32 type;
|
||||
} enable_stats;
|
||||
|
||||
struct { /* struct used by BPF_ITER_CREATE command */
|
||||
__u32 link_fd;
|
||||
__u32 flags;
|
||||
} iter_create;
|
||||
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* The description below is an attempt at providing documentation to eBPF
|
||||
@@ -644,14 +682,16 @@ union bpf_attr {
|
||||
* For tracing programs, safely attempt to read *size* bytes from
|
||||
* kernel space address *unsafe_ptr* and store the data in *dst*.
|
||||
*
|
||||
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
|
||||
* instead.
|
||||
* Generally, use **bpf_probe_read_user**\ () or
|
||||
* **bpf_probe_read_kernel**\ () instead.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* u64 bpf_ktime_get_ns(void)
|
||||
* Description
|
||||
* Return the time elapsed since system boot, in nanoseconds.
|
||||
* Does not include time the system was suspended.
|
||||
* See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
|
||||
* Return
|
||||
* Current *ktime*.
|
||||
*
|
||||
@@ -1510,11 +1550,11 @@ union bpf_attr {
|
||||
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe kernel address
|
||||
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
|
||||
* *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
|
||||
* more details.
|
||||
*
|
||||
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
|
||||
* instead.
|
||||
* Generally, use **bpf_probe_read_user_str**\ () or
|
||||
* **bpf_probe_read_kernel_str**\ () instead.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string,
|
||||
* including the trailing NUL character. On error, a negative
|
||||
@@ -1542,7 +1582,7 @@ union bpf_attr {
|
||||
*
|
||||
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
|
||||
* Description
|
||||
* Equivalent to bpf_get_socket_cookie() helper that accepts
|
||||
* Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
|
||||
* *skb*, but gets socket from **struct bpf_sock_ops** context.
|
||||
* Return
|
||||
* An 8-byte long non-decreasing number.
|
||||
@@ -1562,7 +1602,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0
|
||||
*
|
||||
* int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
|
||||
* int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
|
||||
* Description
|
||||
* Emulate a call to **setsockopt()** on the socket associated to
|
||||
* *bpf_socket*, which must be a full socket. The *level* at
|
||||
@@ -1570,6 +1610,12 @@ union bpf_attr {
|
||||
* must be specified, see **setsockopt(2)** for more information.
|
||||
* The option value of length *optlen* is pointed by *optval*.
|
||||
*
|
||||
* *bpf_socket* should be one of the following:
|
||||
*
|
||||
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
||||
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
||||
* and **BPF_CGROUP_INET6_CONNECT**.
|
||||
*
|
||||
* This helper actually implements a subset of **setsockopt()**.
|
||||
* It supports the following *level*\ s:
|
||||
*
|
||||
@@ -1589,6 +1635,13 @@ union bpf_attr {
|
||||
* Grow or shrink the room for data in the packet associated to
|
||||
* *skb* by *len_diff*, and according to the selected *mode*.
|
||||
*
|
||||
* By default, the helper will reset any offloaded checksum
|
||||
* indicator of the skb to CHECKSUM_NONE. This can be avoided
|
||||
* by the following flag:
|
||||
*
|
||||
* * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
|
||||
* checksum data of the skb to CHECKSUM_NONE.
|
||||
*
|
||||
* There are two supported modes at this time:
|
||||
*
|
||||
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
|
||||
@@ -1634,15 +1687,15 @@ union bpf_attr {
|
||||
*
|
||||
* The lower two bits of *flags* are used as the return code if
|
||||
* the map lookup fails. This is so that the return value can be
|
||||
* one of the XDP program return codes up to XDP_TX, as chosen by
|
||||
* the caller. Any higher bits in the *flags* argument must be
|
||||
* one of the XDP program return codes up to **XDP_TX**, as chosen
|
||||
* by the caller. Any higher bits in the *flags* argument must be
|
||||
* unset.
|
||||
*
|
||||
* See also bpf_redirect(), which only supports redirecting to an
|
||||
* ifindex, but doesn't require a map to do so.
|
||||
* See also **bpf_redirect**\ (), which only supports redirecting
|
||||
* to an ifindex, but doesn't require a map to do so.
|
||||
* Return
|
||||
* **XDP_REDIRECT** on success, or the value of the two lower bits
|
||||
* of the **flags* argument on error.
|
||||
* of the *flags* argument on error.
|
||||
*
|
||||
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
|
||||
* Description
|
||||
@@ -1747,7 +1800,7 @@ union bpf_attr {
|
||||
* the time running for event since last normalization. The
|
||||
* enabled and running times are accumulated since the perf event
|
||||
* open. To achieve scaling factor between two invocations of an
|
||||
* eBPF program, users can can use CPU id as the key (which is
|
||||
* eBPF program, users can use CPU id as the key (which is
|
||||
* typical for perf array usage model) to remember the previous
|
||||
* value and do the calculation inside the eBPF program.
|
||||
* Return
|
||||
@@ -1764,7 +1817,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
|
||||
* int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
|
||||
* Description
|
||||
* Emulate a call to **getsockopt()** on the socket associated to
|
||||
* *bpf_socket*, which must be a full socket. The *level* at
|
||||
@@ -1773,6 +1826,12 @@ union bpf_attr {
|
||||
* The retrieved value is stored in the structure pointed by
|
||||
* *optval* and of length *optlen*.
|
||||
*
|
||||
* *bpf_socket* should be one of the following:
|
||||
*
|
||||
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
||||
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
||||
* and **BPF_CGROUP_INET6_CONNECT**.
|
||||
*
|
||||
* This helper actually implements a subset of **getsockopt()**.
|
||||
* It supports the following *level*\ s:
|
||||
*
|
||||
@@ -1790,7 +1849,7 @@ union bpf_attr {
|
||||
* The first argument is the context *regs* on which the kprobe
|
||||
* works.
|
||||
*
|
||||
* This helper works by setting setting the PC (program counter)
|
||||
* This helper works by setting the PC (program counter)
|
||||
* to an override function which is run in place of the original
|
||||
* probed function. This means the probed function is not run at
|
||||
* all. The replacement function just returns with the required
|
||||
@@ -1959,18 +2018,19 @@ union bpf_attr {
|
||||
*
|
||||
* This helper works for IPv4 and IPv6, TCP and UDP sockets. The
|
||||
* domain (*addr*\ **->sa_family**) must be **AF_INET** (or
|
||||
* **AF_INET6**). Looking for a free port to bind to can be
|
||||
* expensive, therefore binding to port is not permitted by the
|
||||
* helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
|
||||
* must be set to zero.
|
||||
* **AF_INET6**). It's advised to pass zero port (**sin_port**
|
||||
* or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
|
||||
* behavior and lets the kernel efficiently pick up an unused
|
||||
* port as long as 4-tuple is unique. Passing non-zero port might
|
||||
* lead to degraded performance.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
|
||||
* Description
|
||||
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
|
||||
* only possible to shrink the packet as of this writing,
|
||||
* therefore *delta* must be a negative integer.
|
||||
* possible to both shrink and grow the packet tail.
|
||||
* Shrink done via *delta* being a negative integer.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
@@ -2256,7 +2316,7 @@ union bpf_attr {
|
||||
* **bpf_rc_keydown**\ () again with the same values, or calling
|
||||
* **bpf_rc_repeat**\ ().
|
||||
*
|
||||
* Some protocols include a toggle bit, in case the button was
|
||||
* Some protocols include a toggle bit, in case the button was
|
||||
* released and pressed again between consecutive scancodes.
|
||||
*
|
||||
* The *ctx* should point to the lirc sample as passed into
|
||||
@@ -2602,7 +2662,6 @@ union bpf_attr {
|
||||
*
|
||||
* *th* points to the start of the TCP header, while *th_len*
|
||||
* contains **sizeof**\ (**struct tcphdr**).
|
||||
*
|
||||
* Return
|
||||
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
|
||||
* error otherwise.
|
||||
@@ -2785,7 +2844,6 @@ union bpf_attr {
|
||||
*
|
||||
* *th* points to the start of the TCP header, while *th_len*
|
||||
* contains the length of the TCP header.
|
||||
*
|
||||
* Return
|
||||
* On success, lower 32 bits hold the generated SYN cookie in
|
||||
* followed by 16 bits which hold the MSS value for that cookie,
|
||||
@@ -2868,7 +2926,7 @@ union bpf_attr {
|
||||
* // size, after checking its boundaries.
|
||||
* }
|
||||
*
|
||||
* In comparison, using **bpf_probe_read_user()** helper here
|
||||
* In comparison, using **bpf_probe_read_user**\ () helper here
|
||||
* instead to read the string would require to estimate the length
|
||||
* at compile time, and would often result in copying more memory
|
||||
* than necessary.
|
||||
@@ -2886,14 +2944,14 @@ union bpf_attr {
|
||||
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
|
||||
* Description
|
||||
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
|
||||
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
|
||||
* to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
|
||||
* Return
|
||||
* On success, the strictly positive length of the string, including
|
||||
* On success, the strictly positive length of the string, including
|
||||
* the trailing NUL character. On error, a negative value.
|
||||
*
|
||||
* int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
|
||||
* Description
|
||||
* Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
|
||||
* Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
|
||||
* *rcv_nxt* is the ack_seq to be sent out.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
@@ -2921,19 +2979,19 @@ union bpf_attr {
|
||||
* int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
|
||||
* Description
|
||||
* For an eBPF program attached to a perf event, retrieve the
|
||||
* branch records (struct perf_branch_entry) associated to *ctx*
|
||||
* and store it in the buffer pointed by *buf* up to size
|
||||
* branch records (**struct perf_branch_entry**) associated to *ctx*
|
||||
* and store it in the buffer pointed by *buf* up to size
|
||||
* *size* bytes.
|
||||
* Return
|
||||
* On success, number of bytes written to *buf*. On error, a
|
||||
* negative value.
|
||||
*
|
||||
* The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
|
||||
* instead return the number of bytes required to store all the
|
||||
* instead return the number of bytes required to store all the
|
||||
* branch entries. If this flag is set, *buf* may be NULL.
|
||||
*
|
||||
* **-EINVAL** if arguments invalid or **size** not a multiple
|
||||
* of sizeof(struct perf_branch_entry).
|
||||
* of **sizeof**\ (**struct perf_branch_entry**\ ).
|
||||
*
|
||||
* **-ENOENT** if architecture does not support branch records.
|
||||
*
|
||||
@@ -2941,8 +2999,8 @@ union bpf_attr {
|
||||
* Description
|
||||
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
|
||||
* *namespace* will be returned in *nsdata*.
|
||||
*
|
||||
* On failure, the returned value is one of the following:
|
||||
* Return
|
||||
* 0 on success, or one of the following in case of failure:
|
||||
*
|
||||
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
|
||||
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
|
||||
@@ -2981,8 +3039,8 @@ union bpf_attr {
|
||||
* a global identifier that can be assumed unique. If *ctx* is
|
||||
* NULL, then the helper returns the cookie for the initial
|
||||
* network namespace. The cookie itself is very similar to that
|
||||
* of bpf_get_socket_cookie() helper, but for network namespaces
|
||||
* instead of sockets.
|
||||
* of **bpf_get_socket_cookie**\ () helper, but for network
|
||||
* namespaces instead of sockets.
|
||||
* Return
|
||||
* An 8-byte long opaque number.
|
||||
*
|
||||
@@ -3017,14 +3075,183 @@ union bpf_attr {
|
||||
*
|
||||
* The *flags* argument must be zero.
|
||||
* Return
|
||||
* 0 on success, or a negative errno in case of failure.
|
||||
* 0 on success, or a negative error in case of failure:
|
||||
*
|
||||
* * **-EINVAL** Unsupported flags specified.
|
||||
* * **-ENOENT** Socket is unavailable for assignment.
|
||||
* * **-ENETUNREACH** Socket is unreachable (wrong netns).
|
||||
* * **-EOPNOTSUPP** Unsupported operation, for example a
|
||||
* call from outside of TC ingress.
|
||||
* * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
|
||||
* **-EINVAL** if specified *flags* are not supported.
|
||||
*
|
||||
* **-ENOENT** if the socket is unavailable for assignment.
|
||||
*
|
||||
* **-ENETUNREACH** if the socket is unreachable (wrong netns).
|
||||
*
|
||||
* **-EOPNOTSUPP** if the operation is not supported, for example
|
||||
* a call from outside of TC ingress.
|
||||
*
|
||||
* **-ESOCKTNOSUPPORT** if the socket type is not supported
|
||||
* (reuseport).
|
||||
*
|
||||
* u64 bpf_ktime_get_boot_ns(void)
|
||||
* Description
|
||||
* Return the time elapsed since system boot, in nanoseconds.
|
||||
* Does include the time the system was suspended.
|
||||
* See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
|
||||
* Return
|
||||
* Current *ktime*.
|
||||
*
|
||||
* int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
|
||||
* Description
|
||||
* **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
|
||||
* out the format string.
|
||||
* The *m* represents the seq_file. The *fmt* and *fmt_size* are for
|
||||
* the format string itself. The *data* and *data_len* are format string
|
||||
* arguments. The *data* are a **u64** array and corresponding format string
|
||||
* values are stored in the array. For strings and pointers where pointees
|
||||
* are accessed, only the pointer values are stored in the *data* array.
|
||||
* The *data_len* is the size of *data* in bytes.
|
||||
*
|
||||
* Formats **%s** and **%p{i,I}{4,6}** require reading kernel memory.
|
||||
* Reading kernel memory may fail due to either invalid address or
|
||||
* valid address but requiring a major memory fault. If reading kernel memory
|
||||
* fails, the string for **%s** will be an empty string, and the ip
|
||||
* address for **%p{i,I}{4,6}** will be 0. Not returning error to
|
||||
* bpf program is consistent with what **bpf_trace_printk**\ () does for now.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure:
|
||||
*
|
||||
* **-EBUSY** if per-CPU memory copy buffer is busy, can try again
|
||||
* by returning 1 from bpf program.
|
||||
*
|
||||
* **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
|
||||
*
|
||||
* **-E2BIG** if *fmt* contains too many format specifiers.
|
||||
*
|
||||
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
|
||||
*
|
||||
* int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
|
||||
* Description
|
||||
* **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
|
||||
* The *m* represents the seq_file. The *data* and *len* represent the
|
||||
* data to write in bytes.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure:
|
||||
*
|
||||
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
|
||||
*
|
||||
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
|
||||
* Description
|
||||
* Return the cgroup v2 id of the socket *sk*.
|
||||
*
|
||||
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
|
||||
* returned from **bpf_sk_lookup_xxx**\ (),
|
||||
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
|
||||
* same as in **bpf_skb_cgroup_id**\ ().
|
||||
*
|
||||
* This helper is available only if the kernel was compiled with
|
||||
* the **CONFIG_SOCK_CGROUP_DATA** configuration option.
|
||||
* Return
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*
|
||||
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
|
||||
* Description
|
||||
* Return id of cgroup v2 that is ancestor of cgroup associated
|
||||
* with the *sk* at the *ancestor_level*. The root cgroup is at
|
||||
* *ancestor_level* zero and each step down the hierarchy
|
||||
* increments the level. If *ancestor_level* == level of cgroup
|
||||
* associated with *sk*, then return value will be same as that
|
||||
* of **bpf_sk_cgroup_id**\ ().
|
||||
*
|
||||
* The helper is useful to implement policies based on cgroups
|
||||
* that are upper in hierarchy than immediate cgroup associated
|
||||
* with *sk*.
|
||||
*
|
||||
* The format of returned id and helper limitations are same as in
|
||||
* **bpf_sk_cgroup_id**\ ().
|
||||
* Return
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*
|
||||
* int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
|
||||
* Description
|
||||
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
|
||||
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
||||
* new data availability is sent.
|
||||
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
||||
* new data availability is sent unconditionally.
|
||||
* Return
|
||||
* 0, on success;
|
||||
* < 0, on error.
|
||||
*
|
||||
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
|
||||
* Description
|
||||
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
|
||||
* Return
|
||||
* Valid pointer with *size* bytes of memory available; NULL,
|
||||
* otherwise.
|
||||
*
|
||||
* void bpf_ringbuf_submit(void *data, u64 flags)
|
||||
* Description
|
||||
* Submit reserved ring buffer sample, pointed to by *data*.
|
||||
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
||||
* new data availability is sent.
|
||||
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
||||
* new data availability is sent unconditionally.
|
||||
* Return
|
||||
* Nothing. Always succeeds.
|
||||
*
|
||||
* void bpf_ringbuf_discard(void *data, u64 flags)
|
||||
* Description
|
||||
* Discard reserved ring buffer sample, pointed to by *data*.
|
||||
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
||||
* new data availability is sent.
|
||||
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
||||
* new data availability is sent unconditionally.
|
||||
* Return
|
||||
* Nothing. Always succeeds.
|
||||
*
|
||||
* u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
|
||||
* Description
|
||||
* Query various characteristics of provided ring buffer. What
|
||||
* exactly is queried is determined by *flags*:
|
||||
* - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
|
||||
* - BPF_RB_RING_SIZE - the size of ring buffer;
|
||||
* - BPF_RB_CONS_POS - consumer position (can wrap around);
|
||||
* - BPF_RB_PROD_POS - producer(s) position (can wrap around);
|
||||
* Data returned is just a momentary snapshot of actual values
|
||||
* and could be inaccurate, so this facility should be used to
|
||||
* power heuristics and for reporting, not to make 100% correct
|
||||
* calculation.
|
||||
* Return
|
||||
* Requested value, or 0, if flags are not recognized.
|
||||
*
|
||||
* int bpf_csum_level(struct sk_buff *skb, u64 level)
|
||||
* Description
|
||||
* Change the skb's checksum level by one layer up or down, or
|
||||
* reset it entirely to none in order to have the stack perform
|
||||
* checksum validation. The level is applicable to the following
|
||||
* protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
|
||||
* | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
|
||||
* through **bpf_skb_adjust_room**\ () helper with passing in
|
||||
* **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
|
||||
* to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
|
||||
* the UDP header is removed. Similarly, an encap of the latter
|
||||
* into the former could be accompanied by a helper call to
|
||||
* **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
|
||||
* skb is still intended to be processed in higher layers of the
|
||||
* stack instead of just egressing at tc.
|
||||
*
|
||||
* There are three supported level settings at this time:
|
||||
*
|
||||
* * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
|
||||
* with CHECKSUM_UNNECESSARY.
|
||||
* * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
|
||||
* with CHECKSUM_UNNECESSARY.
|
||||
* * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
|
||||
* sets CHECKSUM_NONE to force checksum validation by the stack.
|
||||
* * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
|
||||
* skb->csum_level.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure. In the
|
||||
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
|
||||
* is returned or the error code -EACCES in case the skb is not
|
||||
* subject to CHECKSUM_UNNECESSARY.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@@ -3151,7 +3378,18 @@ union bpf_attr {
|
||||
FN(xdp_output), \
|
||||
FN(get_netns_cookie), \
|
||||
FN(get_current_ancestor_cgroup_id), \
|
||||
FN(sk_assign),
|
||||
FN(sk_assign), \
|
||||
FN(ktime_get_boot_ns), \
|
||||
FN(seq_printf), \
|
||||
FN(seq_write), \
|
||||
FN(sk_cgroup_id), \
|
||||
FN(sk_ancestor_cgroup_id), \
|
||||
FN(ringbuf_output), \
|
||||
FN(ringbuf_reserve), \
|
||||
FN(ringbuf_submit), \
|
||||
FN(ringbuf_discard), \
|
||||
FN(ringbuf_query), \
|
||||
FN(csum_level),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
@@ -3228,6 +3466,14 @@ enum {
|
||||
BPF_F_CURRENT_NETNS = (-1L),
|
||||
};
|
||||
|
||||
/* BPF_FUNC_csum_level level values. */
|
||||
enum {
|
||||
BPF_CSUM_LEVEL_QUERY,
|
||||
BPF_CSUM_LEVEL_INC,
|
||||
BPF_CSUM_LEVEL_DEC,
|
||||
BPF_CSUM_LEVEL_RESET,
|
||||
};
|
||||
|
||||
/* BPF_FUNC_skb_adjust_room flags. */
|
||||
enum {
|
||||
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
|
||||
@@ -3235,6 +3481,7 @@ enum {
|
||||
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
|
||||
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -3261,6 +3508,29 @@ enum {
|
||||
BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
|
||||
};
|
||||
|
||||
/* BPF_FUNC_ringbuf_submit, BPF_FUNC_ringbuf_discard, and
|
||||
* BPF_FUNC_ringbuf_output flags.
|
||||
*/
|
||||
enum {
|
||||
BPF_RB_NO_WAKEUP = (1ULL << 0),
|
||||
BPF_RB_FORCE_WAKEUP = (1ULL << 1),
|
||||
};
|
||||
|
||||
/* BPF_FUNC_ringbuf_query flags */
|
||||
enum {
|
||||
BPF_RB_AVAIL_DATA = 0,
|
||||
BPF_RB_RING_SIZE = 1,
|
||||
BPF_RB_CONS_POS = 2,
|
||||
BPF_RB_PROD_POS = 3,
|
||||
};
|
||||
|
||||
/* BPF ring buffer constants */
|
||||
enum {
|
||||
BPF_RINGBUF_BUSY_BIT = (1U << 31),
|
||||
BPF_RINGBUF_DISCARD_BIT = (1U << 30),
|
||||
BPF_RINGBUF_HDR_SZ = 8,
|
||||
};
|
||||
|
||||
/* Mode for BPF_FUNC_skb_adjust_room helper. */
|
||||
enum bpf_adj_room_mode {
|
||||
BPF_ADJ_ROOM_NET,
|
||||
@@ -3393,6 +3663,7 @@ struct bpf_sock {
|
||||
__u32 dst_ip4;
|
||||
__u32 dst_ip6[4];
|
||||
__u32 state;
|
||||
__s32 rx_queue_mapping;
|
||||
};
|
||||
|
||||
struct bpf_tcp_sock {
|
||||
@@ -3486,6 +3757,21 @@ struct xdp_md {
|
||||
/* Below access go through struct xdp_rxq_info */
|
||||
__u32 ingress_ifindex; /* rxq->dev->ifindex */
|
||||
__u32 rx_queue_index; /* rxq->queue_index */
|
||||
|
||||
__u32 egress_ifindex; /* txq->dev->ifindex */
|
||||
};
|
||||
|
||||
/* DEVMAP map-value layout
|
||||
*
|
||||
* The struct data-layout of map-value is a configuration interface.
|
||||
* New members can only be added to the end of this structure.
|
||||
*/
|
||||
struct bpf_devmap_val {
|
||||
__u32 ifindex; /* device index */
|
||||
union {
|
||||
int fd; /* prog fd on map write */
|
||||
__u32 id; /* prog id on map read */
|
||||
} bpf_prog;
|
||||
};
|
||||
|
||||
enum sk_action {
|
||||
@@ -3508,6 +3794,8 @@ struct sk_msg_md {
|
||||
__u32 remote_port; /* Stored in network byte order */
|
||||
__u32 local_port; /* stored in host byte order */
|
||||
__u32 size; /* Total size of sk_msg */
|
||||
|
||||
__bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
|
||||
};
|
||||
|
||||
struct sk_reuseport_md {
|
||||
@@ -3598,6 +3886,29 @@ struct bpf_btf_info {
|
||||
__u32 id;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_link_info {
|
||||
__u32 type;
|
||||
__u32 id;
|
||||
__u32 prog_id;
|
||||
union {
|
||||
struct {
|
||||
__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
|
||||
__u32 tp_name_len; /* in/out: tp_name buffer len */
|
||||
} raw_tracepoint;
|
||||
struct {
|
||||
__u32 attach_type;
|
||||
} tracing;
|
||||
struct {
|
||||
__u64 cgroup_id;
|
||||
__u32 attach_type;
|
||||
} cgroup;
|
||||
struct {
|
||||
__u32 netns_ino;
|
||||
__u32 attach_type;
|
||||
} netns;
|
||||
};
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
|
||||
* by user and intended to be used by socket (e.g. to bind to, depends on
|
||||
* attach type).
|
||||
@@ -3610,7 +3921,7 @@ struct bpf_sock_addr {
|
||||
__u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
|
||||
* Stored in network byte order.
|
||||
*/
|
||||
__u32 user_port; /* Allows 4-byte read and write.
|
||||
__u32 user_port; /* Allows 1,2,4-byte read and 4-byte write.
|
||||
* Stored in network byte order
|
||||
*/
|
||||
__u32 family; /* Allows 4-byte read, but no write */
|
||||
|
||||
@@ -343,6 +343,7 @@ enum {
|
||||
IFLA_BRPORT_NEIGH_SUPPRESS,
|
||||
IFLA_BRPORT_ISOLATED,
|
||||
IFLA_BRPORT_BACKUP_PORT,
|
||||
IFLA_BRPORT_MRP_RING_OPEN,
|
||||
__IFLA_BRPORT_MAX
|
||||
};
|
||||
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
|
||||
|
||||
@@ -85,36 +85,6 @@ commit_signature()
|
||||
git show --pretty='("%s")|%aI|%b' --shortstat $1 -- ${2-.} | tr '\n' '|'
|
||||
}
|
||||
|
||||
# Validate there are no non-empty merges (we can't handle them)
|
||||
# $1 - baseline tag
|
||||
# $2 - tip tag
|
||||
validate_merges()
|
||||
{
|
||||
local baseline_tag=$1
|
||||
local tip_tag=$2
|
||||
local new_merges
|
||||
local merge_change_cnt
|
||||
local ignore_merge_resolutions
|
||||
local desc
|
||||
|
||||
new_merges=$(git rev-list --merges --topo-order --reverse ${baseline_tag}..${tip_tag} ${LIBBPF_PATHS[@]})
|
||||
for new_merge in ${new_merges}; do
|
||||
desc=$(commit_desc ${new_merge})
|
||||
echo "MERGE: ${desc}"
|
||||
merge_change_cnt=$(git show --format='' ${new_merge} | wc -l)
|
||||
if ((${merge_change_cnt} > 0)); then
|
||||
read -p "Merge '${desc}' is non-empty, which will cause conflicts! Do you want to proceed? [y/N]: " ignore_merge_resolutions
|
||||
case "${ignore_merge_resolutions}" in
|
||||
"y" | "Y")
|
||||
echo "Skipping '${desc}'..."
|
||||
continue
|
||||
;;
|
||||
esac
|
||||
exit 3
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# Cherry-pick commits touching libbpf-related files
|
||||
# $1 - baseline_tag
|
||||
# $2 - tip_tag
|
||||
@@ -243,18 +213,14 @@ git branch ${BPF_TIP_TAG} ${BPF_TIP_COMMIT}
|
||||
git branch ${SQUASH_BASE_TAG} ${SQUASH_COMMIT}
|
||||
git checkout -b ${SQUASH_TIP_TAG} ${SQUASH_COMMIT}
|
||||
|
||||
# Validate there are no non-empty merges in bpf-next and bpf trees
|
||||
validate_merges ${BASELINE_TAG} ${TIP_TAG}
|
||||
validate_merges ${BPF_BASELINE_TAG} ${BPF_TIP_TAG}
|
||||
|
||||
# Cherry-pick new commits onto squashed baseline commit
|
||||
cherry_pick_commits ${BASELINE_TAG} ${TIP_TAG}
|
||||
cherry_pick_commits ${BPF_BASELINE_TAG} ${BPF_TIP_TAG}
|
||||
|
||||
# Move all libbpf files into __libbpf directory.
|
||||
git filter-branch --prune-empty -f --tree-filter "${LIBBPF_TREE_FILTER}" ${SQUASH_TIP_TAG} ${SQUASH_BASE_TAG}
|
||||
FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch --prune-empty -f --tree-filter "${LIBBPF_TREE_FILTER}" ${SQUASH_TIP_TAG} ${SQUASH_BASE_TAG}
|
||||
# Make __libbpf a new root directory
|
||||
git filter-branch --prune-empty -f --subdirectory-filter __libbpf ${SQUASH_TIP_TAG} ${SQUASH_BASE_TAG}
|
||||
FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch --prune-empty -f --subdirectory-filter __libbpf ${SQUASH_TIP_TAG} ${SQUASH_BASE_TAG}
|
||||
|
||||
# If there are no new commits with libbpf-related changes, bail out
|
||||
COMMIT_CNT=$(git rev-list --count ${SQUASH_BASE_TAG}..${SQUASH_TIP_TAG})
|
||||
@@ -318,8 +284,8 @@ echo "Verifying Linux's and Github's libbpf state"
|
||||
|
||||
cd_to ${LINUX_REPO}
|
||||
git checkout -b ${VIEW_TAG} ${TIP_COMMIT}
|
||||
git filter-branch -f --tree-filter "${LIBBPF_TREE_FILTER}" ${VIEW_TAG}^..${VIEW_TAG}
|
||||
git filter-branch -f --subdirectory-filter __libbpf ${VIEW_TAG}^..${VIEW_TAG}
|
||||
FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --tree-filter "${LIBBPF_TREE_FILTER}" ${VIEW_TAG}^..${VIEW_TAG}
|
||||
FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --subdirectory-filter __libbpf ${VIEW_TAG}^..${VIEW_TAG}
|
||||
git ls-files -- ${LIBBPF_VIEW_PATHS[@]} > ${TMP_DIR}/linux-view.ls
|
||||
|
||||
cd_to ${LIBBPF_REPO}
|
||||
|
||||
@@ -33,7 +33,7 @@ SHARED_OBJDIR := $(OBJDIR)/sharedobjs
|
||||
STATIC_OBJDIR := $(OBJDIR)/staticobjs
|
||||
OBJS := bpf.o btf.o libbpf.o libbpf_errno.o netlink.o \
|
||||
nlattr.o str_error.o libbpf_probes.o bpf_prog_linfo.o xsk.o \
|
||||
btf_dump.o hashmap.o
|
||||
btf_dump.o hashmap.o ringbuf.o
|
||||
SHARED_OBJS := $(addprefix $(SHARED_OBJDIR)/,$(OBJS))
|
||||
STATIC_OBJS := $(addprefix $(STATIC_OBJDIR)/,$(OBJS))
|
||||
|
||||
|
||||
39
src/bpf.c
39
src/bpf.c
@@ -619,6 +619,16 @@ int bpf_link_update(int link_fd, int new_prog_fd,
|
||||
return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
|
||||
}
|
||||
|
||||
int bpf_iter_create(int link_fd)
|
||||
{
|
||||
union bpf_attr attr;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.iter_create.link_fd = link_fd;
|
||||
|
||||
return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
|
||||
}
|
||||
|
||||
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
|
||||
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
|
||||
{
|
||||
@@ -721,6 +731,11 @@ int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
|
||||
return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
|
||||
}
|
||||
|
||||
int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
|
||||
{
|
||||
return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
|
||||
}
|
||||
|
||||
int bpf_prog_get_fd_by_id(__u32 id)
|
||||
{
|
||||
union bpf_attr attr;
|
||||
@@ -751,13 +766,23 @@ int bpf_btf_get_fd_by_id(__u32 id)
|
||||
return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
|
||||
}
|
||||
|
||||
int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
|
||||
int bpf_link_get_fd_by_id(__u32 id)
|
||||
{
|
||||
union bpf_attr attr;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.link_id = id;
|
||||
|
||||
return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
|
||||
}
|
||||
|
||||
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
|
||||
{
|
||||
union bpf_attr attr;
|
||||
int err;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.info.bpf_fd = prog_fd;
|
||||
attr.info.bpf_fd = bpf_fd;
|
||||
attr.info.info_len = *info_len;
|
||||
attr.info.info = ptr_to_u64(info);
|
||||
|
||||
@@ -826,3 +851,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_enable_stats(enum bpf_stats_type type)
|
||||
{
|
||||
union bpf_attr attr;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
attr.enable_stats.type = type;
|
||||
|
||||
return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
|
||||
}
|
||||
|
||||
@@ -187,6 +187,8 @@ struct bpf_link_update_opts {
|
||||
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
|
||||
const struct bpf_link_update_opts *opts);
|
||||
|
||||
LIBBPF_API int bpf_iter_create(int link_fd);
|
||||
|
||||
struct bpf_prog_test_run_attr {
|
||||
int prog_fd;
|
||||
int repeat;
|
||||
@@ -216,10 +218,12 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
|
||||
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
|
||||
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
|
||||
LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
|
||||
LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
|
||||
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
|
||||
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
|
||||
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
|
||||
LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
|
||||
LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
|
||||
LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
|
||||
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
|
||||
__u32 query_flags, __u32 *attach_flags,
|
||||
__u32 *prog_ids, __u32 *prog_cnt);
|
||||
@@ -230,6 +234,9 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
|
||||
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
|
||||
__u64 *probe_offset, __u64 *probe_addr);
|
||||
|
||||
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
|
||||
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
@@ -18,6 +18,7 @@ struct pt_regs;
|
||||
struct sk_reuseport_md;
|
||||
struct sockaddr;
|
||||
struct tcphdr;
|
||||
struct seq_file;
|
||||
struct __sk_buff;
|
||||
struct sk_msg_md;
|
||||
struct xdp_md;
|
||||
@@ -71,8 +72,8 @@ static int (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3;
|
||||
* For tracing programs, safely attempt to read *size* bytes from
|
||||
* kernel space address *unsafe_ptr* and store the data in *dst*.
|
||||
*
|
||||
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
|
||||
* instead.
|
||||
* Generally, use **bpf_probe_read_user**\ () or
|
||||
* **bpf_probe_read_kernel**\ () instead.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
@@ -83,6 +84,8 @@ static int (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (v
|
||||
* bpf_ktime_get_ns
|
||||
*
|
||||
* Return the time elapsed since system boot, in nanoseconds.
|
||||
* Does not include time the system was suspended.
|
||||
* See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
|
||||
*
|
||||
* Returns
|
||||
* Current *ktime*.
|
||||
@@ -1104,11 +1107,11 @@ static int (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 4
|
||||
* bpf_probe_read_str
|
||||
*
|
||||
* Copy a NUL terminated string from an unsafe kernel address
|
||||
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
|
||||
* *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
|
||||
* more details.
|
||||
*
|
||||
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
|
||||
* instead.
|
||||
* Generally, use **bpf_probe_read_user_str**\ () or
|
||||
* **bpf_probe_read_kernel_str**\ () instead.
|
||||
*
|
||||
* Returns
|
||||
* On success, the strictly positive length of the string,
|
||||
@@ -1167,6 +1170,12 @@ static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
|
||||
* must be specified, see **setsockopt(2)** for more information.
|
||||
* The option value of length *optlen* is pointed by *optval*.
|
||||
*
|
||||
* *bpf_socket* should be one of the following:
|
||||
*
|
||||
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
||||
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
||||
* and **BPF_CGROUP_INET6_CONNECT**.
|
||||
*
|
||||
* This helper actually implements a subset of **setsockopt()**.
|
||||
* It supports the following *level*\ s:
|
||||
*
|
||||
@@ -1182,7 +1191,7 @@ static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*/
|
||||
static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49;
|
||||
static int (*bpf_setsockopt)(void *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49;
|
||||
|
||||
/*
|
||||
* bpf_skb_adjust_room
|
||||
@@ -1190,6 +1199,13 @@ static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int opt
|
||||
* Grow or shrink the room for data in the packet associated to
|
||||
* *skb* by *len_diff*, and according to the selected *mode*.
|
||||
*
|
||||
* By default, the helper will reset any offloaded checksum
|
||||
* indicator of the skb to CHECKSUM_NONE. This can be avoided
|
||||
* by the following flag:
|
||||
*
|
||||
* * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
|
||||
* checksum data of the skb to CHECKSUM_NONE.
|
||||
*
|
||||
* There are two supported modes at this time:
|
||||
*
|
||||
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
|
||||
@@ -1239,16 +1255,16 @@ static int (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 m
|
||||
*
|
||||
* The lower two bits of *flags* are used as the return code if
|
||||
* the map lookup fails. This is so that the return value can be
|
||||
* one of the XDP program return codes up to XDP_TX, as chosen by
|
||||
* the caller. Any higher bits in the *flags* argument must be
|
||||
* one of the XDP program return codes up to **XDP_TX**, as chosen
|
||||
* by the caller. Any higher bits in the *flags* argument must be
|
||||
* unset.
|
||||
*
|
||||
* See also bpf_redirect(), which only supports redirecting to an
|
||||
* ifindex, but doesn't require a map to do so.
|
||||
* See also **bpf_redirect**\ (), which only supports redirecting
|
||||
* to an ifindex, but doesn't require a map to do so.
|
||||
*
|
||||
* Returns
|
||||
* **XDP_REDIRECT** on success, or the value of the two lower bits
|
||||
* of the **flags* argument on error.
|
||||
* of the *flags* argument on error.
|
||||
*/
|
||||
static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51;
|
||||
|
||||
@@ -1368,7 +1384,7 @@ static int (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 5
|
||||
* the time running for event since last normalization. The
|
||||
* enabled and running times are accumulated since the perf event
|
||||
* open. To achieve scaling factor between two invocations of an
|
||||
* eBPF program, users can can use CPU id as the key (which is
|
||||
* eBPF program, users can use CPU id as the key (which is
|
||||
* typical for perf array usage model) to remember the previous
|
||||
* value and do the calculation inside the eBPF program.
|
||||
*
|
||||
@@ -1402,6 +1418,12 @@ static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct b
|
||||
* The retrieved value is stored in the structure pointed by
|
||||
* *opval* and of length *optlen*.
|
||||
*
|
||||
* *bpf_socket* should be one of the following:
|
||||
*
|
||||
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
||||
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
||||
* and **BPF_CGROUP_INET6_CONNECT**.
|
||||
*
|
||||
* This helper actually implements a subset of **getsockopt()**.
|
||||
* It supports the following *level*\ s:
|
||||
*
|
||||
@@ -1413,7 +1435,7 @@ static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct b
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*/
|
||||
static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57;
|
||||
static int (*bpf_getsockopt)(void *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57;
|
||||
|
||||
/*
|
||||
* bpf_override_return
|
||||
@@ -1423,7 +1445,7 @@ static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int opt
|
||||
* The first argument is the context *regs* on which the kprobe
|
||||
* works.
|
||||
*
|
||||
* This helper works by setting setting the PC (program counter)
|
||||
* This helper works by setting the PC (program counter)
|
||||
* to an override function which is run in place of the original
|
||||
* probed function. This means the probed function is not run at
|
||||
* all. The replacement function just returns with the required
|
||||
@@ -1616,10 +1638,11 @@ static int (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, _
|
||||
*
|
||||
* This helper works for IPv4 and IPv6, TCP and UDP sockets. The
|
||||
* domain (*addr*\ **->sa_family**) must be **AF_INET** (or
|
||||
* **AF_INET6**). Looking for a free port to bind to can be
|
||||
* expensive, therefore binding to port is not permitted by the
|
||||
* helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
|
||||
* must be set to zero.
|
||||
* **AF_INET6**). It's advised to pass zero port (**sin_port**
|
||||
* or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
|
||||
* behavior and lets the kernel efficiently pick up an unused
|
||||
* port as long as 4-tuple is unique. Passing non-zero port might
|
||||
* lead to degraded performance.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
@@ -1630,8 +1653,8 @@ static int (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int add
|
||||
* bpf_xdp_adjust_tail
|
||||
*
|
||||
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
|
||||
* only possible to shrink the packet as of this writing,
|
||||
* therefore *delta* must be a negative integer.
|
||||
* possible to both shrink and grow the packet tail.
|
||||
* Shrink done via *delta* being a negative integer.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
@@ -1969,7 +1992,7 @@ static int (*bpf_rc_repeat)(void *ctx) = (void *) 77;
|
||||
* **bpf_rc_keydown**\ () again with the same values, or calling
|
||||
* **bpf_rc_repeat**\ ().
|
||||
*
|
||||
* Some protocols include a toggle bit, in case the button was
|
||||
* Some protocols include a toggle bit, in case the button was
|
||||
* released and pressed again between consecutive scancodes.
|
||||
*
|
||||
* The *ctx* should point to the lirc sample as passed into
|
||||
@@ -2405,7 +2428,6 @@ static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *
|
||||
* *th* points to the start of the TCP header, while *th_len*
|
||||
* contains **sizeof**\ (**struct tcphdr**).
|
||||
*
|
||||
*
|
||||
* Returns
|
||||
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
|
||||
* error otherwise.
|
||||
@@ -2628,7 +2650,6 @@ static int (*bpf_send_signal)(__u32 sig) = (void *) 109;
|
||||
* *th* points to the start of the TCP header, while *th_len*
|
||||
* contains the length of the TCP header.
|
||||
*
|
||||
*
|
||||
* Returns
|
||||
* On success, lower 32 bits hold the generated SYN cookie in
|
||||
* followed by 16 bits which hold the MSS value for that cookie,
|
||||
@@ -2726,7 +2747,7 @@ static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_pt
|
||||
* // size, after checking its boundaries.
|
||||
* }
|
||||
*
|
||||
* In comparison, using **bpf_probe_read_user()** helper here
|
||||
* In comparison, using **bpf_probe_read_user**\ () helper here
|
||||
* instead to read the string would require to estimate the length
|
||||
* at compile time, and would often result in copying more memory
|
||||
* than necessary.
|
||||
@@ -2748,10 +2769,10 @@ static int (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_
|
||||
* bpf_probe_read_kernel_str
|
||||
*
|
||||
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
|
||||
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
|
||||
* to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
|
||||
*
|
||||
* Returns
|
||||
* On success, the strictly positive length of the string, including
|
||||
* On success, the strictly positive length of the string, including
|
||||
* the trailing NUL character. On error, a negative value.
|
||||
*/
|
||||
static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115;
|
||||
@@ -2759,7 +2780,7 @@ static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsaf
|
||||
/*
|
||||
* bpf_tcp_send_ack
|
||||
*
|
||||
* Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
|
||||
* Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
|
||||
* *rcv_nxt* is the ack_seq to be sent out.
|
||||
*
|
||||
* Returns
|
||||
@@ -2799,8 +2820,8 @@ static __u64 (*bpf_jiffies64)(void) = (void *) 118;
|
||||
* bpf_read_branch_records
|
||||
*
|
||||
* For an eBPF program attached to a perf event, retrieve the
|
||||
* branch records (struct perf_branch_entry) associated to *ctx*
|
||||
* and store it in the buffer pointed by *buf* up to size
|
||||
* branch records (**struct perf_branch_entry**) associated to *ctx*
|
||||
* and store it in the buffer pointed by *buf* up to size
|
||||
* *size* bytes.
|
||||
*
|
||||
* Returns
|
||||
@@ -2808,11 +2829,11 @@ static __u64 (*bpf_jiffies64)(void) = (void *) 118;
|
||||
* negative value.
|
||||
*
|
||||
* The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
|
||||
* instead return the number of bytes required to store all the
|
||||
* instead return the number of bytes required to store all the
|
||||
* branch entries. If this flag is set, *buf* may be NULL.
|
||||
*
|
||||
* **-EINVAL** if arguments invalid or **size** not a multiple
|
||||
* of sizeof(struct perf_branch_entry).
|
||||
* of **sizeof**\ (**struct perf_branch_entry**\ ).
|
||||
*
|
||||
* **-ENOENT** if architecture does not support branch records.
|
||||
*/
|
||||
@@ -2824,13 +2845,13 @@ static int (*bpf_read_branch_records)(struct bpf_perf_event_data *ctx, void *buf
|
||||
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
|
||||
* *namespace* will be returned in *nsdata*.
|
||||
*
|
||||
* On failure, the returned value is one of the following:
|
||||
* Returns
|
||||
* 0 on success, or one of the following in case of failure:
|
||||
*
|
||||
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
|
||||
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
|
||||
*
|
||||
* **-ENOENT** if pidns does not exist for the current task.
|
||||
*
|
||||
*/
|
||||
static int (*bpf_get_ns_current_pid_tgid)(__u64 dev, __u64 ino, struct bpf_pidns_info *nsdata, __u32 size) = (void *) 120;
|
||||
|
||||
@@ -2871,8 +2892,8 @@ static int (*bpf_xdp_output)(void *ctx, void *map, __u64 flags, void *data, __u6
|
||||
* a global identifier that can be assumed unique. If *ctx* is
|
||||
* NULL, then the helper returns the cookie for the initial
|
||||
* network namespace. The cookie itself is very similar to that
|
||||
* of bpf_get_socket_cookie() helper, but for network namespaces
|
||||
* instead of sockets.
|
||||
* of **bpf_get_socket_cookie**\ () helper, but for network
|
||||
* namespaces instead of sockets.
|
||||
*
|
||||
* Returns
|
||||
* An 8-byte long opaque number.
|
||||
@@ -2916,15 +2937,228 @@ static __u64 (*bpf_get_current_ancestor_cgroup_id)(int ancestor_level) = (void *
|
||||
* The *flags* argument must be zero.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative errno in case of failure.
|
||||
* 0 on success, or a negative error in case of failure:
|
||||
*
|
||||
* * **-EINVAL** Unsupported flags specified.
|
||||
* * **-ENOENT** Socket is unavailable for assignment.
|
||||
* * **-ENETUNREACH** Socket is unreachable (wrong netns).
|
||||
* * **-EOPNOTSUPP** Unsupported operation, for example a
|
||||
* call from outside of TC ingress.
|
||||
* * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
|
||||
* **-EINVAL** if specified *flags* are not supported.
|
||||
*
|
||||
* **-ENOENT** if the socket is unavailable for assignment.
|
||||
*
|
||||
* **-ENETUNREACH** if the socket is unreachable (wrong netns).
|
||||
*
|
||||
* **-EOPNOTSUPP** if the operation is not supported, for example
|
||||
* a call from outside of TC ingress.
|
||||
*
|
||||
* **-ESOCKTNOSUPPORT** if the socket type is not supported
|
||||
* (reuseport).
|
||||
*/
|
||||
static int (*bpf_sk_assign)(struct __sk_buff *skb, struct bpf_sock *sk, __u64 flags) = (void *) 124;
|
||||
|
||||
/*
|
||||
* bpf_ktime_get_boot_ns
|
||||
*
|
||||
* Return the time elapsed since system boot, in nanoseconds.
|
||||
* Does include the time the system was suspended.
|
||||
* See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
|
||||
*
|
||||
* Returns
|
||||
* Current *ktime*.
|
||||
*/
|
||||
static __u64 (*bpf_ktime_get_boot_ns)(void) = (void *) 125;
|
||||
|
||||
/*
|
||||
* bpf_seq_printf
|
||||
*
|
||||
* **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
|
||||
* out the format string.
|
||||
* The *m* represents the seq_file. The *fmt* and *fmt_size* are for
|
||||
* the format string itself. The *data* and *data_len* are format string
|
||||
* arguments. The *data* are a **u64** array and corresponding format string
|
||||
* values are stored in the array. For strings and pointers where pointees
|
||||
* are accessed, only the pointer values are stored in the *data* array.
|
||||
* The *data_len* is the size of *data* in bytes.
|
||||
*
|
||||
* Formats **%s** and **%p{i,I}{4,6}** require reading kernel memory.
|
||||
* Reading kernel memory may fail due to either invalid address or
|
||||
* valid address but requiring a major memory fault. If reading kernel memory
|
||||
* fails, the string for **%s** will be an empty string, and the ip
|
||||
* address for **%p{i,I}{4,6}** will be 0. Not returning error to
|
||||
* bpf program is consistent with what **bpf_trace_printk**\ () does for now.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure:
|
||||
*
|
||||
* **-EBUSY** if per-CPU memory copy buffer is busy, can try again
|
||||
* by returning 1 from bpf program.
|
||||
*
|
||||
* **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
|
||||
*
|
||||
* **-E2BIG** if *fmt* contains too many format specifiers.
|
||||
*
|
||||
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
|
||||
*/
|
||||
static int (*bpf_seq_printf)(struct seq_file *m, const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 126;
|
||||
|
||||
/*
|
||||
* bpf_seq_write
|
||||
*
|
||||
* **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
|
||||
* The *m* represents the seq_file. The *data* and *len* represent the
|
||||
* data to write in bytes.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure:
|
||||
*
|
||||
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
|
||||
*/
|
||||
static int (*bpf_seq_write)(struct seq_file *m, const void *data, __u32 len) = (void *) 127;
|
||||
|
||||
/*
|
||||
* bpf_sk_cgroup_id
|
||||
*
|
||||
* Return the cgroup v2 id of the socket *sk*.
|
||||
*
|
||||
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
|
||||
* returned from **bpf_sk_lookup_xxx**\ (),
|
||||
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
|
||||
* same as in **bpf_skb_cgroup_id**\ ().
|
||||
*
|
||||
* This helper is available only if the kernel was compiled with
|
||||
* the **CONFIG_SOCK_CGROUP_DATA** configuration option.
|
||||
*
|
||||
* Returns
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*/
|
||||
static __u64 (*bpf_sk_cgroup_id)(struct bpf_sock *sk) = (void *) 128;
|
||||
|
||||
/*
|
||||
* bpf_sk_ancestor_cgroup_id
|
||||
*
|
||||
* Return id of cgroup v2 that is ancestor of cgroup associated
|
||||
* with the *sk* at the *ancestor_level*. The root cgroup is at
|
||||
* *ancestor_level* zero and each step down the hierarchy
|
||||
* increments the level. If *ancestor_level* == level of cgroup
|
||||
* associated with *sk*, then return value will be same as that
|
||||
* of **bpf_sk_cgroup_id**\ ().
|
||||
*
|
||||
* The helper is useful to implement policies based on cgroups
|
||||
* that are upper in hierarchy than immediate cgroup associated
|
||||
* with *sk*.
|
||||
*
|
||||
* The format of returned id and helper limitations are same as in
|
||||
* **bpf_sk_cgroup_id**\ ().
|
||||
*
|
||||
* Returns
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*/
|
||||
static __u64 (*bpf_sk_ancestor_cgroup_id)(struct bpf_sock *sk, int ancestor_level) = (void *) 129;
|
||||
|
||||
/*
|
||||
* bpf_ringbuf_output
|
||||
*
|
||||
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
|
||||
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
||||
* new data availability is sent.
|
||||
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
||||
* new data availability is sent unconditionally.
|
||||
*
|
||||
* Returns
|
||||
* 0, on success;
|
||||
* < 0, on error.
|
||||
*/
|
||||
static int (*bpf_ringbuf_output)(void *ringbuf, void *data, __u64 size, __u64 flags) = (void *) 130;
|
||||
|
||||
/*
|
||||
* bpf_ringbuf_reserve
|
||||
*
|
||||
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
|
||||
*
|
||||
* Returns
|
||||
* Valid pointer with *size* bytes of memory available; NULL,
|
||||
* otherwise.
|
||||
*/
|
||||
static void *(*bpf_ringbuf_reserve)(void *ringbuf, __u64 size, __u64 flags) = (void *) 131;
|
||||
|
||||
/*
|
||||
* bpf_ringbuf_submit
|
||||
*
|
||||
* Submit reserved ring buffer sample, pointed to by *data*.
|
||||
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
||||
* new data availability is sent.
|
||||
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
||||
* new data availability is sent unconditionally.
|
||||
*
|
||||
* Returns
|
||||
* Nothing. Always succeeds.
|
||||
*/
|
||||
static void (*bpf_ringbuf_submit)(void *data, __u64 flags) = (void *) 132;
|
||||
|
||||
/*
|
||||
* bpf_ringbuf_discard
|
||||
*
|
||||
* Discard reserved ring buffer sample, pointed to by *data*.
|
||||
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
||||
* new data availability is sent.
|
||||
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
||||
* new data availability is sent unconditionally.
|
||||
*
|
||||
* Returns
|
||||
* Nothing. Always succeeds.
|
||||
*/
|
||||
static void (*bpf_ringbuf_discard)(void *data, __u64 flags) = (void *) 133;
|
||||
|
||||
/*
|
||||
* bpf_ringbuf_query
|
||||
*
|
||||
* Query various characteristics of provided ring buffer. What
|
||||
* exactly is queried is determined by *flags*:
|
||||
* - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
|
||||
* - BPF_RB_RING_SIZE - the size of ring buffer;
|
||||
* - BPF_RB_CONS_POS - consumer position (can wrap around);
|
||||
* - BPF_RB_PROD_POS - producer(s) position (can wrap around);
|
||||
* Data returned is just a momentary snapshot of actual values
|
||||
* and could be inaccurate, so this facility should be used to
|
||||
* power heuristics and for reporting, not to make 100% correct
|
||||
* calculation.
|
||||
*
|
||||
* Returns
|
||||
* Requested value, or 0, if flags are not recognized.
|
||||
*/
|
||||
static __u64 (*bpf_ringbuf_query)(void *ringbuf, __u64 flags) = (void *) 134;
|
||||
|
||||
/*
|
||||
* bpf_csum_level
|
||||
*
|
||||
* Change the skb's checksum level by one layer up or down, or
|
||||
* reset it entirely to none in order to have the stack perform
|
||||
* checksum validation. The level is applicable to the following
|
||||
* protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
|
||||
* | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
|
||||
* through **bpf_skb_adjust_room**\ () helper with passing in
|
||||
* **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
|
||||
* to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
|
||||
* the UDP header is removed. Similarly, an encap of the latter
|
||||
* into the former could be accompanied by a helper call to
|
||||
* **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
|
||||
* skb is still intended to be processed in higher layers of the
|
||||
* stack instead of just egressing at tc.
|
||||
*
|
||||
* There are four supported level settings at this time:
|
||||
*
|
||||
* * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
|
||||
* with CHECKSUM_UNNECESSARY.
|
||||
* * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
|
||||
* with CHECKSUM_UNNECESSARY.
|
||||
* * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
|
||||
* sets CHECKSUM_NONE to force checksum validation by the stack.
|
||||
* * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
|
||||
* skb->csum_level.
|
||||
*
|
||||
* Returns
|
||||
* 0 on success, or a negative error in case of failure. In the
|
||||
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
|
||||
* is returned or the error code -EACCES in case the skb is not
|
||||
* subject to CHECKSUM_UNNECESSARY.
|
||||
*/
|
||||
static int (*bpf_csum_level)(struct __sk_buff *skb, __u64 level) = (void *) 135;
|
||||
|
||||
|
||||
|
||||
@@ -2,10 +2,17 @@
|
||||
#ifndef __BPF_HELPERS__
|
||||
#define __BPF_HELPERS__
|
||||
|
||||
/*
|
||||
* Note that bpf programs need to include either
|
||||
* vmlinux.h (auto-generated from BTF) or linux/types.h
|
||||
* in advance since bpf_helper_defs.h uses such types
|
||||
* as __u64.
|
||||
*/
|
||||
#include "bpf_helper_defs.h"
|
||||
|
||||
#define __uint(name, val) int (*name)[val]
|
||||
#define __type(name, val) typeof(val) *name
|
||||
#define __array(name, val) typeof(val) *name[]
|
||||
|
||||
/* Helper macro to print out debug messages */
|
||||
#define bpf_printk(fmt, ...) \
|
||||
@@ -29,6 +36,20 @@
|
||||
#define __weak __attribute__((weak))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Helper macro to manipulate data structures
|
||||
*/
|
||||
#ifndef offsetof
|
||||
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
|
||||
#endif
|
||||
#ifndef container_of
|
||||
#define container_of(ptr, type, member) \
|
||||
({ \
|
||||
void *__mptr = (void *)(ptr); \
|
||||
((type *)(__mptr - offsetof(type, member))); \
|
||||
})
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Helper structure used by eBPF C program
|
||||
* to describe BPF map attributes to libbpf loader
|
||||
|
||||
@@ -148,11 +148,11 @@ struct pt_regs;
|
||||
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
|
||||
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
|
||||
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
|
||||
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14])
|
||||
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14])
|
||||
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
|
||||
#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
|
||||
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
|
||||
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr)
|
||||
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr)
|
||||
|
||||
#elif defined(bpf_target_arm)
|
||||
|
||||
@@ -413,4 +413,20 @@ typeof(name(0)) name(struct pt_regs *ctx) \
|
||||
} \
|
||||
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
|
||||
|
||||
/*
|
||||
* BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
|
||||
* in a structure.
|
||||
*/
|
||||
#define BPF_SEQ_PRINTF(seq, fmt, args...) \
|
||||
({ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
|
||||
static const char ___fmt[] = fmt; \
|
||||
unsigned long long ___param[] = { args }; \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
|
||||
___param, sizeof(___param)); \
|
||||
___ret; \
|
||||
})
|
||||
|
||||
#endif
|
||||
|
||||
@@ -658,7 +658,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
|
||||
if (!btf_dump_is_blacklisted(d, id)) {
|
||||
btf_dump_emit_typedef_def(d, id, t, 0);
|
||||
btf_dump_printf(d, ";\n\n");
|
||||
};
|
||||
}
|
||||
tstate->fwd_emitted = 1;
|
||||
break;
|
||||
default:
|
||||
@@ -1137,6 +1137,20 @@ static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
|
||||
}
|
||||
}
|
||||
|
||||
static void btf_dump_drop_mods(struct btf_dump *d, struct id_stack *decl_stack)
|
||||
{
|
||||
const struct btf_type *t;
|
||||
__u32 id;
|
||||
|
||||
while (decl_stack->cnt) {
|
||||
id = decl_stack->ids[decl_stack->cnt - 1];
|
||||
t = btf__type_by_id(d->btf, id);
|
||||
if (!btf_is_mod(t))
|
||||
return;
|
||||
decl_stack->cnt--;
|
||||
}
|
||||
}
|
||||
|
||||
static void btf_dump_emit_name(const struct btf_dump *d,
|
||||
const char *name, bool last_was_ptr)
|
||||
{
|
||||
@@ -1235,14 +1249,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
|
||||
* a const/volatile modifier for array, so we are
|
||||
* going to silently skip them here.
|
||||
*/
|
||||
while (decls->cnt) {
|
||||
next_id = decls->ids[decls->cnt - 1];
|
||||
next_t = btf__type_by_id(d->btf, next_id);
|
||||
if (btf_is_mod(next_t))
|
||||
decls->cnt--;
|
||||
else
|
||||
break;
|
||||
}
|
||||
btf_dump_drop_mods(d, decls);
|
||||
|
||||
if (decls->cnt == 0) {
|
||||
btf_dump_emit_name(d, fname, last_was_ptr);
|
||||
@@ -1270,7 +1277,15 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
|
||||
__u16 vlen = btf_vlen(t);
|
||||
int i;
|
||||
|
||||
btf_dump_emit_mods(d, decls);
|
||||
/*
|
||||
* GCC emits extra volatile qualifier for
|
||||
* __attribute__((noreturn)) function pointers. Clang
|
||||
* doesn't do it. It's a GCC quirk for backwards
|
||||
* compatibility with code written for GCC <2.5. So,
|
||||
* similarly to extra qualifiers for array, just drop
|
||||
* them, instead of handling them.
|
||||
*/
|
||||
btf_dump_drop_mods(d, decls);
|
||||
if (decls->cnt) {
|
||||
btf_dump_printf(d, " (");
|
||||
btf_dump_emit_type_chain(d, decls, fname, lvl);
|
||||
|
||||
@@ -59,7 +59,14 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
|
||||
|
||||
void hashmap__clear(struct hashmap *map)
|
||||
{
|
||||
struct hashmap_entry *cur, *tmp;
|
||||
size_t bkt;
|
||||
|
||||
hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
|
||||
free(cur);
|
||||
}
|
||||
free(map->buckets);
|
||||
map->buckets = NULL;
|
||||
map->cap = map->cap_bits = map->sz = 0;
|
||||
}
|
||||
|
||||
@@ -93,8 +100,7 @@ static int hashmap_grow(struct hashmap *map)
|
||||
struct hashmap_entry **new_buckets;
|
||||
struct hashmap_entry *cur, *tmp;
|
||||
size_t new_cap_bits, new_cap;
|
||||
size_t h;
|
||||
int bkt;
|
||||
size_t h, bkt;
|
||||
|
||||
new_cap_bits = map->cap_bits + 1;
|
||||
if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
|
||||
|
||||
@@ -10,12 +10,10 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#ifdef __GLIBC__
|
||||
#include <bits/wordsize.h>
|
||||
#else
|
||||
#include <bits/reg.h>
|
||||
#include <limits.h>
|
||||
#ifndef __WORDSIZE
|
||||
#define __WORDSIZE (__SIZEOF_LONG__ * 8)
|
||||
#endif
|
||||
#include "libbpf_internal.h"
|
||||
|
||||
static inline size_t hash_bits(size_t h, int bits)
|
||||
{
|
||||
|
||||
1172
src/libbpf.c
1172
src/libbpf.c
File diff suppressed because it is too large
Load Diff
33
src/libbpf.h
33
src/libbpf.h
@@ -253,11 +253,22 @@ LIBBPF_API struct bpf_link *
|
||||
bpf_program__attach_lsm(struct bpf_program *prog);
|
||||
LIBBPF_API struct bpf_link *
|
||||
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
|
||||
LIBBPF_API struct bpf_link *
|
||||
bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
|
||||
|
||||
struct bpf_map;
|
||||
|
||||
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
|
||||
|
||||
struct bpf_iter_attach_opts {
|
||||
size_t sz; /* size of this struct for forward/backward compatibility */
|
||||
};
|
||||
#define bpf_iter_attach_opts__last_field sz
|
||||
|
||||
LIBBPF_API struct bpf_link *
|
||||
bpf_program__attach_iter(struct bpf_program *prog,
|
||||
const struct bpf_iter_attach_opts *opts);
|
||||
|
||||
struct bpf_insn;
|
||||
|
||||
/*
|
||||
@@ -469,6 +480,27 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
|
||||
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
|
||||
size_t info_size, __u32 flags);
|
||||
|
||||
/* Ring buffer APIs */
|
||||
struct ring_buffer;
|
||||
|
||||
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
|
||||
|
||||
struct ring_buffer_opts {
|
||||
size_t sz; /* size of this struct, for forward/backward compatibility */
|
||||
};
|
||||
|
||||
#define ring_buffer_opts__last_field sz
|
||||
|
||||
LIBBPF_API struct ring_buffer *
|
||||
ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
|
||||
const struct ring_buffer_opts *opts);
|
||||
LIBBPF_API void ring_buffer__free(struct ring_buffer *rb);
|
||||
LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
|
||||
ring_buffer_sample_fn sample_cb, void *ctx);
|
||||
LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
|
||||
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
|
||||
|
||||
/* Perf buffer APIs */
|
||||
struct perf_buffer;
|
||||
|
||||
typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
|
||||
@@ -524,6 +556,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
|
||||
|
||||
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
|
||||
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
|
||||
LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
|
||||
|
||||
typedef enum bpf_perf_event_ret
|
||||
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
|
||||
|
||||
@@ -254,3 +254,19 @@ LIBBPF_0.0.8 {
|
||||
bpf_program__set_lsm;
|
||||
bpf_set_link_xdp_fd_opts;
|
||||
} LIBBPF_0.0.7;
|
||||
|
||||
LIBBPF_0.0.9 {
|
||||
global:
|
||||
bpf_enable_stats;
|
||||
bpf_iter_create;
|
||||
bpf_link_get_fd_by_id;
|
||||
bpf_link_get_next_id;
|
||||
bpf_program__attach_iter;
|
||||
bpf_program__attach_netns;
|
||||
perf_buffer__consume;
|
||||
ring_buffer__add;
|
||||
ring_buffer__consume;
|
||||
ring_buffer__free;
|
||||
ring_buffer__new;
|
||||
ring_buffer__poll;
|
||||
} LIBBPF_0.0.8;
|
||||
|
||||
@@ -153,7 +153,7 @@ struct btf_ext_info_sec {
|
||||
__u32 sec_name_off;
|
||||
__u32 num_info;
|
||||
/* Followed by num_info * record_size number of bytes */
|
||||
__u8 data[0];
|
||||
__u8 data[];
|
||||
};
|
||||
|
||||
/* The minimum bpf_func_info checked by the loader */
|
||||
|
||||
@@ -238,6 +238,11 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
|
||||
if (btf_fd < 0)
|
||||
return false;
|
||||
break;
|
||||
case BPF_MAP_TYPE_RINGBUF:
|
||||
key_size = 0;
|
||||
value_size = 0;
|
||||
max_entries = 4096;
|
||||
break;
|
||||
case BPF_MAP_TYPE_UNSPEC:
|
||||
case BPF_MAP_TYPE_HASH:
|
||||
case BPF_MAP_TYPE_ARRAY:
|
||||
|
||||
@@ -321,6 +321,8 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
|
||||
|
||||
static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
|
||||
{
|
||||
flags &= XDP_FLAGS_MODES;
|
||||
|
||||
if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
|
||||
return info->prog_id;
|
||||
if (flags & XDP_FLAGS_DRV_MODE)
|
||||
|
||||
288
src/ringbuf.c
Normal file
288
src/ringbuf.c
Normal file
@@ -0,0 +1,288 @@
|
||||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
/*
|
||||
* Ring buffer operations.
|
||||
*
|
||||
* Copyright (C) 2020 Facebook, Inc.
|
||||
*/
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <tools/libc_compat.h>
|
||||
|
||||
#include "libbpf.h"
|
||||
#include "libbpf_internal.h"
|
||||
#include "bpf.h"
|
||||
|
||||
/* make sure libbpf doesn't use kernel-only integer typedefs */
|
||||
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
|
||||
|
||||
/*
 * State kept for one RINGBUF map registered via ring_buffer__add():
 *   sample_cb    - callback invoked for each consumed, non-discarded record
 *   ctx          - opaque pointer passed as the first argument of sample_cb
 *   data         - start of the record area (producer mapping + one page)
 *   consumer_pos - read/write mapping of the map's first page (offset 0)
 *   producer_pos - read-only mapping starting one page into the map
 *   mask         - max_entries - 1; positions are ANDed with it to index data
 *   map_fd       - file descriptor of the map, as passed by the caller
 */
struct ring {
|
||||
ring_buffer_sample_fn sample_cb;
|
||||
void *ctx;
|
||||
void *data;
|
||||
unsigned long *consumer_pos;
|
||||
unsigned long *producer_pos;
|
||||
unsigned long mask;
|
||||
int map_fd;
|
||||
};
|
||||
|
||||
/*
 * Manager for a set of ring buffers polled through one epoll instance:
 *   events    - epoll_event array with one slot per ring; handed to
 *               epoll_wait(), each event's data.fd holds a ring index
 *   rings     - array of ring_cnt registered rings
 *   page_size - value of getpagesize(), used to size the mappings
 *   epoll_fd  - epoll instance created in ring_buffer__new()
 *   ring_cnt  - number of rings successfully added so far
 */
struct ring_buffer {
|
||||
struct epoll_event *events;
|
||||
struct ring *rings;
|
||||
size_t page_size;
|
||||
int epoll_fd;
|
||||
int ring_cnt;
|
||||
};
|
||||
|
||||
static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
|
||||
{
|
||||
if (r->consumer_pos) {
|
||||
munmap(r->consumer_pos, rb->page_size);
|
||||
r->consumer_pos = NULL;
|
||||
}
|
||||
if (r->producer_pos) {
|
||||
munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
|
||||
r->producer_pos = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add extra RINGBUF maps to this ring buffer manager */
|
||||
int ring_buffer__add(struct ring_buffer *rb, int map_fd,
|
||||
ring_buffer_sample_fn sample_cb, void *ctx)
|
||||
{
|
||||
struct bpf_map_info info;
|
||||
__u32 len = sizeof(info);
|
||||
struct epoll_event *e;
|
||||
struct ring *r;
|
||||
void *tmp;
|
||||
int err;
|
||||
|
||||
memset(&info, 0, sizeof(info));
|
||||
|
||||
err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
|
||||
map_fd, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (info.type != BPF_MAP_TYPE_RINGBUF) {
|
||||
pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
|
||||
map_fd);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
rb->rings = tmp;
|
||||
|
||||
tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
rb->events = tmp;
|
||||
|
||||
r = &rb->rings[rb->ring_cnt];
|
||||
memset(r, 0, sizeof(*r));
|
||||
|
||||
r->map_fd = map_fd;
|
||||
r->sample_cb = sample_cb;
|
||||
r->ctx = ctx;
|
||||
r->mask = info.max_entries - 1;
|
||||
|
||||
/* Map writable consumer page */
|
||||
tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
map_fd, 0);
|
||||
if (tmp == MAP_FAILED) {
|
||||
err = -errno;
|
||||
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
|
||||
map_fd, err);
|
||||
return err;
|
||||
}
|
||||
r->consumer_pos = tmp;
|
||||
|
||||
/* Map read-only producer page and data pages. We map twice as big
|
||||
* data size to allow simple reading of samples that wrap around the
|
||||
* end of a ring buffer. See kernel implementation for details.
|
||||
* */
|
||||
tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
|
||||
MAP_SHARED, map_fd, rb->page_size);
|
||||
if (tmp == MAP_FAILED) {
|
||||
err = -errno;
|
||||
ringbuf_unmap_ring(rb, r);
|
||||
pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
|
||||
map_fd, err);
|
||||
return err;
|
||||
}
|
||||
r->producer_pos = tmp;
|
||||
r->data = tmp + rb->page_size;
|
||||
|
||||
e = &rb->events[rb->ring_cnt];
|
||||
memset(e, 0, sizeof(*e));
|
||||
|
||||
e->events = EPOLLIN;
|
||||
e->data.fd = rb->ring_cnt;
|
||||
if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
|
||||
err = -errno;
|
||||
ringbuf_unmap_ring(rb, r);
|
||||
pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
|
||||
map_fd, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
rb->ring_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ring_buffer__free(struct ring_buffer *rb)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!rb)
|
||||
return;
|
||||
|
||||
for (i = 0; i < rb->ring_cnt; ++i)
|
||||
ringbuf_unmap_ring(rb, &rb->rings[i]);
|
||||
if (rb->epoll_fd >= 0)
|
||||
close(rb->epoll_fd);
|
||||
|
||||
free(rb->events);
|
||||
free(rb->rings);
|
||||
free(rb);
|
||||
}
|
||||
|
||||
struct ring_buffer *
|
||||
ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
|
||||
const struct ring_buffer_opts *opts)
|
||||
{
|
||||
struct ring_buffer *rb;
|
||||
int err;
|
||||
|
||||
if (!OPTS_VALID(opts, ring_buffer_opts))
|
||||
return NULL;
|
||||
|
||||
rb = calloc(1, sizeof(*rb));
|
||||
if (!rb)
|
||||
return NULL;
|
||||
|
||||
rb->page_size = getpagesize();
|
||||
|
||||
rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
|
||||
if (rb->epoll_fd < 0) {
|
||||
err = -errno;
|
||||
pr_warn("ringbuf: failed to create epoll instance: %d\n", err);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
err = ring_buffer__add(rb, map_fd, sample_cb, ctx);
|
||||
if (err)
|
||||
goto err_out;
|
||||
|
||||
return rb;
|
||||
|
||||
err_out:
|
||||
ring_buffer__free(rb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Turn a raw record length word into the number of bytes the record
 * occupies in the ring: strip the two flag bits, add the header size,
 * and round the sum up to a multiple of 8.
 */
static inline int roundup_len(__u32 len)
{
	/* drop the top 2 bits (discard and busy, if set) */
	__u32 payload_sz = len & 0x3fffffffU;
	/* account for the length prefix in front of the payload */
	__u32 record_sz = payload_sz + BPF_RINGBUF_HDR_SZ;

	/* align up to the next 8 byte boundary */
	return (record_sz + 7) & ~7U;
}
|
||||
|
||||
static int ringbuf_process_ring(struct ring* r)
|
||||
{
|
||||
int *len_ptr, len, err, cnt = 0;
|
||||
unsigned long cons_pos, prod_pos;
|
||||
bool got_new_data;
|
||||
void *sample;
|
||||
|
||||
cons_pos = smp_load_acquire(r->consumer_pos);
|
||||
do {
|
||||
got_new_data = false;
|
||||
prod_pos = smp_load_acquire(r->producer_pos);
|
||||
while (cons_pos < prod_pos) {
|
||||
len_ptr = r->data + (cons_pos & r->mask);
|
||||
len = smp_load_acquire(len_ptr);
|
||||
|
||||
/* sample not committed yet, bail out for now */
|
||||
if (len & BPF_RINGBUF_BUSY_BIT)
|
||||
goto done;
|
||||
|
||||
got_new_data = true;
|
||||
cons_pos += roundup_len(len);
|
||||
|
||||
if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
|
||||
sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
|
||||
err = r->sample_cb(r->ctx, sample, len);
|
||||
if (err) {
|
||||
/* update consumer pos and bail out */
|
||||
smp_store_release(r->consumer_pos,
|
||||
cons_pos);
|
||||
return err;
|
||||
}
|
||||
cnt++;
|
||||
}
|
||||
|
||||
smp_store_release(r->consumer_pos, cons_pos);
|
||||
}
|
||||
} while (got_new_data);
|
||||
done:
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/* Consume available ring buffer(s) data without event polling.
|
||||
* Returns number of records consumed across all registered ring buffers, or
|
||||
* negative number if any of the callbacks return error.
|
||||
*/
|
||||
int ring_buffer__consume(struct ring_buffer *rb)
|
||||
{
|
||||
int i, err, res = 0;
|
||||
|
||||
for (i = 0; i < rb->ring_cnt; i++) {
|
||||
struct ring *ring = &rb->rings[i];
|
||||
|
||||
err = ringbuf_process_ring(ring);
|
||||
if (err < 0)
|
||||
return err;
|
||||
res += err;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Poll for available data and consume records, if any are available.
|
||||
* Returns number of records consumed, or negative number, if any of the
|
||||
* registered callbacks returned error.
|
||||
*/
|
||||
int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
|
||||
{
|
||||
int i, cnt, err, res = 0;
|
||||
|
||||
cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
|
||||
for (i = 0; i < cnt; i++) {
|
||||
__u32 ring_id = rb->events[i].data.fd;
|
||||
struct ring *ring = &rb->rings[ring_id];
|
||||
|
||||
err = ringbuf_process_ring(ring);
|
||||
if (err < 0)
|
||||
return err;
|
||||
res += cnt;
|
||||
}
|
||||
return cnt < 0 ? -errno : res;
|
||||
}
|
||||
@@ -1,11 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
LLVM_VER=11
|
||||
LIBBPF_PATH="${REPO_ROOT}"
|
||||
REPO_PATH="travis-ci/vmtest/bpf-next"
|
||||
|
||||
# temporary work-around for failing tests
|
||||
rm "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c"
|
||||
PREPARE_SELFTESTS_SCRIPT=${VMTEST_ROOT}/prepare_selftests-${KERNEL}.sh
|
||||
if [ -f "${PREPARE_SELFTESTS_SCRIPT}" ]; then
|
||||
(cd "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf" && ${PREPARE_SELFTESTS_SCRIPT})
|
||||
fi
|
||||
|
||||
make \
|
||||
CLANG=clang-${LLVM_VER} \
|
||||
@@ -21,5 +25,4 @@ cd ${LIBBPF_PATH}
|
||||
rm selftests/bpf/.gitignore
|
||||
git add selftests
|
||||
|
||||
blacklist_path="${VMTEST_ROOT}/configs/blacklist"
|
||||
git add "${blacklist_path}"
|
||||
git add "${VMTEST_ROOT}/configs/blacklist"
|
||||
|
||||
@@ -1,27 +1,53 @@
|
||||
# PERMANENTLY DISABLED
|
||||
align # verifier output format changed
|
||||
bpf_iter # bpf_iter support is missing
|
||||
bpf_obj_id # bpf_link support missing for GET_OBJ_INFO, GET_FD_BY_ID, etc
|
||||
bpf_tcp_ca # STRUCT_OPS is missing
|
||||
|
||||
# latest Clang generates code that fails to verify
|
||||
bpf_verif_scale
|
||||
#bpf_verif_scale/strobemeta.o
|
||||
#bpf_verif_scale/strobemeta_nounroll1.o
|
||||
#bpf_verif_scale/strobemeta_nounroll2.o
|
||||
|
||||
cgroup_attach_multi # BPF_F_REPLACE_PROG missing
|
||||
cgroup_link # LINK_CREATE is missing
|
||||
cgroup_skb_sk_lookup # bpf_sk_lookup_tcp() helper is missing
|
||||
connect_force_port # cgroup/get{peer,sock}name{4,6} support is missing
|
||||
enable_stats # BPF_ENABLE_STATS support is missing
|
||||
fentry_fexit # bpf_prog_test_tracing missing
|
||||
fentry_test # bpf_prog_test_tracing missing
|
||||
fexit_bpf2bpf # freplace is missing
|
||||
fexit_test # bpf_prog_test_tracing missing
|
||||
flow_dissector # bpf_link-based flow dissector is in 5.8+
|
||||
flow_dissector_reattach
|
||||
get_stack_raw_tp # exercising BPF verifier bug causing infinite loop
|
||||
link_pinning # bpf_link is missing
|
||||
load_bytes_relative # new functionality in 5.8
|
||||
mmap # 5.5 kernel is too permissive with re-mmaping
|
||||
modify_return # fmod_ret is missing
|
||||
modify_return # fmod_ret support is missing
|
||||
ns_current_pid_tgid # bpf_get_ns_current_pid_tgid() helper is missing
|
||||
perf_branches # bpf_read_branch_records() helper is missing
|
||||
ringbuf # BPF_MAP_TYPE_RINGBUF is supported in 5.8+
|
||||
|
||||
# bug in verifier w/ tracking references
|
||||
#reference_tracking/classifier/sk_lookup_success
|
||||
reference_tracking
|
||||
|
||||
select_reuseport # UDP support is missing
|
||||
sk_assign # bpf_sk_assign helper missing
|
||||
skb_helpers # helpers added in 5.8+
|
||||
sockmap_listen # no listen socket support in SOCKMAP
|
||||
skb_ctx # ctx_{size, }_{in, out} in BPF_PROG_TEST_RUN is missing
|
||||
test_global_funcs # kernel doesn't support BTF linkage=global on FUNCs
|
||||
test_lsm # no BPF_LSM support
|
||||
test_overhead # no fmod_ret support
|
||||
vmlinux # hrtimer_nanosleep() signature changed incompatibly
|
||||
xdp_adjust_tail # new XDP functionality added in 5.8
|
||||
xdp_attach # IFLA_XDP_EXPECTED_FD support is missing
|
||||
xdp_bpf2bpf # freplace is missing
|
||||
|
||||
|
||||
# TEMPORARILY DISABLED
|
||||
send_signal # flaky
|
||||
cls_redirect # latest Clang breaks BPF verification
|
||||
|
||||
@@ -2,3 +2,4 @@
|
||||
send_signal # flaky
|
||||
test_lsm # semi-working
|
||||
sk_assign # needs better setup in Travis CI
|
||||
core_reloc # temporary test breakage
|
||||
|
||||
12
travis-ci/vmtest/prepare_selftests-5.5.0.sh
Executable file
12
travis-ci/vmtest/prepare_selftests-5.5.0.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
# these tests expect vmlinux.h to have the latest definitions of bpf_devmap_val and xdp_md->egress_ifindex
|
||||
rm progs/test_xdp_with_devmap_helpers.c
|
||||
rm progs/test_xdp_devmap_helpers.c
|
||||
rm prog_tests/xdp_devmap_attach.c
|
||||
|
||||
# no BPF_F_NO_PREALLOC in BTF and no sk_msg_md->sk field
|
||||
rm progs/test_skmsg_load_helpers.c
|
||||
rm prog_tests/sockmap_basic.c
|
||||
Reference in New Issue
Block a user