Compare commits

...

108 Commits

Author SHA1 Message Date
thiagoftsm
a16e904d6c Merge branch 'libbpf:master' into master 2022-12-21 16:15:11 +00:00
Andrii Nakryiko
6597330c45 sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   0e43662e61f2569500ab83b8188c065603530785
Checkpoint bpf-next commit: 7b43df6c6ec38c9097420902a1c8165c4b25bf70
Baseline bpf commit:        f506439ec3dee11e0e77b0a1f3fb3eec22c97873
Checkpoint bpf commit:      54c3f1a81421f85e60ae2eaae7be3727a09916ee

Changbin Du (1):
  libbpf: Show error info about missing ".BTF" section

Christian Ehrig (1):
  bpf: Add flag BPF_F_NO_TUNNEL_KEY to bpf_skb_set_tunnel_key()

Khem Raj (1):
  libbpf: Fix build warning on ref_ctr_off for 32-bit architectures

 include/uapi/linux/bpf.h | 4 ++++
 src/btf.c                | 1 +
 src/libbpf.c             | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

--
2.30.2

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-20 22:23:18 -08:00
Andrii Nakryiko
2e287cd201 sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-20 22:23:18 -08:00
Changbin Du
49bd40e869 libbpf: Show error info about missing ".BTF" section
Show the real problem instead of just saying "No such file or directory".

Now will print below info:
libbpf: failed to find '.BTF' ELF section in /home/changbin/work/linux/vmlinux
Error: failed to load BTF from /home/changbin/work/linux/vmlinux: No such file or directory

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221217223509.88254-2-changbin.du@gmail.com
2022-12-20 22:23:18 -08:00
Khem Raj
f7dba2c313 libbpf: Fix build warning on ref_ctr_off for 32-bit architectures
Clang warns on 32-bit ARM on this comparision:

libbpf.c:10497:18: error: result of comparison of constant 4294967296 with expression of type 'size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare]
        if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
            ~~~~~~~~~~~ ^  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Typecast ref_ctr_off to __u64 in the check conditional, it is false on
32bit anyways.

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221219191526.296264-1-raj.khem@gmail.com
2022-12-20 22:23:18 -08:00
Christian Ehrig
41ac436073 bpf: Add flag BPF_F_NO_TUNNEL_KEY to bpf_skb_set_tunnel_key()
This patch allows to remove TUNNEL_KEY from the tunnel flags bitmap
when using bpf_skb_set_tunnel_key by providing a BPF_F_NO_TUNNEL_KEY
flag. On egress, the resulting tunnel header will not contain a tunnel
key if the protocol and implementation supports it.

At the moment bpf_tunnel_key wants a user to specify a numeric tunnel
key. This will wrap the inner packet into a tunnel header with the key
bit and value set accordingly. This is problematic when using a tunnel
protocol that supports optional tunnel keys and a receiving tunnel
device that is not expecting packets with the key bit set. The receiver
won't decapsulate and drop the packet.

RFC 2890 and RFC 2784 GRE tunnels are examples where this flag is
useful. It allows for generating packets, that can be decapsulated by
a GRE tunnel device not operating in collect metadata mode or not
expecting the key bit set.

Signed-off-by: Christian Ehrig <cehrig@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221218051734.31411-1-cehrig@cloudflare.com
2022-12-20 22:23:18 -08:00
Andrii Nakryiko
75987cc295 sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   b148c8b9b926e257a59c8eb2cd6fa3adfd443254
Checkpoint bpf-next commit: 0e43662e61f2569500ab83b8188c065603530785
Baseline bpf commit:        4121d4481b72501aa4d22680be4ea1096d69d133
Checkpoint bpf commit:      f506439ec3dee11e0e77b0a1f3fb3eec22c97873

Andrii Nakryiko (1):
  libbpf: Fix btf_dump's packed struct determination

 src/btf_dump.c | 33 ++++++---------------------------
 1 file changed, 6 insertions(+), 27 deletions(-)

--
2.30.2

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-15 14:34:52 -08:00
Andrii Nakryiko
b9f1a06c70 libbpf: Fix btf_dump's packed struct determination
Fix bug in btf_dump's logic of determining if a given struct type is
packed or not. The notion of "natural alignment" is not needed and is
even harmful in this case, so drop it altogether. The biggest difference
in btf_is_struct_packed() compared to its original implementation is
that we don't really use btf__align_of() to determine overall alignment
of a struct type (because it could be 1 for both packed and non-packed
struct, depending on specifci field definitions), and just use field's
actual alignment to calculate whether any field is requiring packing or
struct's size overall necessitates packing.

Add two simple test cases that demonstrate the difference this change
would make.

Fixes: ea2ce1ba99aa ("libbpf: Fix BTF-to-C converter's padding logic")
Reported-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20221215183605.4149488-1-andrii@kernel.org
2022-12-15 14:34:52 -08:00
Andrii Nakryiko
30554b08fe sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   706819495921ddad6b3780140b9d9e9293b6dedc
Checkpoint bpf-next commit: b148c8b9b926e257a59c8eb2cd6fa3adfd443254
Baseline bpf commit:        e931a173a685fe213127ae5aa6b7f2196c1d875d
Checkpoint bpf commit:      4121d4481b72501aa4d22680be4ea1096d69d133

Andrii Nakryiko (4):
  libbpf: Fix single-line struct definition output in btf_dump
  libbpf: Handle non-standardly sized enums better in BTF-to-C dumper
  libbpf: Fix btf__align_of() by taking into account field offsets
  libbpf: Fix BTF-to-C converter's padding logic

Eyal Birger (1):
  tools: add IFLA_XFRM_COLLECT_METADATA to uapi/linux/if_link.h

Kumar Kartikeya Dwivedi (1):
  bpf: Rework process_dynptr_func

Timo Hunziker (1):
  libbpf: Parse usdt args without offset on x86 (e.g. 8@(%rsp))

Xin Liu (1):
  libbpf: Optimized return value in libbpf_strerror when errno is libbpf
    errno

 include/uapi/linux/bpf.h     |   8 +-
 include/uapi/linux/if_link.h |   1 +
 src/btf.c                    |  13 +++
 src/btf_dump.c               | 214 +++++++++++++++++++++++++++--------
 src/libbpf_errno.c           |  16 ++-
 src/usdt.c                   |   8 ++
 6 files changed, 204 insertions(+), 56 deletions(-)

--
2.30.2

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-14 22:09:00 -08:00
Andrii Nakryiko
b0ff8e90f7 sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-14 22:09:00 -08:00
Andrii Nakryiko
0b80970cb6 libbpf: Fix BTF-to-C converter's padding logic
Turns out that btf_dump API doesn't handle a bunch of tricky corner
cases, as reported by Per, and further discovered using his testing
Python script ([0]).

This patch revamps btf_dump's padding logic significantly, making it
more correct and also avoiding unnecessary explicit padding, where
compiler would pad naturally. This overall topic turned out to be very
tricky and subtle, there are lots of subtle corner cases. The comments
in the code tries to give some clues, but comments themselves are
supposed to be paired with good understanding of C alignment and padding
rules. Plus some experimentation to figure out subtle things like
whether `long :0;` means that struct is now forced to be long-aligned
(no, it's not, turns out).

Anyways, Per's script, while not completely correct in some known
situations, doesn't show any obvious cases where this logic breaks, so
this is a nice improvement over the previous state of this logic.

Some selftests had to be adjusted to accommodate better use of natural
alignment rules, eliminating some unnecessary padding, or changing it to
`type: 0;` alignment markers.

Note also that for when we are in between bitfields, we emit explicit
bit size, while otherwise we use `: 0`, this feels much more natural in
practice.

Next patch will add few more test cases, found through randomized Per's
script.

  [0] https://lore.kernel.org/bpf/85f83c333f5355c8ac026f835b18d15060725fcb.camel@ericsson.com/

Reported-by: Per Sundström XP <per.xp.sundstrom@ericsson.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-6-andrii@kernel.org
2022-12-14 22:09:00 -08:00
Andrii Nakryiko
58b164237a libbpf: Fix btf__align_of() by taking into account field offsets
btf__align_of() is supposed to be return alignment requirement of
a requested BTF type. For STRUCT/UNION it doesn't always return correct
value, because it calculates alignment only based on field types. But
for packed structs this is not enough, we need to also check field
offsets and struct size. If field offset isn't aligned according to
field type's natural alignment, then struct must be packed. Similarly,
if struct size is not a multiple of struct's natural alignment, then
struct must be packed as well.

This patch fixes this issue precisely by additionally checking these
conditions.

Fixes: 3d208f4ca111 ("libbpf: Expose btf__align_of() API")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-5-andrii@kernel.org
2022-12-14 22:09:00 -08:00
Andrii Nakryiko
e6e0e3fd85 libbpf: Handle non-standardly sized enums better in BTF-to-C dumper
Turns out C allows to force enum to be 1-byte or 8-byte explicitly using
mode(byte) or mode(word), respecticely. Linux sources are using this in
some cases. This is imporant to handle correctly, as enum size
determines corresponding fields in a struct that use that enum type. And
if enum size is incorrect, this will lead to invalid struct layout. So
add mode(byte) and mode(word) attribute support to btf_dump APIs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-3-andrii@kernel.org
2022-12-14 22:09:00 -08:00
Andrii Nakryiko
db11704944 libbpf: Fix single-line struct definition output in btf_dump
btf_dump APIs emit unnecessary tabs when emitting struct/union
definition that fits on the single line. Before this patch we'd get:

struct blah {<tab>};

This patch fixes this and makes sure that we get more natural:

struct blah {};

Fixes: 44a726c3f23c ("bpftool: Print newline before '}' for struct with padding only fields")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-2-andrii@kernel.org
2022-12-14 22:09:00 -08:00
Xin Liu
8d719b0c08 libbpf: Optimized return value in libbpf_strerror when errno is libbpf errno
This is a small improvement in libbpf_strerror. When libbpf_strerror
is used to obtain the system error description, if the length of the
buf is insufficient, libbpf_sterror returns ERANGE and sets errno to
ERANGE.

However, this processing is not performed when the error code
customized by libbpf is obtained. Make some minor improvements here,
return -ERANGE and set errno to ERANGE when buf is not enough for
custom description.

Signed-off-by: Xin Liu <liuxin350@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221210082045.233697-1-liuxin350@huawei.com
2022-12-14 22:09:00 -08:00
Kumar Kartikeya Dwivedi
6b90604fa7 bpf: Rework process_dynptr_func
Recently, user ringbuf support introduced a PTR_TO_DYNPTR register type
for use in callback state, because in case of user ringbuf helpers,
there is no dynptr on the stack that is passed into the callback. To
reflect such a state, a special register type was created.

However, some checks have been bypassed incorrectly during the addition
of this feature. First, for arg_type with MEM_UNINIT flag which
initialize a dynptr, they must be rejected for such register type.
Secondly, in the future, there are plans to add dynptr helpers that
operate on the dynptr itself and may change its offset and other
properties.

In all of these cases, PTR_TO_DYNPTR shouldn't be allowed to be passed
to such helpers, however the current code simply returns 0.

The rejection for helpers that release the dynptr is already handled.

For fixing this, we take a step back and rework existing code in a way
that will allow fitting in all classes of helpers and have a coherent
model for dealing with the variety of use cases in which dynptr is used.

First, for ARG_PTR_TO_DYNPTR, it can either be set alone or together
with a DYNPTR_TYPE_* constant that denotes the only type it accepts.

Next, helpers which initialize a dynptr use MEM_UNINIT to indicate this
fact. To make the distinction clear, use MEM_RDONLY flag to indicate
that the helper only operates on the memory pointed to by the dynptr,
not the dynptr itself. In C parlance, it would be equivalent to taking
the dynptr as a point to const argument.

When either of these flags are not present, the helper is allowed to
mutate both the dynptr itself and also the memory it points to.
Currently, the read only status of the memory is not tracked in the
dynptr, but it would be trivial to add this support inside dynptr state
of the register.

With these changes and renaming PTR_TO_DYNPTR to CONST_PTR_TO_DYNPTR to
better reflect its usage, it can no longer be passed to helpers that
initialize a dynptr, i.e. bpf_dynptr_from_mem, bpf_ringbuf_reserve_dynptr.

A note to reviewers is that in code that does mark_stack_slots_dynptr,
and unmark_stack_slots_dynptr, we implicitly rely on the fact that
PTR_TO_STACK reg is the only case that can reach that code path, as one
cannot pass CONST_PTR_TO_DYNPTR to helpers that don't set MEM_RDONLY. In
both cases such helpers won't be setting that flag.

The next patch will add a couple of selftest cases to make sure this
doesn't break.

Fixes: 205715673844 ("bpf: Add bpf_user_ringbuf_drain() helper")
Acked-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221207204141.308952-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-12-14 22:09:00 -08:00
Timo Hunziker
74244c5bd7 libbpf: Parse usdt args without offset on x86 (e.g. 8@(%rsp))
Parse USDT arguments like "8@(%rsp)" on x86. These are emmited by
SystemTap. The argument syntax is similar to the existing "memory
dereference case" but the offset left out as it's zero (i.e. read
the value from the address in the register). We treat it the same
as the the "memory dereference case", but set the offset to 0.

I've tested that this fixes the "unrecognized arg #N spec: 8@(%rsp).."
error I've run into when attaching to a probe with such an argument.
Attaching and reading the correct argument values works.

Something similar might be needed for the other supported
architectures.

  [0] Closes: https://github.com/libbpf/libbpf/issues/559

Signed-off-by: Timo Hunziker <timo.hunziker@gmx.ch>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221203123746.2160-1-timo.hunziker@eclipso.ch
2022-12-14 22:09:00 -08:00
Eyal Birger
da08611c65 tools: add IFLA_XFRM_COLLECT_METADATA to uapi/linux/if_link.h
Needed for XFRM metadata tests.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Link: https://lore.kernel.org/r/20221203084659.1837829-4-eyal.birger@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2022-12-14 22:09:00 -08:00
Andrii Nakryiko
1e479aec4f ci: don't run test_maps in libbpf CI
It crashes often, it doesn't really test libbpf much.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-07 09:28:07 -08:00
Andrii Nakryiko
8846dc7a20 ci: fix Ubuntu version for kernel tests and pahole workflows
Having too new build environment in workflows that build selftests on
the host, but run them in a separate QEMU image can lead to problems
with runtime linker complaining about missing new enough version of
glibc and other dependencies.

Until we update images, fix used Ubuntu version to ubuntu-20.04 to
mitigate.

Suggested-by: Manu Bretelle <chantr4@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-05 11:52:11 -08:00
Andrii Nakryiko
eb9b5c567d sync: regenerate vmlinux.h
Update checked in vmlinux.h for 5.5 and 4.9 kernels.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-02 22:12:29 -08:00
Andrii Nakryiko
be8f15bb93 sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   5b1d640800de7fe02d68bf592d9d101de24c87f2
Checkpoint bpf-next commit: 706819495921ddad6b3780140b9d9e9293b6dedc
Baseline bpf commit:        47df8a2f78bc34ff170d147d05b121f84e252b85
Checkpoint bpf commit:      e931a173a685fe213127ae5aa6b7f2196c1d875d

Alexei Starovoitov (1):
  selftests/bpf: Workaround for llvm nop-4 bug

Andrii Nakryiko (2):
  libbpf: Ignore hashmap__find() result explicitly in btf_dump
  libbpf: Avoid enum forward-declarations in public API in C++ mode

Donald Hunter (1):
  docs/bpf: Add table of BPF program types to libbpf docs

Hou Tao (4):
  libbpf: Use page size as max_entries when probing ring buffer map
  libbpf: Handle size overflow for ringbuf mmap
  libbpf: Handle size overflow for user ringbuf mmap
  libbpf: Check the validity of size in user_ring_buffer__reserve()

Ji Rongfeng (1):
  bpf: Update bpf_{g,s}etsockopt() documentation

 docs/index.rst           |   3 +
 docs/program_types.rst   | 203 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/bpf.h |  23 +++--
 src/bpf.h                |   7 ++
 src/btf_dump.c           |   2 +-
 src/libbpf.c             |   3 +-
 src/libbpf_probes.c      |   2 +-
 src/ringbuf.c            |  26 +++--
 8 files changed, 250 insertions(+), 19 deletions(-)
 create mode 100644 docs/program_types.rst

--
2.30.2

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-02 22:12:29 -08:00
Andrii Nakryiko
2bf5ed3a48 sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-02 22:12:29 -08:00
Andrii Nakryiko
0fbf777e0b libbpf: Avoid enum forward-declarations in public API in C++ mode
C++ enum forward declarations are fundamentally not compatible with pure
C enum definitions, and so libbpf's use of `enum bpf_stats_type;`
forward declaration in libbpf/bpf.h public API header is causing C++
compilation issues.

More details can be found in [0], but it comes down to C++ supporting
enum forward declaration only with explicitly specified backing type:

  enum bpf_stats_type: int;

In C (and I believe it's a GCC extension also), such forward declaration
is simply:

  enum bpf_stats_type;

Further, in Linux UAPI this enum is defined in pure C way:

enum bpf_stats_type { BPF_STATS_RUN_TIME = 0; }

And even though in both cases backing type is int, which can be
confirmed by looking at DWARF information, for C++ compiler actual enum
definition and forward declaration are incompatible.

To eliminate this problem, for C++ mode define input argument as int,
which makes enum unnecessary in libbpf public header. This solves the
issue and as demonstrated by next patch doesn't cause any unwanted
compiler warnings, at least with default warnings setting.

  [0] https://stackoverflow.com/questions/42766839/c11-enum-forward-causes-underlying-type-mismatch
  [1] Closes: https://github.com/libbpf/libbpf/issues/249

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221130200013.2997831-1-andrii@kernel.org
2022-12-02 22:12:29 -08:00
Hou Tao
4d21c979ce libbpf: Check the validity of size in user_ring_buffer__reserve()
The top two bits of size are used as busy and discard flags, so reject
the reservation that has any of these special bits in the size. With the
addition of validity check, these is also no need to check whether or
not total_size is overflowed.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221116072351.1168938-5-houtao@huaweicloud.com
2022-12-02 22:12:29 -08:00
Hou Tao
11ad834557 libbpf: Handle size overflow for user ringbuf mmap
Similar with the overflow problem on ringbuf mmap, in user_ringbuf_map()
2 * max_entries may overflow u32 when mapping writeable region.

Fixing it by casting the size of writable mmap region into a __u64 and
checking whether or not there will be overflow during mmap.

Fixes: b66ccae01f1d ("bpf: Add libbpf logic for user-space ring buffer")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221116072351.1168938-4-houtao@huaweicloud.com
2022-12-02 22:12:29 -08:00
Hou Tao
f056d1bd54 libbpf: Handle size overflow for ringbuf mmap
The maximum size of ringbuf is 2GB on x86-64 host, so 2 * max_entries
will overflow u32 when mapping producer page and data pages. Only
casting max_entries to size_t is not enough, because for 32-bits
application on 64-bits kernel the size of read-only mmap region
also could overflow size_t.

So fixing it by casting the size of read-only mmap region into a __u64
and checking whether or not there will be overflow during mmap.

Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221116072351.1168938-3-houtao@huaweicloud.com
2022-12-02 22:12:29 -08:00
Hou Tao
b822a139e3 libbpf: Use page size as max_entries when probing ring buffer map
Using page size as max_entries when probing ring buffer map, else the
probe may fail on host with 64KB page size (e.g., an ARM64 host).

After the fix, the output of "bpftool feature" on above host will be
correct.

Before :
    eBPF map_type ringbuf is NOT available
    eBPF map_type user_ringbuf is NOT available

After :
    eBPF map_type ringbuf is available
    eBPF map_type user_ringbuf is available

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221116072351.1168938-2-houtao@huaweicloud.com
2022-12-02 22:12:29 -08:00
Ji Rongfeng
a5b4a53781 bpf: Update bpf_{g,s}etsockopt() documentation
* append missing optnames to the end
* simplify bpf_getsockopt()'s doc

Signed-off-by: Ji Rongfeng <SikoJobs@outlook.com>
Link: https://lore.kernel.org/r/DU0P192MB15479B86200B1216EC90E162D6099@DU0P192MB1547.EURP192.PROD.OUTLOOK.COM
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2022-12-02 22:12:29 -08:00
Donald Hunter
e84419ff5a docs/bpf: Add table of BPF program types to libbpf docs
Extend the libbpf documentation with a table of program types,
attach points and ELF section names.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20221121121734.98329-1-donald.hunter@gmail.com
2022-12-02 22:12:29 -08:00
Alexei Starovoitov
ca515c0dda selftests/bpf: Workaround for llvm nop-4 bug
Currently LLVM fails to recognize .data.* as data section and defaults to .text
section. Later BPF backend tries to emit 4-byte NOP instruction which doesn't
exist in BPF ISA and aborts.
The fix for LLVM is pending:
https://reviews.llvm.org/D138477

While waiting for the fix lets workaround the linked_list test case
by using .bss.* prefix which is properly recognized by LLVM as BSS section.

Fix libbpf to support .bss. prefix and adjust tests.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-12-02 22:12:29 -08:00
Andrii Nakryiko
95959419a7 libbpf: Ignore hashmap__find() result explicitly in btf_dump
Coverity is reporting that btf_dump_name_dups() doesn't check return
result of hashmap__find() call. This is intentional, so make it explicit
with (void) cast.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221117192824.4093553-1-andrii@kernel.org
2022-12-02 22:12:29 -08:00
Andrii Nakryiko
3c659715ec sync: fix sync scripts commit_signature function
After recent lint changes, commit_signature() function now gets optional
array of paths as multiple arguments, instead of entire array as second
argument. So adjust commit_signature() to handle this correctly.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-02 21:04:03 -08:00
Andrii Nakryiko
f46b17ef0e sync: add Signed-off-by for auto-generated sync commits
Now that we enforce Signed-off-by on every commit, make sure that
auto-generatd sync commits also get corrected Signed-off-by tags.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-12-02 20:51:21 -08:00
Evgeny Vereshchagin
1596a09b5d oss-fuzz: bump elfutils
to make it less likely for the libbpf fuzz target to run into
elfutils bugs that have been fixed upstream since two new fuzz
targets were added there back in April.

Signed-off-by: Evgeny Vereshchagin <evvers@ya.ru>
2022-11-18 13:54:40 -08:00
Kui-Feng Lee
5322b8e76c sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   b548b17a93fd18357a5a6f535c10c1e68719ad32
Checkpoint bpf-next commit: 5b1d640800de7fe02d68bf592d9d101de24c87f2
Baseline bpf commit:        9cbd48d5fa14e4c65f8580de16686077f7cea02b
Checkpoint bpf commit:      47df8a2f78bc34ff170d147d05b121f84e252b85

David Michael (1):
  libbpf: Fix uninitialized warning in btf_dump_dump_type_data

Jiri Olsa (1):
  libbpf: Use correct return pointer in attach_raw_tp

Kang Minchul (3):
  libbpf: checkpatch: Fixed code alignments in btf.c
  libbpf: Fixed various checkpatch issues in libbpf.c
  libbpf: checkpatch: Fixed code alignments in ringbuf.c

Kumar Kartikeya Dwivedi (1):
  bpf: Support bpf_list_head in map values

 include/uapi/linux/bpf.h | 10 +++++++++
 src/btf.c                |  5 +++--
 src/btf_dump.c           |  2 +-
 src/libbpf.c             | 47 +++++++++++++++++++++++++---------------
 src/ringbuf.c            |  4 ++--
 5 files changed, 45 insertions(+), 23 deletions(-)

--
2.30.2
2022-11-18 13:53:39 -08:00
Kui-Feng Lee
15bbaabed8 sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.
2022-11-18 13:53:39 -08:00
Jiri Olsa
eb77c7210b libbpf: Use correct return pointer in attach_raw_tp
We need to pass '*link' to final libbpf_get_error,
because that one holds the return value, not 'link'.

Fixes: 4fa5bcfe07f7 ("libbpf: Allow BPF program auto-attach handlers to bail out")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221114145257.882322-1-jolsa@kernel.org
2022-11-18 13:53:39 -08:00
Kumar Kartikeya Dwivedi
2557efc8e1 bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.

The definition of bpf_list_head in a map value will be done as follows:

struct foo {
	struct bpf_list_node node;
	int data;
};

struct map_value {
	struct bpf_list_head head __contains(foo, node);
};

Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.

The 'contains' annotation is a BTF declaration tag composed of four
parts, "contains:name:node" where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.

This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.

For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.

Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.

While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 13:53:39 -08:00
Kang Minchul
9781b9eced libbpf: checkpatch: Fixed code alignments in ringbuf.c
Fixed some checkpatch issues in ringbuf.c

Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221113190648.38556-4-tegongkang@gmail.com
2022-11-18 13:53:39 -08:00
Kang Minchul
4c3b53d09c libbpf: Fixed various checkpatch issues in libbpf.c
Fixed following checkpatch issues:

WARNING: Block comments use a trailing */ on a separate line
+        * other BPF program's BTF object */

WARNING: Possible repeated word: 'be'
+        * name. This is important to be be able to find corresponding BTF

ERROR: switch and case should be at the same indent
+       switch (ext->kcfg.sz) {
+               case 1: *(__u8 *)ext_val = value; break;
+               case 2: *(__u16 *)ext_val = value; break;
+               case 4: *(__u32 *)ext_val = value; break;
+               case 8: *(__u64 *)ext_val = value; break;
+               default:

ERROR: trailing statements should be on next line
+               case 1: *(__u8 *)ext_val = value; break;

ERROR: trailing statements should be on next line
+               case 2: *(__u16 *)ext_val = value; break;

ERROR: trailing statements should be on next line
+               case 4: *(__u32 *)ext_val = value; break;

ERROR: trailing statements should be on next line
+               case 8: *(__u64 *)ext_val = value; break;

ERROR: code indent should use tabs where possible
+                }$

WARNING: please, no spaces at the start of a line
+                }$

WARNING: Block comments use a trailing */ on a separate line
+        * for faster search */

ERROR: code indent should use tabs where possible
+^I^I^I^I^I^I        &ext->kcfg.is_signed);$

WARNING: braces {} are not necessary for single statement blocks
+       if (err) {
+               return err;
+       }

ERROR: code indent should use tabs where possible
+^I^I^I^I        sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);$

Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221113190648.38556-3-tegongkang@gmail.com
2022-11-18 13:53:39 -08:00
Kang Minchul
7b18ff1212 libbpf: checkpatch: Fixed code alignments in btf.c
Fixed some checkpatch issues in btf.c

Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221113190648.38556-2-tegongkang@gmail.com
2022-11-18 13:53:39 -08:00
David Michael
c975797ebe libbpf: Fix uninitialized warning in btf_dump_dump_type_data
GCC 11.3.0 fails to compile btf_dump.c due to the following error,
which seems to originate in btf_dump_struct_data where the returned
value would be uninitialized if btf_vlen returns zero.

btf_dump.c: In function ‘btf_dump_dump_type_data’:
btf_dump.c:2363:12: error: ‘err’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
 2363 |         if (err < 0)
      |            ^

Fixes: 920d16af9b42 ("libbpf: BTF dumper support for typed data")
Signed-off-by: David Michael <fedora.dm0@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/87zgcu60hq.fsf@gmail.com
2022-11-18 13:53:39 -08:00
Manu Bretelle
9167308b4a ci: remove s390x-self-hosted-builder from libbpf/libbpf
Those were moved to libbpf/ci: https://github.com/libbpf/ci/tree/master/rootfs/s390x-self-hosted-builder

Signed-off-by: Manu Bretelle <chantr4@gmail.com>
2022-11-16 13:58:37 -08:00
Manu Bretelle
7049d3a2ea ci: Use s390x label to schedule workflows on s390x
The runners are having their labels uniformized across architecture.
z15 is being removed in favor of s390x.

Signed-off-by: Manu Bretelle <chantr4@gmail.com>
2022-11-16 13:55:31 -08:00
Andrii Nakryiko
ea931ec6c5 ci: drop LGTM integration
LGTM is deprecated, remove it. We have CodeQL now.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-16 12:17:40 -08:00
Andrii Nakryiko
3a73d6f865 readme: replace LGTM badge with CodeQL badge
LGTM is going to be removed, CodeQL is supposed to be a replacement.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-16 12:17:40 -08:00
Andrii Nakryiko
7b0891ac6b ci: build libbpf with more versions of clang and gcc
Add few more versions of clang and gcc used to compile-test libbpf.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-16 12:16:17 -08:00
Andrii Nakryiko
c80f12f7f6 ci: fix Debian builds due to pkg-config dependency change
Seems like we need pkgconfig dependency instead of pkg-config.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-16 11:25:17 -08:00
Andrii Nakryiko
3b6093fd43 sync: start syncing include/uapi/linux/fcntl.h UAPI header
Libbpf relies on F_DUPFD_CLOEXEC constant coming from fcntl.h UAPI
header, so we need to sync it along other UAPI headers. Also update sync
script to keep doing this automatically going forward.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-16 10:56:59 -08:00
Andrii Nakryiko
8d358ab948 sync: make LIBBPF_PATHS and LIBBPF_VIEW_PATHS into real array variables
Use correct Bash syntax to define these two variables as arrays.
Drop shellcheck opt-out for unquoted use of array.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-14 21:42:37 -08:00
Andrii Nakryiko
971ad8f8d0 sync: fix sync script's use of bash array variables
Don't wrap LIBBPF_PATHS[@] and LIBBPF_VIEW_PATHS[@] in quotes when
passing it to git commands. Not clear how it worked before, but
something recently broke. Either git commands became stricter or
something.

But either way, we do want to pass each element of LIBBPF_PATHS or
LIBBPF_VIEW_PATHS as separate command line arguments, so putting them in
quotes doesn't make sense, as that makes them look like a single
argument to git.

So drop all the quotes around these arrays. The only place where it's
still needed is in commit_signature call, as we do want to pass array as
single arg ($2) and then internally we unfold it into multiple command
line arguments.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-12 18:24:12 -08:00
Andrii Nakryiko
2ed27f9e63 ci: update vmlinux.h
Update vmlinux.h to get latest enums for some of selftests.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2022-11-12 18:24:12 -08:00
Andrii Nakryiko
4bdbb7ea28 sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   62c69e89e81bfbdb9a87ae3e0599dcc6aacf786b
Checkpoint bpf-next commit: b548b17a93fd18357a5a6f535c10c1e68719ad32
Baseline bpf commit:        e7b09357453a99e6f9e74c39e9ca1363c22c0b96
Checkpoint bpf commit:      9cbd48d5fa14e4c65f8580de16686077f7cea02b

Alan Maguire (1):
  libbpf: Btf dedup identical struct test needs check for nested
    structs/arrays

Andrii Nakryiko (2):
  libbpf: clean up and refactor BTF fixup step
  libbpf: only add BPF_F_MMAPABLE flag for data maps with global vars

Anshuman Khandual (4):
  perf: Add system error and not in transaction branch types
  perf: Extend branch type classification
  perf: Capture branch privilege information
  perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform

Eduard Zingerman (4):
  libbpf: Resolve enum fwd as full enum64 and vice versa
  libbpf: Hashmap interface update to allow both long and void*
    keys/values
  libbpf: Resolve unambigous forward declarations
  libbpf: Hashmap.h update to fix build issues using LLVM14

Martin KaFai Lau (1):
  bpf: Add hwtstamp field for the sockops prog

Namhyung Kim (1):
  perf: Kill __PERF_SAMPLE_CALLCHAIN_EARLY

Ravi Bangoria (3):
  perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
  perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
  perf/mem: Rename PERF_MEM_LVLNUM_EXTN_MEM to PERF_MEM_LVLNUM_CXL

Sandipan Das (1):
  perf/core: Add speculation info to branch entries

Xu Kuohai (1):
  libbpf: Avoid allocating reg_name with sscanf in parse_usdt_arg()

Yonghong Song (2):
  bpf: Implement cgroup storage available to non-cgroup-attached bpf
    progs
  libbpf: Support new cgroup local storage

 include/uapi/linux/bpf.h        |  51 +++++-
 include/uapi/linux/perf_event.h |  57 ++++++-
 src/btf.c                       | 267 ++++++++++++++++++++++----------
 src/btf_dump.c                  |  15 +-
 src/hashmap.c                   |  18 +--
 src/hashmap.h                   |  91 +++++++----
 src/libbpf.c                    | 196 ++++++++++++++---------
 src/libbpf_probes.c             |   1 +
 src/strset.c                    |  18 +--
 src/usdt.c                      |  44 +++---
 10 files changed, 511 insertions(+), 247 deletions(-)

--
2.30.2
2022-11-12 18:24:12 -08:00
Andrii Nakryiko
4978cf9cd8 sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.
2022-11-12 18:24:12 -08:00
Martin KaFai Lau
00fc9f407c bpf: Add hwtstamp field for the sockops prog
The bpf-tc prog has already been able to access the
skb_hwtstamps(skb)->hwtstamp.  This patch extends the same hwtstamp
access to the sockops prog.

In sockops, the skb is also available to the bpf prog during
the BPF_SOCK_OPS_PARSE_HDR_OPT_CB event.  There is a use case
that the hwtstamp will be useful to the sockops prog to better
measure the one-way-delay when the sender has put the tx
timestamp in the tcp header option.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221107230420.4192307-2-martin.lau@linux.dev
2022-11-12 18:24:12 -08:00
Eduard Zingerman
e1b34c589d libbpf: Hashmap.h update to fix build issues using LLVM14
A fix for the LLVM compilation error while building bpftool.
Replaces the expression:

  _Static_assert((p) == NULL || ...)

by expression:

  _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || ...)

When "p" is not a constant the former is not considered to be a
constant expression by LLVM 14.

The error was introduced in the following patch-set: [1].
The error was reported here: [2].

  [1] https://lore.kernel.org/bpf/20221109142611.879983-1-eddyz87@gmail.com/
  [2] https://lore.kernel.org/all/202211110355.BcGcbZxP-lkp@intel.com/

Reported-by: kernel test robot <lkp@intel.com>
Fixes: c302378bc157 ("libbpf: Hashmap interface update to allow both long and void* keys/values")
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221110223240.1350810-1-eddyz87@gmail.com
2022-11-12 18:24:12 -08:00
Eduard Zingerman
7583310911 libbpf: Resolve unambigous forward declarations
Resolve forward declarations that don't take part in type graphs
comparisons if declaration name is unambiguous. Example:

CU #1:

struct foo;              // standalone forward declaration
struct foo *some_global;

CU #2:

struct foo { int x; };
struct foo *another_global;

The `struct foo` from CU #1 is not a part of any definition that is
compared against another definition while `btf_dedup_struct_types`
processes structural types. The the BTF after `btf_dedup_struct_types`
the BTF looks as follows:

[1] STRUCT 'foo' size=4 vlen=1 ...
[2] INT 'int' size=4 ...
[3] PTR '(anon)' type_id=1
[4] FWD 'foo' fwd_kind=struct
[5] PTR '(anon)' type_id=4

This commit adds a new pass `btf_dedup_resolve_fwds`, that maps such
forward declarations to structs or unions with identical name in case
if the name is not ambiguous.

The pass is positioned before `btf_dedup_ref_types` so that types
[3] and [5] could be merged as a same type after [1] and [4] are merged.
The final result for the example above looks as follows:

[1] STRUCT 'foo' size=4 vlen=1
	'x' type_id=2 bits_offset=0
[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
[3] PTR '(anon)' type_id=1

For defconfig kernel with BTF enabled this removes 63 forward
declarations. Examples of removed declarations: `pt_regs`, `in6_addr`.
The running time of `btf__dedup` function is increased by about 3%.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221109142611.879983-3-eddyz87@gmail.com
2022-11-12 18:24:12 -08:00
Eduard Zingerman
4a65c5d888 libbpf: Hashmap interface update to allow both long and void* keys/values
An update for libbpf's hashmap interface from void* -> void* to a
polymorphic one, allowing both long and void* keys and values.

This simplifies many use cases in libbpf as hashmaps there are mostly
integer to integer.

Perf copies hashmap implementation from libbpf and has to be
updated as well.

Changes to libbpf, selftests/bpf and perf are packed as a single
commit to avoid compilation issues with any future bisect.

Polymorphic interface is acheived by hiding hashmap interface
functions behind auxiliary macros that take care of necessary
type casts, for example:

    #define hashmap_cast_ptr(p)						\
	({								\
		_Static_assert((p) == NULL || sizeof(*(p)) == sizeof(long),\
			       #p " pointee should be a long-sized integer or a pointer"); \
		(long *)(p);						\
	})

    bool hashmap_find(const struct hashmap *map, long key, long *value);

    #define hashmap__find(map, key, value) \
		hashmap_find((map), (long)(key), hashmap_cast_ptr(value))

- hashmap__find macro casts key and value parameters to long
  and long* respectively
- hashmap_cast_ptr ensures that value pointer points to a memory
  of appropriate size.

This hack was suggested by Andrii Nakryiko in [1].
This is a follow up for [2].

[1] https://lore.kernel.org/bpf/CAEf4BzZ8KFneEJxFAaNCCFPGqp20hSpS2aCj76uRk3-qZUH5xg@mail.gmail.com/
[2] https://lore.kernel.org/bpf/af1facf9-7bc8-8a3d-0db4-7b3f333589a2@meta.com/T/#m65b28f1d6d969fcd318b556db6a3ad499a42607d

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221109142611.879983-2-eddyz87@gmail.com
2022-11-12 18:24:12 -08:00
Eduard Zingerman
3a387f5a8f libbpf: Resolve enum fwd as full enum64 and vice versa
Changes de-duplication logic for enums in the following way:
- update btf_hash_enum to ignore size and kind fields to get
  ENUM and ENUM64 types in a same hash bucket;
- update btf_compat_enum to consider enum fwd to be compatible with
  full enum64 (and vice versa);

This allows BTF de-duplication in the following case:

    // CU #1
    enum foo;

    struct s {
      enum foo *a;
    } *x;

    // CU #2
    enum foo {
      x = 0xfffffffff // big enough to force enum64
    };

    struct s {
      enum foo *a;
    } *y;

De-duplicated BTF prior to this commit:

    [1] ENUM64 'foo' encoding=UNSIGNED size=8 vlen=1
    	'x' val=68719476735ULL
    [2] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64
        encoding=(none)
    [3] STRUCT 's' size=8 vlen=1
    	'a' type_id=4 bits_offset=0
    [4] PTR '(anon)' type_id=1
    [5] PTR '(anon)' type_id=3
    [6] STRUCT 's' size=8 vlen=1
    	'a' type_id=8 bits_offset=0
    [7] ENUM 'foo' encoding=UNSIGNED size=4 vlen=0
    [8] PTR '(anon)' type_id=7
    [9] PTR '(anon)' type_id=6

De-duplicated BTF after this commit:

    [1] ENUM64 'foo' encoding=UNSIGNED size=8 vlen=1
    	'x' val=68719476735ULL
    [2] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64
        encoding=(none)
    [3] STRUCT 's' size=8 vlen=1
    	'a' type_id=4 bits_offset=0
    [4] PTR '(anon)' type_id=1
    [5] PTR '(anon)' type_id=3

Enum forward declarations in C do not provide information about
enumeration values range. Thus the `btf_type->size` field is
meaningless for forward enum declarations. In fact, GCC does not
encode size in DWARF for forward enum declarations
(but dwarves sets enumeration size to a default value of `sizeof(int) * 8`
when size is not specified see dwarf_loader.c:die__create_new_enumeration).

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221101235413.1824260-1-eddyz87@gmail.com
2022-11-12 18:24:12 -08:00
Ravi Bangoria
a2eba90326 perf/mem: Rename PERF_MEM_LVLNUM_EXTN_MEM to PERF_MEM_LVLNUM_CXL
PERF_MEM_LVLNUM_EXTN_MEM was introduced to cover CXL devices but it's
bit ambiguous name and also not generic enough to cover cxl.cache and
cxl.io devices. Rename it to PERF_MEM_LVLNUM_CXL to be more specific.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/f6268268-b4e9-9ed6-0453-65792644d953@amd.com
2022-11-12 18:24:12 -08:00
Yonghong Song
7106ebe768 libbpf: Support new cgroup local storage
Add support for new cgroup local storage.

Acked-by: David Vernet <void@manifault.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20221026042856.673989-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-12 18:24:12 -08:00
Yonghong Song
3c6d127e50 bpf: Implement cgroup storage available to non-cgroup-attached bpf progs
Similar to sk/inode/task storage, implement similar cgroup local storage.

There already exists a local storage implementation for cgroup-attached
bpf programs.  See map type BPF_MAP_TYPE_CGROUP_STORAGE and helper
bpf_get_local_storage(). But there are use cases such that non-cgroup
attached bpf progs wants to access cgroup local storage data. For example,
tc egress prog has access to sk and cgroup. It is possible to use
sk local storage to emulate cgroup local storage by storing data in socket.
But this is a waste as it could be lots of sockets belonging to a particular
cgroup. Alternatively, a separate map can be created with cgroup id as the key.
But this will introduce additional overhead to manipulate the new map.
A cgroup local storage, similar to existing sk/inode/task storage,
should help for this use case.

The life-cycle of storage is managed with the life-cycle of the
cgroup struct.  i.e. the storage is destroyed along with the owning cgroup
with a call to bpf_cgrp_storage_free() when cgroup itself
is deleted.

The userspace map operations can be done by using a cgroup fd as a key
passed to the lookup, update and delete operations.

Typically, the following code is used to get the current cgroup:
    struct task_struct *task = bpf_get_current_task_btf();
    ... task->cgroups->dfl_cgrp ...
and in structure task_struct definition:
    struct task_struct {
        ....
        struct css_set __rcu            *cgroups;
        ....
    }
With sleepable program, accessing task->cgroups is not protected by rcu_read_lock.
So the current implementation only supports non-sleepable program and supporting
sleepable program will be the next step together with adding rcu_read_lock
protection for rcu tagged structures.

Since map name BPF_MAP_TYPE_CGROUP_STORAGE has been used for old cgroup local
storage support, the new map name BPF_MAP_TYPE_CGRP_STORAGE is used
for cgroup storage available to non-cgroup-attached bpf programs. The old
cgroup storage supports bpf_get_local_storage() helper to get the cgroup data.
The new cgroup storage helper bpf_cgrp_storage_get() can provide similar
functionality. While old cgroup storage pre-allocates storage memory, the new
mechanism can also pre-allocate with a user space bpf_map_update_elem() call
to avoid potential run-time memory allocation failure.
Therefore, the new cgroup storage can provide all functionality w.r.t.
the old one. So in uapi bpf.h, the old BPF_MAP_TYPE_CGROUP_STORAGE is alias to
BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED to indicate the old cgroup storage can
be deprecated since the new one can provide the same functionality.

Acked-by: David Vernet <void@manifault.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20221026042850.673791-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-12 18:24:12 -08:00
Alan Maguire
6ebbbacb5c libbpf: Btf dedup identical struct test needs check for nested structs/arrays
When examining module BTF, it is common to see core kernel structures
such as sk_buff, net_device duplicated in the module.  After adding
debug messaging to BTF it turned out that much of the problem
was down to the identical struct test failing during deduplication;
sometimes the compiler adds identical structs.  However
it turns out sometimes that type ids of identical struct members
can also differ, even when the containing structs are still identical.

To take an example, for struct sk_buff, debug messaging revealed
that the identical struct matching was failing for the anon
struct "headers"; specifically for the first field:

__u8       __pkt_type_offset[0]; /*   128     0 */

Looking at the code in BTF deduplication, we have code that guards
against the possibility of identical struct definitions, down to
type ids, and identical array definitions.  However in this case
we have a struct which is being defined twice but does not have
identical type ids since each duplicate struct has separate type
ids for the above array member.   A similar problem (though not
observed) could occur for struct-in-struct.

The solution is to make the "identical struct" test check members
not just for matching ids, but to also check if they in turn are
identical structs or arrays.

The results of doing this are quite dramatic (for some modules
at least); I see the number of type ids drop from around 10000
to just over 1000 in one module for example.

For testing use latest pahole or apply [1], otherwise dedups
can fail for the reasons described there.

Also fix return type of btf_dedup_identical_arrays() as
suggested by Andrii to match boolean return type used
elsewhere.

Fixes: efdd3eb8015e ("libbpf: Accommodate DWARF/compiler bug with duplicated structs")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1666622309-22289-1-git-send-email-alan.maguire@oracle.com

[1] https://lore.kernel.org/bpf/1666364523-9648-1-git-send-email-alan.maguire
2022-11-12 18:24:12 -08:00
Xu Kuohai
1bb7a8349a libbpf: Avoid allocating reg_name with sscanf in parse_usdt_arg()
The reg_name in parse_usdt_arg() is used to hold register name, which
is short enough to be held in a 16-byte array, so we could define
reg_name as char reg_name[16] to avoid dynamically allocating reg_name
with sscanf.

Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221018145538.2046842-1-xukuohai@huaweicloud.com
2022-11-12 18:24:12 -08:00
Andrii Nakryiko
3cd45b660c libbpf: only add BPF_F_MMAPABLE flag for data maps with global vars
Teach libbpf to not add BPF_F_MMAPABLE flag unnecessarily for ARRAY maps
that are backing data sections, if such data sections don't expose any
variables to user-space. Exposed variables are those that have
STB_GLOBAL or STB_WEAK ELF binding and correspond to BTF VAR's
BTF_VAR_GLOBAL_ALLOCATED linkage.

The overall idea is that if some data section doesn't have any variable that
is exposed through BPF skeleton, then there is no reason to make such
BPF array mmapable. Making BPF array mmapable is not a free no-op
action, because BPF verifier doesn't allow users to put special objects
(such as BPF spin locks, RB tree nodes, linked list nodes, kptrs, etc;
anything that has a sensitive internal state that should not be modified
arbitrarily from user space) into mmapable arrays, as there is no way to
prevent user space from corrupting such sensitive state through direct
memory access through memory-mapped region.

By making sure that libbpf doesn't add BPF_F_MMAPABLE flag to BPF array
maps corresponding to data sections that only have static variables
(which are not supposed to be visible to user space according to libbpf
and BPF skeleton rules), users now can have spinlocks, kptrs, etc in
either default .bss/.data sections or custom .data.* sections (assuming
there are no global variables in such sections).

The only possible hiccup with this approach is the need to use global
variables during BPF static linking, even if it's not intended to be
shared with user space through BPF skeleton. To allow such scenarios,
extend libbpf's STV_HIDDEN ELF visibility attribute handling to
variables. Libbpf is already treating global hidden BPF subprograms as
static subprograms and adjusts BTF accordingly to make BPF verifier
verify such subprograms as static subprograms with preserving entire BPF
verifier state between subprog calls. This patch teaches libbpf to treat
global hidden variables as static ones and adjust BTF information
accordingly as well. This allows to share variables between multiple
object files during static linking, but still keep them internal to BPF
program and not get them exposed through BPF skeleton.

Note, that if the user has some advanced scenario where they absolutely
need BPF_F_MMAPABLE flag on .data/.bss/.rodata BPF array map despite
only having static variables, they still can achieve this by forcing it
through explicit bpf_map__set_map_flags() API.

Acked-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20221019002816.359650-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-12 18:24:12 -08:00
Andrii Nakryiko
0e195e4597 libbpf: clean up and refactor BTF fixup step
Refactor libbpf's BTF fixup step during BPF object open phase. The only
functional change is that we now ignore BTF_VAR_GLOBAL_EXTERN variables
during fix up, not just BTF_VAR_STATIC ones, which shouldn't cause any
change in behavior as there shouldn't be any extern variable in data
sections for valid BPF object anyways.

Otherwise it's just collapsing two functions that have no reason to be
separate, and switching find_elf_var_offset() helper to return entire
symbol pointer, not just its offset. This will be used by next patch to
get ELF symbol visibility.

While refactoring, also "normalize" debug messages inside
btf_fixup_datasec() to follow general libbpf style and print out data
section name consistently, where it's available.

Acked-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20221019002816.359650-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-12 18:24:12 -08:00
Ravi Bangoria
08830e9d2f perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
PERF_MEM_SNOOPX_PEER is defined only in tools uapi header. Although
it's used only by perf tool, not defining it in kernel header can
create problems in future.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220928095805.596-8-ravi.bangoria@amd.com
2022-11-12 18:24:12 -08:00
Ravi Bangoria
1022f26d04 perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
PERF_MEM_LVLNUM_EXTN_MEM which can be used to indicate accesses to
extension memory like CXL etc. PERF_MEM_LVL_IO can be used for IO
accesses but it can not distinguish between local and remote IO.
Introduce new field PERF_MEM_LVLNUM_IO which can be clubbed with
PERF_MEM_REMOTE_REMOTE to indicate Remote IO accesses.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220928095805.596-2-ravi.bangoria@amd.com
2022-11-12 18:24:12 -08:00
Namhyung Kim
b4ca1f6407 perf: Kill __PERF_SAMPLE_CALLCHAIN_EARLY
There's no in-tree user anymore.  Let's get rid of it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220908214104.3851807-3-namhyung@kernel.org
2022-11-12 18:24:12 -08:00
Anshuman Khandual
fd71ca941b perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform
BRBE captured branch types will overflow perf_branch_entry.type and generic
branch types in perf_branch_entry.new_type. So override each available arch
specific branch type in the following manner to comprehensively process all
reported branch types in BRBE.

  PERF_BR_ARM64_FIQ            PERF_BR_NEW_ARCH_1
  PERF_BR_ARM64_DEBUG_HALT     PERF_BR_NEW_ARCH_2
  PERF_BR_ARM64_DEBUG_EXIT     PERF_BR_NEW_ARCH_3
  PERF_BR_ARM64_DEBUG_INST     PERF_BR_NEW_ARCH_4
  PERF_BR_ARM64_DEBUG_DATA     PERF_BR_NEW_ARCH_5

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20220824044822.70230-5-anshuman.khandual@arm.com
2022-11-12 18:24:12 -08:00
Anshuman Khandual
a14b39bd31 perf: Capture branch privilege information
Platforms like arm64 could capture privilege level information for all the
branch records. Hence this adds a new element in the struct branch_entry to
record the privilege level information, which could be requested through a
new event.attr.branch_sample_type based flag PERF_SAMPLE_BRANCH_PRIV_SAVE.
This flag helps user choose whether privilege information is captured.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20220824044822.70230-4-anshuman.khandual@arm.com
2022-11-12 18:24:12 -08:00
Anshuman Khandual
ade228b8f0 perf: Extend branch type classification
branch_entry.type now has ran out of space to accommodate more branch types
classification. This will prevent perf branch stack implementation on arm64
(via BRBE) to capture all available branch types. Extending this bit field
i.e branch_entry.type [4 bits] is not an option as it will break user space
ABI both for little and big endian perf tools.

Extend branch classification with a new field branch_entry.new_type via a
new branch type PERF_BR_EXTEND_ABI in branch_entry.type. Perf tools which
could decode PERF_BR_EXTEND_ABI, will then parse branch_entry.new_type as
well.

branch_entry.new_type is a 4 bit field which can hold upto 16 branch types.
The first three branch types will hold various generic page faults followed
by five architecture specific branch types, which can be overridden by the
platform for specific use cases. These architecture specific branch types
gets overridden on arm64 platform for BRBE implementation.

New generic branch types

 - PERF_BR_NEW_FAULT_ALGN
 - PERF_BR_NEW_FAULT_DATA
 - PERF_BR_NEW_FAULT_INST

New arch specific branch types

 - PERF_BR_NEW_ARCH_1
 - PERF_BR_NEW_ARCH_2
 - PERF_BR_NEW_ARCH_3
 - PERF_BR_NEW_ARCH_4
 - PERF_BR_NEW_ARCH_5

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20220824044822.70230-3-anshuman.khandual@arm.com
2022-11-12 18:24:12 -08:00
Anshuman Khandual
41ab246bdf perf: Add system error and not in transaction branch types
This expands generic branch type classification by adding two more entries
there in i.e system error and not in transaction. This also updates the x86
implementation to process X86_BR_NO_TX records as appropriate. This changes
branch types reported to user space on x86 platform but it should not be a
problem. The possible scenarios and impacts are enumerated here.

 --------------------------------------------------------------------------
 | kernel | perf tool |                     Impact                        |
 --------------------------------------------------------------------------
 |   old  |    old    |  Works as before                                  |
 --------------------------------------------------------------------------
 |   old  |    new    |  PERF_BR_UNKNOWN is processed                     |
 --------------------------------------------------------------------------
 |   new  |    old    |  PERF_BR_NO_TX is blocked via old PERF_BR_MAX     |
 --------------------------------------------------------------------------
 |   new  |    new    |  PERF_BR_NO_TX is recognized                      |
 --------------------------------------------------------------------------

When PERF_BR_NO_TX is blocked via old PERF_BR_MAX (new kernel with old perf
tool) the user space might throw up an warning complaining about an
unrecognized branch types being reported, but it's expected. PERF_BR_SERROR
& PERF_BR_NO_TX branch types will be used for BRBE implementation on arm64
platform.

PERF_BR_NO_TX complements 'abort' and 'in_tx' elements in perf_branch_entry
which represent other transaction states for a given branch record. Because
this completes the transaction state classification.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20220824044822.70230-2-anshuman.khandual@arm.com
2022-11-12 18:24:12 -08:00
Sandipan Das
d918025bc8 perf/core: Add speculation info to branch entries
Add a new "spec" bitfield to branch entries for providing speculation
information. This will be populated using hints provided by branch sampling
features on supported hardware. The following cases are covered:

  * No branch speculation information is available
  * Branch is speculative but taken on the wrong path
  * Branch is non-speculative but taken on the correct path
  * Branch is speculative and taken on the correct path

Suggested-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/834088c302faf21c7b665031dd111f424e509a64.1660211399.git.sandipan.das@amd.com
2022-11-12 18:24:12 -08:00
Daniel Müller
918d7712c0 ci: Make sure to keep ci/diffs/ directory around
Commit 837664758d ("ci: Allow usage of .patch patches") removed the
ci/diffs/.do_not_use_dot_patch_here marker file. Given that we currently
have no CI patches present and that git does not track (empty)
directories, ci/diffs/ got removed. That's fine functionality-wise, but
it makes for a bit of a discoverability hurdle. Add back a marker file
to keep the directory around.

Signed-off-by: Daniel Müller <deso@posteo.net>
2022-11-08 08:33:47 -08:00
Daniel Müller
4a84a7619f ci: Provide KBUILD_OUTPUT to actions asking for it
As of https://github.com/libbpf/ci/pull/67 a bunch of actions honor
KBUILD_OUTPUT. Doing so will make it possible to separate source code
from build artifacts, which in turn may allow us to support incremental
kernel compilation in CI down the line.
Irrespective of these future changes, actions pertaining the kernel
build now ask for an additional input defining where to store or expect
build artifacts. Provide it.

Signed-off-by: Daniel Müller <deso@posteo.net>
2022-11-07 11:02:01 -08:00
Daniel Müller
837664758d ci: Allow usage of .patch patches
With https://github.com/libbpf/ci/pull/68 merged we can now keep the
.patch extension for patches and don't have to worry about forgetting
the rename to .diff.
Remove the marker file reminding us of that need.

Signed-off-by: Daniel Müller <deso@posteo.net>
2022-11-07 11:00:56 -08:00
Daniel Müller
11bf829873 ci: Remove no longer needed patches
Patch "selftests/bpf: Fix OOB write in test_verifier" has made it to the
bpf branch (after originally landing on bpf-next). Remove it from CI, as
it is no longer necessary.

Signed-off-by: Daniel Müller <deso@posteo.net>
2022-11-07 11:00:56 -08:00
Matteo Croce
c97b16d96c ci: enable shellcheck linter
Run shellckeck linter in a github action,
as in https://github.com/libbpf/ci/pull/61

Signed-off-by: Matteo Croce <teknoraver@meta.com>
2022-10-27 16:46:38 -07:00
Matteo Croce
1c17672353 shellcheck: fix errors
Signed-off-by: Matteo Croce <teknoraver@meta.com>
2022-10-27 16:46:38 -07:00
Tobias Waldekranz
68e6f83f22 Makefile: Fix cross-compilation for 32-bit targets
Determining the correct library installation path (lib vs. lib64)
using uname(1) breaks in cross compilation scenarios where word widths
differ between the host and target system.

Instead, source the information from the compilers '-dumpmachine'
option (supported by both GCC and Clang).

We call this the "host" architecture, using the same nomenclature as
Autotools (--host configure option).

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
2022-10-18 17:33:04 -07:00
grantseltzer
383ffb79a6 Add documentation badge to README
This adds a documentation badge that links to libbpf.readthedocs.org
When rendered on github it will display the status of the docs build

Signed-off-by: Grant Seltzer <grantseltzer@gmail.com>
2022-10-17 13:55:59 -07:00
David Vernet
50315fd763 README: Fix Arch packaging link
libbpf is now packaged as part of the core repository, not the extra
repository. Fix the current link which gets a 404.

Signed-off-by: David Vernet <void@manifault.com>
2022-10-17 13:17:40 -07:00
Andrii Nakryiko
534a2c6f53 sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository.
Baseline bpf-next commit:   87dbdc230d162bf9ee1ac77c8ade178b6b1e199e
Checkpoint bpf-next commit: 62c69e89e81bfbdb9a87ae3e0599dcc6aacf786b
Baseline bpf commit:        60240bc26114543fcbfcd8a28466e67e77b20388
Checkpoint bpf commit:      e7b09357453a99e6f9e74c39e9ca1363c22c0b96

Andrii Nakryiko (1):
  bpf: explicitly define BPF_FUNC_xxx integer values

Eduard Zingerman (1):
  bpftool: Print newline before '}' for struct with padding only fields

Kui-Feng Lee (2):
  bpf: Parameterize task iterators.
  bpf: Handle bpf_link_info for the parameterized task BPF iterators.

Roberto Sassu (5):
  libbpf: Fix LIBBPF_1.0.0 declaration in libbpf.map
  libbpf: Introduce bpf_get_fd_by_id_opts and
    bpf_map_get_fd_by_id_opts()
  libbpf: Introduce bpf_prog_get_fd_by_id_opts()
  libbpf: Introduce bpf_btf_get_fd_by_id_opts()
  libbpf: Introduce bpf_link_get_fd_by_id_opts()

Shung-Hsi Yu (3):
  libbpf: Use elf_getshdrnum() instead of e_shnum
  libbpf: Deal with section with no data gracefully
  libbpf: Fix null-pointer dereference in find_prog_by_sec_insn()

Xin Liu (1):
  libbpf: Fix overrun in netlink attribute iteration

Xu Kuohai (2):
  libbpf: Fix use-after-free in btf_dump_name_dups
  libbpf: Fix memory leak in parse_usdt_arg()

 include/uapi/linux/bpf.h | 442 ++++++++++++++++++++-------------------
 src/bpf.c                |  48 ++++-
 src/bpf.h                |  16 ++
 src/btf_dump.c           |  35 +++-
 src/libbpf.c             |  22 +-
 src/libbpf.map           |   6 +-
 src/nlattr.c             |   2 +-
 src/usdt.c               |  11 +-
 8 files changed, 347 insertions(+), 235 deletions(-)

--
2.30.2
2022-10-17 13:13:02 -07:00
Shung-Hsi Yu
3a3ef0c1d0 libbpf: Fix null-pointer dereference in find_prog_by_sec_insn()
When there are no program sections, obj->programs is left unallocated,
and find_prog_by_sec_insn()'s search lands on &obj->programs[0] == NULL,
and will cause null-pointer dereference in the following access to
prog->sec_idx.

Guard the search with obj->nr_programs similar to what's being done in
__bpf_program__iter() to prevent null-pointer access from happening.

Fixes: db2b8b06423c ("libbpf: Support CO-RE relocations for multi-prog sections")
Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221012022353.7350-4-shung-hsi.yu@suse.com
2022-10-17 13:13:02 -07:00
Shung-Hsi Yu
3ee4823fcb libbpf: Deal with section with no data gracefully
ELF section data pointer returned by libelf may be NULL (if section has
SHT_NOBITS), so null check section data pointer before attempting to
copy license and kversion section.

Fixes: cb1e5e961991 ("bpf tools: Collect version and license from ELF sections")
Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221012022353.7350-3-shung-hsi.yu@suse.com
2022-10-17 13:13:02 -07:00
Shung-Hsi Yu
7412775110 libbpf: Use elf_getshdrnum() instead of e_shnum
This commit replace e_shnum with the elf_getshdrnum() helper to fix two
oss-fuzz-reported heap-buffer overflow in __bpf_object__open. Both
reports are incorrectly marked as fixed and while still being
reproducible in the latest libbpf.

  # clusterfuzz-testcase-minimized-bpf-object-fuzzer-5747922482888704
  libbpf: loading object 'fuzz-object' from buffer
  libbpf: sec_cnt is 0
  libbpf: elf: section(1) .data, size 0, link 538976288, flags 2020202020202020, type=2
  libbpf: elf: section(2) .data, size 32, link 538976288, flags 202020202020ff20, type=1
  =================================================================
  ==13==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6020000000c0 at pc 0x0000005a7b46 bp 0x7ffd12214af0 sp 0x7ffd12214ae8
  WRITE of size 4 at 0x6020000000c0 thread T0
  SCARINESS: 46 (4-byte-write-heap-buffer-overflow-far-from-bounds)
      #0 0x5a7b45 in bpf_object__elf_collect /src/libbpf/src/libbpf.c:3414:24
      #1 0x5733c0 in bpf_object_open /src/libbpf/src/libbpf.c:7223:16
      #2 0x5739fd in bpf_object__open_mem /src/libbpf/src/libbpf.c:7263:20
      ...

The issue lie in libbpf's direct use of e_shnum field in ELF header as
the section header count. Where as libelf implemented an extra logic
that, when e_shnum == 0 && e_shoff != 0, will use sh_size member of the
initial section header as the real section header count (part of ELF
spec to accommodate situation where section header counter is larger
than SHN_LORESERVE).

The above inconsistency lead to libbpf writing into a zero-entry calloc
area. So intead of using e_shnum directly, use the elf_getshdrnum()
helper provided by libelf to retrieve the section header counter into
sec_cnt.

Fixes: 0d6988e16a12 ("libbpf: Fix section counting logic")
Fixes: 25bbbd7a444b ("libbpf: Remove assumptions about uniqueness of .rodata/.data/.bss maps")
Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=40868
Link: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=40957
Link: https://lore.kernel.org/bpf/20221012022353.7350-2-shung-hsi.yu@suse.com
2022-10-17 13:13:02 -07:00
Xu Kuohai
881a10980b libbpf: Fix memory leak in parse_usdt_arg()
In the arm64 version of parse_usdt_arg(), when sscanf returns 2, reg_name
is allocated but not freed. Fix it.

Fixes: 0f8619929c57 ("libbpf: Usdt aarch64 arg parsing support")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20221011120108.782373-3-xukuohai@huaweicloud.com
2022-10-17 13:13:02 -07:00
Xu Kuohai
54caf920db libbpf: Fix use-after-free in btf_dump_name_dups
ASAN reports an use-after-free in btf_dump_name_dups:

ERROR: AddressSanitizer: heap-use-after-free on address 0xffff927006db at pc 0xaaaab5dfb618 bp 0xffffdd89b890 sp 0xffffdd89b928
READ of size 2 at 0xffff927006db thread T0
    #0 0xaaaab5dfb614 in __interceptor_strcmp.part.0 (test_progs+0x21b614)
    #1 0xaaaab635f144 in str_equal_fn tools/lib/bpf/btf_dump.c:127
    #2 0xaaaab635e3e0 in hashmap_find_entry tools/lib/bpf/hashmap.c:143
    #3 0xaaaab635e72c in hashmap__find tools/lib/bpf/hashmap.c:212
    #4 0xaaaab6362258 in btf_dump_name_dups tools/lib/bpf/btf_dump.c:1525
    #5 0xaaaab636240c in btf_dump_resolve_name tools/lib/bpf/btf_dump.c:1552
    #6 0xaaaab6362598 in btf_dump_type_name tools/lib/bpf/btf_dump.c:1567
    #7 0xaaaab6360b48 in btf_dump_emit_struct_def tools/lib/bpf/btf_dump.c:912
    #8 0xaaaab6360630 in btf_dump_emit_type tools/lib/bpf/btf_dump.c:798
    #9 0xaaaab635f720 in btf_dump__dump_type tools/lib/bpf/btf_dump.c:282
    #10 0xaaaab608523c in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:236
    #11 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875
    #12 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062
    #13 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697
    #14 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308
    #15 0xaaaab5d65990  (test_progs+0x185990)

0xffff927006db is located 11 bytes inside of 16-byte region [0xffff927006d0,0xffff927006e0)
freed by thread T0 here:
    #0 0xaaaab5e2c7c4 in realloc (test_progs+0x24c7c4)
    #1 0xaaaab634f4a0 in libbpf_reallocarray tools/lib/bpf/libbpf_internal.h:191
    #2 0xaaaab634f840 in libbpf_add_mem tools/lib/bpf/btf.c:163
    #3 0xaaaab636643c in strset_add_str_mem tools/lib/bpf/strset.c:106
    #4 0xaaaab6366560 in strset__add_str tools/lib/bpf/strset.c:157
    #5 0xaaaab6352d70 in btf__add_str tools/lib/bpf/btf.c:1519
    #6 0xaaaab6353e10 in btf__add_field tools/lib/bpf/btf.c:2032
    #7 0xaaaab6084fcc in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:232
    #8 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875
    #9 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062
    #10 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697
    #11 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308
    #12 0xaaaab5d65990  (test_progs+0x185990)

previously allocated by thread T0 here:
    #0 0xaaaab5e2c7c4 in realloc (test_progs+0x24c7c4)
    #1 0xaaaab634f4a0 in libbpf_reallocarray tools/lib/bpf/libbpf_internal.h:191
    #2 0xaaaab634f840 in libbpf_add_mem tools/lib/bpf/btf.c:163
    #3 0xaaaab636643c in strset_add_str_mem tools/lib/bpf/strset.c:106
    #4 0xaaaab6366560 in strset__add_str tools/lib/bpf/strset.c:157
    #5 0xaaaab6352d70 in btf__add_str tools/lib/bpf/btf.c:1519
    #6 0xaaaab6353ff0 in btf_add_enum_common tools/lib/bpf/btf.c:2070
    #7 0xaaaab6354080 in btf__add_enum tools/lib/bpf/btf.c:2102
    #8 0xaaaab6082f50 in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:162
    #9 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875
    #10 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062
    #11 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697
    #12 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308
    #13 0xaaaab5d65990  (test_progs+0x185990)

The reason is that the key stored in hash table name_map is a string
address, and the string memory is allocated by realloc() function, when
the memory is resized by realloc() later, the old memory may be freed,
so the address stored in name_map references to a freed memory, causing
use-after-free.

Fix it by storing duplicated string address in name_map.

Fixes: 919d2b1dbb07 ("libbpf: Allow modification of BTF and add btf__add_str API")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20221011120108.782373-2-xukuohai@huaweicloud.com
2022-10-17 13:13:02 -07:00
Roberto Sassu
0d6c47523c libbpf: Introduce bpf_link_get_fd_by_id_opts()
Introduce bpf_link_get_fd_by_id_opts(), for symmetry with
bpf_map_get_fd_by_id_opts(), to let the caller pass the newly introduced
data structure bpf_get_fd_by_id_opts. Keep the existing
bpf_link_get_fd_by_id(), and call bpf_link_get_fd_by_id_opts() with NULL as
opts argument, to prevent setting open_flags.

Currently, the kernel does not support non-zero open_flags for
bpf_link_get_fd_by_id_opts(), and a call with them will result in an error
returned by the bpf() system call. The caller should always pass zero
open_flags.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221006110736.84253-6-roberto.sassu@huaweicloud.com
2022-10-17 13:13:02 -07:00
Roberto Sassu
998282f179 libbpf: Introduce bpf_btf_get_fd_by_id_opts()
Introduce bpf_btf_get_fd_by_id_opts(), for symmetry with
bpf_map_get_fd_by_id_opts(), to let the caller pass the newly introduced
data structure bpf_get_fd_by_id_opts. Keep the existing
bpf_btf_get_fd_by_id(), and call bpf_btf_get_fd_by_id_opts() with NULL as
opts argument, to prevent setting open_flags.

Currently, the kernel does not support non-zero open_flags for
bpf_btf_get_fd_by_id_opts(), and a call with them will result in an error
returned by the bpf() system call. The caller should always pass zero
open_flags.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221006110736.84253-5-roberto.sassu@huaweicloud.com
2022-10-17 13:13:02 -07:00
Roberto Sassu
d6d1ec5b25 libbpf: Introduce bpf_prog_get_fd_by_id_opts()
Introduce bpf_prog_get_fd_by_id_opts(), for symmetry with
bpf_map_get_fd_by_id_opts(), to let the caller pass the newly introduced
data structure bpf_get_fd_by_id_opts. Keep the existing
bpf_prog_get_fd_by_id(), and call bpf_prog_get_fd_by_id_opts() with NULL as
opts argument, to prevent setting open_flags.

Currently, the kernel does not support non-zero open_flags for
bpf_prog_get_fd_by_id_opts(), and a call with them will result in an error
returned by the bpf() system call. The caller should always pass zero
open_flags.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221006110736.84253-4-roberto.sassu@huaweicloud.com
2022-10-17 13:13:02 -07:00
Roberto Sassu
a719cae6aa libbpf: Introduce bpf_get_fd_by_id_opts and bpf_map_get_fd_by_id_opts()
Define a new data structure called bpf_get_fd_by_id_opts, with the member
open_flags, to be used by callers of the _opts variants of
bpf_*_get_fd_by_id() to specify the permissions needed for the file
descriptor to be obtained.

Also, introduce bpf_map_get_fd_by_id_opts(), to let the caller pass a
bpf_get_fd_by_id_opts structure.

Finally, keep the existing bpf_map_get_fd_by_id(), and call
bpf_map_get_fd_by_id_opts() with NULL as opts argument, to request
read-write permissions (current behavior).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221006110736.84253-3-roberto.sassu@huaweicloud.com
2022-10-17 13:13:02 -07:00
Roberto Sassu
07024c87de libbpf: Fix LIBBPF_1.0.0 declaration in libbpf.map
Add the missing LIBBPF_0.8.0 at the end of the LIBBPF_1.0.0 declaration,
similarly to other version declarations.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221006110736.84253-2-roberto.sassu@huaweicloud.com
2022-10-17 13:13:02 -07:00
Andrii Nakryiko
19ef40cee6 bpf: explicitly define BPF_FUNC_xxx integer values
Historically enum bpf_func_id's BPF_FUNC_xxx enumerators relied on
implicit sequential values being assigned by compiler. This is
convenient, as new BPF helpers are always added at the very end, but it
also has its downsides, some of them being:

  - with over 200 helpers now it's very hard to know what's each helper's ID,
    which is often important to know when working with BPF assembly (e.g.,
    by dumping raw bpf assembly instructions with llvm-objdump -d
    command). it's possible to work around this by looking into vmlinux.h,
    dumping /sys/btf/kernel/vmlinux, looking at libbpf-provided
    bpf_helper_defs.h, etc. But it always feels like an unnecessary step
    and one should be able to quickly figure this out from UAPI header.

  - when backporting and cherry-picking only some BPF helpers onto older
    kernels it's important to be able to skip some enum values for helpers
    that weren't backported, but preserve absolute integer IDs to keep BPF
    helper IDs stable so that BPF programs stay portable across upstream
    and backported kernels.

While neither problem is insurmountable, they come up frequently enough
and are annoying enough to warrant improving the situation. And for the
backporting the problem can easily go unnoticed for a while, especially
if backport is done with people not very familiar with BPF subsystem overall.

Anyways, it's easy to fix this by making sure that __BPF_FUNC_MAPPER
macro provides explicit helper IDs. Unfortunately that would potentially
break existing users that use UAPI-exposed __BPF_FUNC_MAPPER and are
expected to pass macro that accepts only symbolic helper identifier
(e.g., map_lookup_elem for bpf_map_lookup_elem() helper).

As such, we need to introduce a new macro (___BPF_FUNC_MAPPER) which
would specify both identifier and integer ID, but in such a way as to
allow existing __BPF_FUNC_MAPPER be expressed in terms of new
___BPF_FUNC_MAPPER macro. And that's what this patch is doing. To avoid
duplication and allow __BPF_FUNC_MAPPER stay *exactly* the same,
___BPF_FUNC_MAPPER accepts arbitrary "context" arguments, which can be
used to pass any extra macros, arguments, and whatnot. In our case we
use this to pass original user-provided macro that expects single
argument and __BPF_FUNC_MAPPER is using it's own three-argument
__BPF_FUNC_MAPPER_APPLY intermediate macro to impedance-match new and
old "callback" macros.

Once we resolve this, we use new ___BPF_FUNC_MAPPER to define enum
bpf_func_id with explicit values. The other users of __BPF_FUNC_MAPPER
in kernel (namely in kernel/bpf/disasm.c) are kept exactly the same both
as demonstration that backwards compat works, but also to avoid
unnecessary code churn.

Note that new ___BPF_FUNC_MAPPER() doesn't forcefully insert comma
between values, as that might not be appropriate in all possible cases
where ___BPF_FUNC_MAPPER might be used by users. This doesn't reduce
usability, as it's trivial to insert that comma inside "callback" macro.

To validate all the manually specified IDs are exactly right, we used
BTF to compare before and after values:

  $ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > after.txt
  $ git stash # stach UAPI changes
  $ make -j90
  ... re-building kernel without UAPI changes ...
  $ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > before.txt
  $ diff -u before.txt after.txt
  --- before.txt  2022-10-05 10:48:18.119195916 -0700
  +++ after.txt   2022-10-05 10:46:49.446615025 -0700
  @@ -1,4 +1,4 @@
  -[14576] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211
  +[9560] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211
          'BPF_FUNC_unspec' val=0
          'BPF_FUNC_map_lookup_elem' val=1
          'BPF_FUNC_map_update_elem' val=2

As can be seen from diff above, the only thing that changed was resulting BTF
type ID of ENUM bpf_func_id, not any of the enumerators, their names or integer
values.

The only other place that needed fixing was scripts/bpf_doc.py used to generate
man pages and bpf_helper_defs.h header for libbpf and selftests. That script is
tightly-coupled to exact shape of ___BPF_FUNC_MAPPER macro definition, so had
to be trivially adapted.

Cc: Quentin Monnet <quentin@isovalent.com>
Reported-by: Andrea Terzolo <andrea.terzolo@polito.it>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20221006042452.2089843-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-10-17 13:13:02 -07:00
Eduard Zingerman
3d3ff49213 bpftool: Print newline before '}' for struct with padding only fields
btf_dump_emit_struct_def attempts to print empty structures at a
single line, e.g. `struct empty {}`. However, it has to account for a
case when there are no regular but some padding fields in the struct.
In such case `vlen` would be zero, but size would be non-zero.

E.g. here is struct bpf_timer from vmlinux.h before this patch:

 struct bpf_timer {
 	long: 64;
	long: 64;};

And after this patch:

 struct bpf_dynptr {
 	long: 64;
	long: 64;
 };

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221001104425.415768-1-eddyz87@gmail.com
2022-10-17 13:13:02 -07:00
Xin Liu
3745a20b28 libbpf: Fix overrun in netlink attribute iteration
I accidentally found that a change in commit 1045b03e07d8 ("netlink: fix
overrun in attribute iteration") was not synchronized to the function
`nla_ok` in tools/lib/bpf/nlattr.c, I think it is necessary to modify,
this patch will do it.

Signed-off-by: Xin Liu <liuxin350@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220930090708.62394-1-liuxin350@huawei.com
2022-10-17 13:13:02 -07:00
Kui-Feng Lee
b9e909dd41 bpf: Handle bpf_link_info for the parameterized task BPF iterators.
Add new fields to bpf_link_info that users can query it through
bpf_obj_get_info_by_fd().

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-3-kuifeng@fb.com
2022-10-17 13:13:02 -07:00
Kui-Feng Lee
73c0c44b67 bpf: Parameterize task iterators.
Allow creating an iterator that loops through resources of one
thread/process.

People could only create iterators to loop through all resources of
files, vma, and tasks in the system, even though they were interested
in only the resources of a specific task or process.  Passing the
additional parameters, people can now create an iterator to go
through all resources or only the resources of a task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
2022-10-17 13:13:02 -07:00
Daniel Müller
abde7fb314 Remove lru_bug from DENYLIST-latest.s390x
The comment associated with the entry is a bit confusing. It stemmed
from the test being denylisted on bpf, but not bpf-next in the past.
Regardless, by now said change has propagated to both trees, so we no
longer need to carry around this deny list entry here.

Signed-off-by: Daniel Müller <deso@posteo.net>
2022-10-12 09:29:08 -07:00
Manu Bretelle
63389d32f6 ci: remove mkrootfs from libbpf/libbpf
This is being moved to libbpf/ci instead https://github.com/libbpf/ci/pull/44

Signed-off-by: Manu Bretelle <chantr4@gmail.com>
2022-10-11 09:14:31 -07:00
Frantisek Sumsal
59080bd06c ci: use CodeQL instead of LGTM
As LGTM is going to be shut down by EOY[0], let's move the code scanning to
CodeQL as recommended. Thanks to GH integration the results from such
scans will be shown both in the respective PR and in the Security ->
Code Scanning tab[1].

[0] https://github.blog/2022-08-15-the-next-step-for-lgtm-com-github-code-scanning/
[1] https://github.com/libbpf/libbpf/security/code-scanning
2022-10-10 16:31:14 -07:00
Daniel Müller
8b0b41f812 Remove travis-ci symlink
With https://github.com/libbpf/ci/pull/41 merged we no longer require
the travis-ci symlink in this repository. Remove it. Also, it turns out
we still have a few locations referencing travis-ci/ instead of ci/.
Convert those.

Signed-off-by: Daniel Müller <deso@posteo.net>
2022-10-07 11:54:39 -07:00
chantra
6bd5b40bcd ci: install wget package on s390x runners
`wget` is installed by default in GH runners.
It is used in
[`get-linux-source`](79c799d6fb/get-linux-source/checkout_latest_kernel.sh (L32))
to download source faster than through a git fetch.

Signed-off-by: Manu Bretelle <chantr4@gmail.com>
2022-10-06 14:24:50 -07:00
chantra
6cd8907a4a ci: update actions-runner to 2.298.2 on s390x
Signed-off-by: Manu Bretelle <chantr4@gmail.com>
2022-10-06 14:24:50 -07:00
chantra
fa2875be8a ci: install zstd on s390x runners
zstd is installed by [default in GH
runners](https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2004-Readme.md).

Having it by default, we can start leveraging it when uploading
artifacts. It has a better compression ratio and is multithreaded.

Signed-off-by: Manu Bretelle <chantr4@gmail.com>
2022-10-06 14:24:50 -07:00
chantra
27a93eae7c [s390x][ci] Force replacing workers when a worker already exist with
same name.

This is essentially aligning whith what is done in
0f2883e196/entrypoint.sh (L90-L91)

The issue at hand did manifest on s390x host when restarting a runner
and GH having an existing runner with the same name.
The logic was to default to not replace it and the runner would be
started with somne defaults, which mean the name would change, and the
labels would be lost, making the runner unusable (while still running): https://gist.github.com/chantra/ef0bd3e0c9e35bb82619636acf2f7c98

By replacing the existing runner, we will not get into that state.
2022-10-04 10:52:07 -07:00
53 changed files with 81415 additions and 76588 deletions

View File

@@ -1,3 +1,5 @@
# shellcheck shell=bash
# $1 - start or end
# $2 - fold identifier, no spaces
# $3 - fold section description

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ runs:
- id: variables
run: |
export REPO_ROOT=$GITHUB_WORKSPACE
export CI_ROOT=$REPO_ROOT/travis-ci
export CI_ROOT=$REPO_ROOT/ci
# this is somewhat ugly, but that is the easiest way to share this code with
# arch specific docker
echo 'echo ::group::Env setup' > /tmp/ci_setup
@@ -16,7 +16,7 @@ runs:
echo export PROJECT_NAME='libbpf' >> /tmp/ci_setup
echo export AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")" >> /tmp/ci_setup
echo export REPO_ROOT=$GITHUB_WORKSPACE >> /tmp/ci_setup
echo export CI_ROOT=$REPO_ROOT/travis-ci >> /tmp/ci_setup
echo export CI_ROOT=$REPO_ROOT/ci >> /tmp/ci_setup
echo export VMTEST_ROOT=$CI_ROOT/vmtest >> /tmp/ci_setup
echo 'echo ::endgroup::' >> /tmp/ci_setup
shell: bash

View File

@@ -36,7 +36,7 @@ runs:
- name: Patch kernel source
uses: libbpf/ci/patch-kernel@master
with:
patches-root: '${{ github.workspace }}/travis-ci/diffs'
patches-root: '${{ github.workspace }}/ci/diffs'
repo-root: '.kernel'
- name: Prepare to build BPF selftests
shell: bash
@@ -77,11 +77,14 @@ runs:
# 4. prepare rootfs
- name: prepare rootfs
uses: libbpf/ci/prepare-rootfs@master
env:
KBUILD_OUTPUT: '.kernel'
with:
project-name: 'libbpf'
arch: ${{ inputs.arch }}
kernel: ${{ inputs.kernel }}
kernel-root: '.kernel'
kbuild-output: ${{ env.KBUILD_OUTPUT }}
image-output: '/tmp/root.img'
# 5. run selftest in QEMU
- name: Run selftests

View File

@@ -23,14 +23,24 @@ jobs:
target: RUN
- name: ASan+UBSan
target: RUN_ASAN
- name: clang
target: RUN_CLANG
- name: clang ASan+UBSan
target: RUN_CLANG_ASAN
- name: gcc-10
target: RUN_GCC10
- name: gcc-10 ASan+UBSan
target: RUN_GCC10_ASAN
- name: clang
target: RUN_CLANG
- name: clang-14
target: RUN_CLANG14
- name: clang-15
target: RUN_CLANG15
- name: clang-16
target: RUN_CLANG16
- name: gcc-10
target: RUN_GCC10
- name: gcc-11
target: RUN_GCC11
- name: gcc-12
target: RUN_GCC12
steps:
- uses: actions/checkout@v3
name: Checkout

52
.github/workflows/codeql.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
---
# vi: ts=2 sw=2 et:
name: "CodeQL"
on:
push:
branches:
- master
pull_request:
branches:
- master
permissions:
contents: read
jobs:
analyze:
name: Analyze
runs-on: ubuntu-22.04
concurrency:
group: ${{ github.workflow }}-${{ matrix.language }}-${{ github.ref }}
cancel-in-progress: true
permissions:
actions: read
security-events: write
strategy:
fail-fast: false
matrix:
language: ['cpp', 'python']
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
queries: +security-extended,security-and-quality
- name: Setup
uses: ./.github/actions/setup
- name: Build
run: |
source /tmp/ci_setup
make -C ./src
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

19
.github/workflows/lint.yml vendored Normal file
View File

@@ -0,0 +1,19 @@
name: "lint"
on:
pull_request:
push:
branches:
- master
jobs:
shellcheck:
name: ShellCheck
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Run ShellCheck
uses: ludeeus/action-shellcheck@master
env:
SHELLCHECK_OPTS: --severity=error

View File

@@ -7,7 +7,7 @@ on:
jobs:
vmtest:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Kernel LATEST + staging pahole
env:
STAGING: tmp.master

View File

@@ -19,16 +19,16 @@ jobs:
matrix:
include:
- kernel: 'LATEST'
runs_on: ubuntu-latest
runs_on: ubuntu-20.04
arch: 'x86_64'
- kernel: '5.5.0'
runs_on: ubuntu-latest
runs_on: ubuntu-20.04
arch: 'x86_64'
- kernel: '4.9.0'
runs_on: ubuntu-latest
runs_on: ubuntu-20.04
arch: 'x86_64'
- kernel: 'LATEST'
runs_on: z15
runs_on: s390x
arch: 's390x'
steps:
- uses: actions/checkout@v3

View File

@@ -1,14 +0,0 @@
# vi: set ts=2 sw=2:
extraction:
cpp:
prepare:
packages:
- libelf-dev
- pkg-config
after_prepare:
# As the buildsystem detection by LGTM is performed _only_ during the
# 'configure' phase, we need to trick LGTM we use a supported build
# system (configure, meson, cmake, etc.). This way LGTM correctly detects
# that our sources are in the src/ subfolder.
- touch src/configure
- chmod +x src/configure

View File

@@ -1 +1 @@
60240bc26114543fcbfcd8a28466e67e77b20388
54c3f1a81421f85e60ae2eaae7be3727a09916ee

View File

@@ -1 +1 @@
87dbdc230d162bf9ee1ac77c8ade178b6b1e199e
7b43df6c6ec38c9097420902a1c8165c4b25bf70

View File

@@ -5,9 +5,10 @@
libbpf
[![Github Actions Builds & Tests](https://github.com/libbpf/libbpf/actions/workflows/test.yml/badge.svg)](https://github.com/libbpf/libbpf/actions/workflows/test.yml)
[![Total alerts](https://img.shields.io/lgtm/alerts/g/libbpf/libbpf.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/libbpf/libbpf/alerts/)
[![Coverity](https://img.shields.io/coverity/scan/18195.svg)](https://scan.coverity.com/projects/libbpf)
[![CodeQL](https://github.com/libbpf/libbpf/workflows/CodeQL/badge.svg?branch=master)](https://github.com/libbpf/libbpf/actions?query=workflow%3ACodeQL+branch%3Amaster)
[![OSS-Fuzz Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/libbpf.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#libbpf)
[![Read the Docs](https://readthedocs.org/projects/libbpf/badge/?version=latest)](https://libbpf.readthedocs.io/en/latest/)
======
**This is the official home of the libbpf library.**
@@ -144,7 +145,7 @@ Distributions packaging libbpf from this mirror:
- [Fedora](https://src.fedoraproject.org/rpms/libbpf)
- [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf)
- [Debian](https://packages.debian.org/source/sid/libbpf)
- [Arch](https://www.archlinux.org/packages/extra/x86_64/libbpf/)
- [Arch](https://archlinux.org/packages/core/x86_64/libbpf/)
- [Ubuntu](https://packages.ubuntu.com/source/impish/libbpf)
- [Alpine](https://pkgs.alpinelinux.org/packages?name=libbpf)

View File

@@ -1,35 +0,0 @@
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>
Subject: [PATCH bpf-next] selftests/bpf: Fix OOB write in test_verifier
Date: Tue, 14 Dec 2021 07:18:00 +0530 [thread overview]
Message-ID: <20211214014800.78762-1-memxor@gmail.com> (raw)
The commit referenced below added fixup_map_timer support (to create a
BPF map containing timers), but failed to increase the size of the
map_fds array, leading to out of bounds write. Fix this by changing
MAX_NR_MAPS to 22.
Fixes: e60e6962c503 ("selftests/bpf: Add tests for restricted helpers")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
tools/testing/selftests/bpf/test_verifier.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ad5d30bafd93..33e2ecb3bef9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -54,7 +54,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 21
+#define MAX_NR_MAPS 22
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
--
2.34.1

View File

@@ -6,7 +6,7 @@ CONT_NAME="${CONT_NAME:-libbpf-debian-$DEBIAN_RELEASE}"
ENV_VARS="${ENV_VARS:-}"
DOCKER_RUN="${DOCKER_RUN:-docker run}"
REPO_ROOT="${REPO_ROOT:-$PWD}"
ADDITIONAL_DEPS=(clang pkg-config gcc-10)
ADDITIONAL_DEPS=(pkgconf)
EXTRA_CFLAGS=""
EXTRA_LDFLAGS=""
@@ -43,30 +43,35 @@ for phase in "${PHASES[@]}"; do
docker_exec bash -c "echo deb-src http://deb.debian.org/debian $DEBIAN_RELEASE main >>/etc/apt/sources.list"
docker_exec apt-get -y update
docker_exec apt-get -y install aptitude
docker_exec aptitude -y build-dep libelf-dev
docker_exec aptitude -y install libelf-dev
docker_exec aptitude -y install make libz-dev libelf-dev
docker_exec aptitude -y install "${ADDITIONAL_DEPS[@]}"
echo -e "::endgroup::"
;;
RUN|RUN_CLANG|RUN_GCC10|RUN_ASAN|RUN_CLANG_ASAN|RUN_GCC10_ASAN)
RUN|RUN_CLANG|RUN_CLANG14|RUN_CLANG15|RUN_CLANG16|RUN_GCC10|RUN_GCC11|RUN_GCC12|RUN_ASAN|RUN_CLANG_ASAN|RUN_GCC10_ASAN)
CC="cc"
if [[ "$phase" = *"CLANG"* ]]; then
if [[ "$phase" =~ "RUN_CLANG(\d+)(_ASAN)?" ]]; then
ENV_VARS="-e CC=clang-${BASH_REMATCH[1]} -e CXX=clang++-${BASH_REMATCH[1]}"
CC="clang-${BASH_REMATCH[1]}"
elif [[ "$phase" = *"CLANG"* ]]; then
ENV_VARS="-e CC=clang -e CXX=clang++"
CC="clang"
elif [[ "$phase" = *"GCC10"* ]]; then
ENV_VARS="-e CC=gcc-10 -e CXX=g++-10"
CC="gcc-10"
else
EXTRA_CFLAGS="${EXTRA_CFLAGS} -Wno-stringop-truncation"
elif [[ "$phase" =~ "RUN_GCC(\d+)(_ASAN)?" ]]; then
ENV_VARS="-e CC=gcc-${BASH_REMATCH[1]} -e CXX=g++-${BASH_REMATCH[1]}"
CC="gcc-${BASH_REMATCH[1]}"
fi
if [[ "$phase" = *"ASAN"* ]]; then
EXTRA_CFLAGS="${EXTRA_CFLAGS} -fsanitize=address,undefined"
EXTRA_LDFLAGS="${EXTRA_LDFLAGS} -fsanitize=address,undefined"
fi
if [[ "$CC" != "cc" ]]; then
docker_exec aptitude -y install "$CC"
else
docker_exec aptitude -y install gcc
fi
docker_exec mkdir build install
docker_exec ${CC} --version
info "build"
docker_exec make -j$((4*$(nproc))) EXTRA_CFLAGS="${EXTRA_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LDFLAGS}" -C ./src -B OBJDIR=../build
docker_exec make -j$((4*$(nproc))) EXTRA_CFLAGS="${EXTRA_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LDFLAGS}" -C ./src -B OBJDIR=../build
info "ldd build/libbpf.so:"
docker_exec ldd build/libbpf.so
if ! docker_exec ldd build/libbpf.so | grep -q libelf; then

View File

@@ -1,107 +0,0 @@
#!/bin/bash
# This script is based on drgn script for generating Arch Linux bootstrap
# images.
# https://github.com/osandov/drgn/blob/master/scripts/vmtest/mkrootfs.sh
set -euo pipefail
usage () {
USAGE_STRING="usage: $0 [NAME]
$0 -h
Build an Arch Linux root filesystem image for testing libbpf in a virtual
machine.
The image is generated as a zstd-compressed tarball.
This must be run as root, as most of the installation is done in a chroot.
Arguments:
NAME name of generated image file (default:
libbpf-vmtest-rootfs-\$DATE.tar.zst)
Options:
-h display this help message and exit"
case "$1" in
out)
echo "$USAGE_STRING"
exit 0
;;
err)
echo "$USAGE_STRING" >&2
exit 1
;;
esac
}
while getopts "h" OPT; do
case "$OPT" in
h)
usage out
;;
*)
usage err
;;
esac
done
if [[ $OPTIND -eq $# ]]; then
NAME="${!OPTIND}"
elif [[ $OPTIND -gt $# ]]; then
NAME="libbpf-vmtest-rootfs-$(date +%Y.%m.%d).tar.zst"
else
usage err
fi
pacman_conf=
root=
trap 'rm -rf "$pacman_conf" "$root"' EXIT
pacman_conf="$(mktemp -p "$PWD")"
cat > "$pacman_conf" << "EOF"
[options]
Architecture = x86_64
CheckSpace
SigLevel = Required DatabaseOptional
[core]
Include = /etc/pacman.d/mirrorlist
[extra]
Include = /etc/pacman.d/mirrorlist
[community]
Include = /etc/pacman.d/mirrorlist
EOF
root="$(mktemp -d -p "$PWD")"
packages=(
busybox
# libbpf dependencies.
libelf
zlib
# selftests test_progs dependencies.
binutils
elfutils
ethtool
glibc
iproute2
# selftests test_verifier dependencies.
libcap
)
pacstrap -C "$pacman_conf" -cGM "$root" "${packages[@]}"
# Remove unnecessary files from the chroot.
# We don't need the pacman databases anymore.
rm -rf "$root/var/lib/pacman/sync/"
# We don't need D, Fortran, or Go.
rm -f "$root/usr/lib/libgdruntime."* \
"$root/usr/lib/libgphobos."* \
"$root/usr/lib/libgfortran."* \
"$root/usr/lib/libgo."*
# We don't need any documentation.
rm -rf "$root/usr/share/{doc,help,man,texinfo}"
"$(dirname "$0")"/mkrootfs_tweak.sh "$root"
tar -C "$root" -c . | zstd -T0 -19 -o "$NAME"
chmod 644 "$NAME"

View File

@@ -1,52 +0,0 @@
#!/bin/bash
# This script builds a Debian root filesystem image for testing libbpf in a
# virtual machine. Requires debootstrap >= 1.0.95 and zstd.
# Use e.g. ./mkrootfs_debian.sh --arch=s390x to generate a rootfs for a
# foreign architecture. Requires configured binfmt_misc, e.g. using
# Debian/Ubuntu's qemu-user-binfmt package or
# https://github.com/multiarch/qemu-user-static.
set -e -u -x -o pipefail
# Check whether we are root now in order to avoid confusing errors later.
if [ "$(id -u)" != 0 ]; then
echo "$0 must run as root" >&2
exit 1
fi
# Create a working directory and schedule its deletion.
root=$(mktemp -d -p "$PWD")
trap 'rm -r "$root"' EXIT
# Install packages.
packages=(
binutils
busybox
elfutils
ethtool
iproute2
iptables
libcap2
libelf1
strace
zlib1g
)
packages=$(IFS=, && echo "${packages[*]}")
debootstrap --include="$packages" --variant=minbase "$@" bookworm "$root"
# Remove the init scripts (tests use their own). Also remove various
# unnecessary files in order to save space.
rm -rf \
"$root"/etc/rcS.d \
"$root"/usr/share/{doc,info,locale,man,zoneinfo} \
"$root"/var/cache/apt/archives/* \
"$root"/var/lib/apt/lists/*
# Apply common tweaks.
"$(dirname "$0")"/mkrootfs_tweak.sh "$root"
# Save the result.
name="libbpf-vmtest-rootfs-$(date +%Y.%m.%d).tar.zst"
rm -f "$name"
tar -C "$root" -c . | zstd -T0 -19 -o "$name"

View File

@@ -1,61 +0,0 @@
#!/bin/bash
# This script prepares a mounted root filesystem for testing libbpf in a virtual
# machine.
set -e -u -x -o pipefail
root=$1
shift
chroot "${root}" /bin/busybox --install
cat > "$root/etc/inittab" << "EOF"
::sysinit:/etc/init.d/rcS
::ctrlaltdel:/sbin/reboot
::shutdown:/sbin/swapoff -a
::shutdown:/bin/umount -a -r
::restart:/sbin/init
EOF
chmod 644 "$root/etc/inittab"
mkdir -m 755 -p "$root/etc/init.d" "$root/etc/rcS.d"
cat > "$root/etc/rcS.d/S10-mount" << "EOF"
#!/bin/sh
set -eux
/bin/mount proc /proc -t proc
# Mount devtmpfs if not mounted
if [[ -z $(/bin/mount -t devtmpfs) ]]; then
/bin/mount devtmpfs /dev -t devtmpfs
fi
/bin/mount sysfs /sys -t sysfs
/bin/mount bpffs /sys/fs/bpf -t bpf
/bin/mount debugfs /sys/kernel/debug -t debugfs
echo 'Listing currently mounted file systems'
/bin/mount
EOF
chmod 755 "$root/etc/rcS.d/S10-mount"
cat > "$root/etc/rcS.d/S40-network" << "EOF"
#!/bin/sh
set -eux
ip link set lo up
EOF
chmod 755 "$root/etc/rcS.d/S40-network"
cat > "$root/etc/init.d/rcS" << "EOF"
#!/bin/sh
set -eux
for path in /etc/rcS.d/S*; do
[ -x "$path" ] && "$path"
done
EOF
chmod 755 "$root/etc/init.d/rcS"
chmod 755 "$root"

View File

@@ -1,108 +0,0 @@
# IBM Z self-hosted builder
libbpf CI uses an IBM-provided z15 self-hosted builder. There are no IBM Z
builds of GitHub (GH) Actions runner, and stable qemu-user has problems with .NET
apps, so the builder runs the x86_64 runner version with qemu-user built from
the master branch.
We are currently supporting runners for the following repositories:
* libbpf/libbpf
* kernel-patches/bpf
* kernel-patches/vmtest
Below instructions are directly applicable to libbpf, and require minor
modifications for kernel-patches repos. Currently, qemu-user-static Docker
image is shared between all GitHub runners, but separate actions-runner-\*
service / Docker image is created for each runner type.
## Configuring the builder.
### Install prerequisites.
```
$ sudo apt install -y docker.io # Ubuntu
```
### Add services.
```
$ sudo cp *.service /etc/systemd/system/
$ sudo systemctl daemon-reload
```
### Create a config file.
```
$ sudo tee /etc/actions-runner-libbpf
repo=<owner>/<name>
access_token=<ghp_***>
runner_name=<hostname>
```
Access token should have the repo scope, consult
https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository
for details.
### Autostart the x86_64 emulation support.
This step is important, you would not be able to build docker container
without having this service running. If container build fails, make sure
service is running properly.
```
$ sudo systemctl enable --now qemu-user-static
```
### Autostart the runner.
```
$ sudo systemctl enable --now actions-runner-libbpf
```
## Rebuilding the image
In order to update the `iiilinuxibmcom/actions-runner-libbpf` image, e.g. to
get the latest OS security fixes, use the following commands:
```
$ sudo docker build \
--pull \
-f actions-runner-libbpf.Dockerfile \
-t iiilinuxibmcom/actions-runner-libbpf \
.
$ sudo systemctl restart actions-runner-libbpf
```
## Removing persistent data
The `actions-runner-libbpf` service stores various temporary data, such as
runner registration information, work directories and logs, in the
`actions-runner-libbpf` volume. In order to remove it and start from scratch,
e.g. when upgrading the runner or switching it to a different repository, use
the following commands:
```
$ sudo systemctl stop actions-runner-libbpf
$ sudo docker rm -f actions-runner-libbpf
$ sudo docker volume rm actions-runner-libbpf
```
## Troubleshooting
In order to check if service is running, use the following command:
```
$ sudo systemctl status <service name>
```
In order to get logs for service:
```
$ journalctl -u <service name>
```
In order to check which containers are currently active:
```
$ sudo docker ps
```

View File

@@ -1,52 +0,0 @@
# Self-Hosted IBM Z Github Actions Runner.
# Temporary image: amd64 dependencies.
FROM amd64/ubuntu:20.04 as ld-prefix
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install ca-certificates libicu66 libssl1.1
# Main image.
FROM s390x/ubuntu:20.04
# Packages for libbpf testing that are not installed by .github/actions/setup.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install \
bc \
bison \
cmake \
cpu-checker \
curl \
flex \
git \
jq \
linux-image-generic \
qemu-system-s390x \
rsync \
software-properties-common \
sudo \
tree \
iproute2 \
iputils-ping
# amd64 dependencies.
COPY --from=ld-prefix / /usr/x86_64-linux-gnu/
RUN ln -fs ../lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /usr/x86_64-linux-gnu/lib64/
RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/
ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu
# amd64 Github Actions Runner.
ARG version=2.296.0
RUN useradd -m actions-runner
RUN echo "actions-runner ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers
RUN echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >>/etc/sudoers
RUN usermod -a -G kvm actions-runner
USER actions-runner
ENV USER=actions-runner
WORKDIR /home/actions-runner
RUN curl -L https://github.com/actions/runner/releases/download/v${version}/actions-runner-linux-x64-${version}.tar.gz | tar -xz
VOLUME /home/actions-runner
# Scripts.
COPY fs/ /
ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]

View File

@@ -1,24 +0,0 @@
[Unit]
Description=Self-Hosted IBM Z Github Actions Runner
Wants=qemu-user-static
After=qemu-user-static
StartLimitIntervalSec=0
[Service]
Type=simple
Restart=always
ExecStart=/usr/bin/docker run \
--device=/dev/kvm \
--env-file=/etc/actions-runner-libbpf \
--init \
--interactive \
--name=actions-runner-libbpf \
--rm \
--volume=actions-runner-libbpf:/home/actions-runner \
iiilinuxibmcom/actions-runner-libbpf
ExecStop=/bin/sh -c "docker exec actions-runner-libbpf kill -INT -- -1"
ExecStop=/bin/sh -c "docker wait actions-runner-libbpf"
ExecStop=/bin/sh -c "docker rm actions-runner-libbpf"
[Install]
WantedBy=multi-user.target

View File

@@ -1,41 +0,0 @@
#!/bin/bash
#
# Ephemeral runner startup script.
#
# Expects the following environment variables:
#
# - repo=<owner>/<name>
# - access_token=<ghp_***>
# - runner_name=<hostname>
set -e -u
# Check the cached registration token.
token_file=registration-token.json
set +e
expires_at=$(jq --raw-output .expires_at "$token_file" 2>/dev/null)
status=$?
set -e
if [[ $status -ne 0 || $(date +%s) -ge $(date -d "$expires_at" +%s) ]]; then
# Refresh the cached registration token.
curl \
-X POST \
-H "Accept: application/vnd.github.v3+json" \
-H "Authorization: token $access_token" \
"https://api.github.com/repos/$repo/actions/runners/registration-token" \
-o "$token_file"
fi
# (Re-)register the runner.
registration_token=$(jq --raw-output .token "$token_file")
./config.sh remove --token "$registration_token" || true
./config.sh \
--url "https://github.com/$repo" \
--token "$registration_token" \
--labels z15 \
--name "$runner_name" \
--ephemeral
# Run one job.
./run.sh

View File

@@ -1,35 +0,0 @@
#!/bin/bash
#
# Container entrypoint that waits for all spawned processes.
#
set -e -u
# /dev/kvm has host permissions, fix it.
if [ -e /dev/kvm ]; then
sudo chown root:kvm /dev/kvm
fi
# Create a FIFO and start reading from its read end.
tempdir=$(mktemp -d "/tmp/done.XXXXXXXXXX")
trap 'rm -r "$tempdir"' EXIT
done="$tempdir/pipe"
mkfifo "$done"
cat "$done" & waiter=$!
# Start the workload. Its descendants will inherit the FIFO's write end.
status=0
if [ "$#" -eq 0 ]; then
bash 9>"$done" || status=$?
else
"$@" 9>"$done" || status=$?
fi
# When the workload and all of its descendants exit, the FIFO's write end will
# be closed and `cat "$done"` will exit. Wait until it happens. This is needed
# in order to handle SelfUpdater, which the workload may start in background
# before exiting.
wait "$waiter"
exit "$status"

View File

@@ -1,11 +0,0 @@
[Unit]
Description=Support for transparent execution of non-native binaries with QEMU user emulation
[Service]
Type=oneshot
# The source code for iiilinuxibmcom/qemu-user-static is at https://github.com/iii-i/qemu-user-static/tree/v6.1.0-1
# TODO: replace it with multiarch/qemu-user-static once version >6.1 is available
ExecStart=/usr/bin/docker run --rm --interactive --privileged iiilinuxibmcom/qemu-user-static --reset -p yes
[Install]
WantedBy=multi-user.target

View File

@@ -1,6 +1,3 @@
# TEMPORARY until bpf-next -> bpf merge
lru_bug # prog 'printk': failed to auto-attach: -524
# TEMPORARY
usdt/basic # failing verifier due to bounds check after LLVM update
usdt/multispec # same as above

View File

@@ -1,3 +1,5 @@
# shellcheck shell=bash
# $1 - start or end
# $2 - fold identifier, no spaces
# $3 - fold section description

View File

@@ -78,7 +78,7 @@ cd ${PROJECT_NAME}/selftests/bpf
if [ $# -eq 0 ]; then
test_progs
test_progs_no_alu32
test_maps
# test_maps
test_verifier
else
for test_name in "$@"; do

View File

@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
.. _libbpf:
libbpf
======
@@ -7,6 +9,7 @@ libbpf
:maxdepth: 1
API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
program_types
libbpf_naming_convention
libbpf_build

203
docs/program_types.rst Normal file
View File

@@ -0,0 +1,203 @@
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
.. _program_types_and_elf:
Program Types and ELF Sections
==============================
The table below lists the program types, their attach types where relevant and the ELF section
names supported by libbpf for them. The ELF section names follow these rules:
- ``type`` is an exact match, e.g. ``SEC("socket")``
- ``type+`` means it can be either exact ``SEC("type")`` or well-formed ``SEC("type/extras")``
with a '``/``' separator between ``type`` and ``extras``.
When ``extras`` are specified, they provide details of how to auto-attach the BPF program. The
format of ``extras`` depends on the program type, e.g. ``SEC("tracepoint/<category>/<name>")``
for tracepoints or ``SEC("usdt/<path>:<provider>:<name>")`` for USDT probes. The extras are
described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| Program Type | Attach Type | ELF Section Name | Sleepable |
+===========================================+========================================+==================================+===========+
| ``BPF_PROG_TYPE_CGROUP_DEVICE`` | ``BPF_CGROUP_DEVICE`` | ``cgroup/dev`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_CGROUP_SKB`` | | ``cgroup/skb`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET_EGRESS`` | ``cgroup_skb/egress`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET_INGRESS`` | ``cgroup_skb/ingress`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_CGROUP_SOCKOPT`` | ``BPF_CGROUP_GETSOCKOPT`` | ``cgroup/getsockopt`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_SETSOCKOPT`` | ``cgroup/setsockopt`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_CGROUP_SOCK_ADDR`` | ``BPF_CGROUP_INET4_BIND`` | ``cgroup/bind4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET4_CONNECT`` | ``cgroup/connect4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET4_GETPEERNAME`` | ``cgroup/getpeername4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET4_GETSOCKNAME`` | ``cgroup/getsockname4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET6_BIND`` | ``cgroup/bind6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET6_CONNECT`` | ``cgroup/connect6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET6_GETPEERNAME`` | ``cgroup/getpeername6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET6_GETSOCKNAME`` | ``cgroup/getsockname6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_UDP4_RECVMSG`` | ``cgroup/recvmsg4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_UDP4_SENDMSG`` | ``cgroup/sendmsg4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET6_POST_BIND`` | ``cgroup/post_bind6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET_SOCK_CREATE`` | ``cgroup/sock_create`` | |
+ + +----------------------------------+-----------+
| | | ``cgroup/sock`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_INET_SOCK_RELEASE`` | ``cgroup/sock_release`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_CGROUP_SYSCTL`` | ``BPF_CGROUP_SYSCTL`` | ``cgroup/sysctl`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_EXT`` | | ``freplace+`` [#fentry]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_FLOW_DISSECTOR`` | ``BPF_FLOW_DISSECTOR`` | ``flow_dissector`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_KPROBE`` | | ``kprobe+`` [#kprobe]_ | |
+ + +----------------------------------+-----------+
| | | ``kretprobe+`` [#kprobe]_ | |
+ + +----------------------------------+-----------+
| | | ``ksyscall+`` [#ksyscall]_ | |
+ + +----------------------------------+-----------+
| | | ``kretsyscall+`` [#ksyscall]_ | |
+ + +----------------------------------+-----------+
| | | ``uprobe+`` [#uprobe]_ | |
+ + +----------------------------------+-----------+
| | | ``uprobe.s+`` [#uprobe]_ | Yes |
+ + +----------------------------------+-----------+
| | | ``uretprobe+`` [#uprobe]_ | |
+ + +----------------------------------+-----------+
| | | ``uretprobe.s+`` [#uprobe]_ | Yes |
+ + +----------------------------------+-----------+
| | | ``usdt+`` [#usdt]_ | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_KPROBE_MULTI`` | ``kprobe.multi+`` [#kpmulti]_ | |
+ + +----------------------------------+-----------+
| | | ``kretprobe.multi+`` [#kpmulti]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LIRC_MODE2`` | ``BPF_LIRC_MODE2`` | ``lirc_mode2`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LSM`` | ``BPF_LSM_CGROUP`` | ``lsm_cgroup+`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_LSM_MAC`` | ``lsm+`` [#lsm]_ | |
+ + +----------------------------------+-----------+
| | | ``lsm.s+`` [#lsm]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_IN`` | | ``lwt_in`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_OUT`` | | ``lwt_out`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_SEG6LOCAL`` | | ``lwt_seg6local`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
+ + +----------------------------------+-----------+
| | | ``raw_tracepoint.w+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_RAW_TRACEPOINT`` | | ``raw_tp+`` [#rawtp]_ | |
+ + +----------------------------------+-----------+
| | | ``raw_tracepoint+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
+ + +----------------------------------+-----------+
| | | ``tc`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_MSG`` | ``BPF_SK_MSG_VERDICT`` | ``sk_msg`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_REUSEPORT`` | ``BPF_SK_REUSEPORT_SELECT_OR_MIGRATE`` | ``sk_reuseport/migrate`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_SK_REUSEPORT_SELECT`` | ``sk_reuseport`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_SKB`` | | ``sk_skb`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_SK_SKB_STREAM_PARSER`` | ``sk_skb/stream_parser`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_SK_SKB_STREAM_VERDICT`` | ``sk_skb/stream_verdict`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SOCKET_FILTER`` | | ``socket`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_TRACEPOINT`` | | ``tp+`` [#tp]_ | |
+ + +----------------------------------+-----------+
| | | ``tracepoint+`` [#tp]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_TRACING`` | ``BPF_MODIFY_RETURN`` | ``fmod_ret+`` [#fentry]_ | |
+ + +----------------------------------+-----------+
| | | ``fmod_ret.s+`` [#fentry]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_FENTRY`` | ``fentry+`` [#fentry]_ | |
+ + +----------------------------------+-----------+
| | | ``fentry.s+`` [#fentry]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_FEXIT`` | ``fexit+`` [#fentry]_ | |
+ + +----------------------------------+-----------+
| | | ``fexit.s+`` [#fentry]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_ITER`` | ``iter+`` [#iter]_ | |
+ + +----------------------------------+-----------+
| | | ``iter.s+`` [#iter]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_RAW_TP`` | ``tp_btf+`` [#fentry]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_XDP`` | ``BPF_XDP_CPUMAP`` | ``xdp.frags/cpumap`` | |
+ + +----------------------------------+-----------+
| | | ``xdp/cpumap`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_XDP_DEVMAP`` | ``xdp.frags/devmap`` | |
+ + +----------------------------------+-----------+
| | | ``xdp/devmap`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_XDP`` | ``xdp.frags`` | |
+ + +----------------------------------+-----------+
| | | ``xdp`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
.. rubric:: Footnotes
.. [#fentry] The ``fentry`` attach format is ``fentry[.s]/<function>``.
.. [#kprobe] The ``kprobe`` attach format is ``kprobe/<function>[+<offset>]``. Valid
characters for ``function`` are ``a-zA-Z0-9_.`` and ``offset`` must be a valid
non-negative integer.
.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
supports ``*`` and ``?`` wildcards. Valid characters for pattern are
``a-zA-Z0-9_.*?``.
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.

View File

@@ -110,6 +110,12 @@ union bpf_iter_link_info {
__u32 cgroup_fd;
__u64 cgroup_id;
} cgroup;
/* Parameters of task iterators. */
struct {
__u32 tid;
__u32 pid;
__u32 pid_fd;
} task;
};
/* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -916,7 +922,14 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
/* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching
* to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to
* both cgroup-attached and other progs and supports all functionality
* provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark
* BPF_MAP_TYPE_CGROUP_STORAGE deprecated.
*/
BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
BPF_MAP_TYPE_QUEUE,
@@ -929,6 +942,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_TASK_STORAGE,
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
};
/* Note that tracing related programs such as
@@ -1987,6 +2001,9 @@ union bpf_attr {
* sending the packet. This flag was added for GRE
* encapsulation, but might be used with other protocols
* as well in the future.
* **BPF_F_NO_TUNNEL_KEY**
* Add a flag to tunnel metadata indicating that no tunnel
* key should be set in the resulting tunnel header.
*
* Here is a typical usage on the transmit path:
*
@@ -2570,14 +2587,19 @@ union bpf_attr {
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**,
* **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
* **TCP_BPF_RTO_MIN**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -2633,7 +2655,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
* long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags)
* Description
* Redirect the packet to the endpoint referenced by *map* at
* index *key*. Depending on its type, this *map* can contain
@@ -2794,12 +2816,10 @@ union bpf_attr {
* and **BPF_CGROUP_INET6_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
* It supports the following *level*\ s:
*
* * **IPPROTO_TCP**, which supports *optname*
* **TCP_CONGESTION**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* It supports the same set of *optname*\ s that is supported by
* the **bpf_setsockopt**\ () helper. The exceptions are
* **TCP_BPF_*** is **bpf_setsockopt**\ () only and
* **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -5276,7 +5296,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
* long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5286,7 +5306,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
* long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5296,7 +5316,7 @@ union bpf_attr {
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -5397,7 +5417,7 @@ union bpf_attr {
* Drain samples from the specified user ring buffer, and invoke
* the provided callback for each such sample:
*
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
* long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
*
* If **callback_fn** returns 0, the helper will continue to try
* and drain the next sample, up to a maximum of
@@ -5429,226 +5449,272 @@ union bpf_attr {
* **-E2BIG** if user-space has tried to publish a sample which is
* larger than the size of the ring buffer, or which cannot fit
* within a struct bpf_dynptr.
*
* void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags)
* Description
* Get a bpf_local_storage from the *cgroup*.
*
* Logically, it could be thought of as getting the value from
* a *map* with *cgroup* as the **key**. From this
* perspective, the usage is not much different from
* **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this
* helper enforces the key must be a cgroup struct and the map must also
* be a **BPF_MAP_TYPE_CGRP_STORAGE**.
*
* In reality, the local-storage value is embedded directly inside of the
* *cgroup* object itself, rather than being located in the
* **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is
* queried for some *map* on a *cgroup* object, the kernel will perform an
* O(n) iteration over all of the live local-storage values for that
* *cgroup* object until the local-storage value for the *map* is found.
*
* An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
* used such that a new bpf_local_storage will be
* created if one does not exist. *value* can be used
* together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
* the initial value of a bpf_local_storage. If *value* is
* **NULL**, the new bpf_local_storage will be zero initialized.
* Return
* A bpf_local_storage pointer is returned on success.
*
* **NULL** if not found or there was an error in adding
* a new bpf_local_storage.
*
* long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup)
* Description
* Delete a bpf_local_storage from a *cgroup*.
* Return
* 0 on success.
*
* **-ENOENT** if the bpf_local_storage cannot be found.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
FN(map_update_elem), \
FN(map_delete_elem), \
FN(probe_read), \
FN(ktime_get_ns), \
FN(trace_printk), \
FN(get_prandom_u32), \
FN(get_smp_processor_id), \
FN(skb_store_bytes), \
FN(l3_csum_replace), \
FN(l4_csum_replace), \
FN(tail_call), \
FN(clone_redirect), \
FN(get_current_pid_tgid), \
FN(get_current_uid_gid), \
FN(get_current_comm), \
FN(get_cgroup_classid), \
FN(skb_vlan_push), \
FN(skb_vlan_pop), \
FN(skb_get_tunnel_key), \
FN(skb_set_tunnel_key), \
FN(perf_event_read), \
FN(redirect), \
FN(get_route_realm), \
FN(perf_event_output), \
FN(skb_load_bytes), \
FN(get_stackid), \
FN(csum_diff), \
FN(skb_get_tunnel_opt), \
FN(skb_set_tunnel_opt), \
FN(skb_change_proto), \
FN(skb_change_type), \
FN(skb_under_cgroup), \
FN(get_hash_recalc), \
FN(get_current_task), \
FN(probe_write_user), \
FN(current_task_under_cgroup), \
FN(skb_change_tail), \
FN(skb_pull_data), \
FN(csum_update), \
FN(set_hash_invalid), \
FN(get_numa_node_id), \
FN(skb_change_head), \
FN(xdp_adjust_head), \
FN(probe_read_str), \
FN(get_socket_cookie), \
FN(get_socket_uid), \
FN(set_hash), \
FN(setsockopt), \
FN(skb_adjust_room), \
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \
FN(xdp_adjust_meta), \
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
FN(override_return), \
FN(sock_ops_cb_flags_set), \
FN(msg_redirect_map), \
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data), \
FN(bind), \
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
FN(skb_load_bytes_relative), \
FN(fib_lookup), \
FN(sock_hash_update), \
FN(msg_redirect_hash), \
FN(sk_redirect_hash), \
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
FN(lwt_seg6_action), \
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
FN(get_local_storage), \
FN(sk_select_reuseport), \
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
FN(sk_release), \
FN(map_push_elem), \
FN(map_pop_elem), \
FN(map_peek_elem), \
FN(msg_push_data), \
FN(msg_pop_data), \
FN(rc_pointer_rel), \
FN(spin_lock), \
FN(spin_unlock), \
FN(sk_fullsock), \
FN(tcp_sock), \
FN(skb_ecn_set_ce), \
FN(get_listener_sock), \
FN(skc_lookup_tcp), \
FN(tcp_check_syncookie), \
FN(sysctl_get_name), \
FN(sysctl_get_current_value), \
FN(sysctl_get_new_value), \
FN(sysctl_set_new_value), \
FN(strtol), \
FN(strtoul), \
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal), \
FN(tcp_gen_syncookie), \
FN(skb_output), \
FN(probe_read_user), \
FN(probe_read_kernel), \
FN(probe_read_user_str), \
FN(probe_read_kernel_str), \
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
FN(read_branch_records), \
FN(get_ns_current_pid_tgid), \
FN(xdp_output), \
FN(get_netns_cookie), \
FN(get_current_ancestor_cgroup_id), \
FN(sk_assign), \
FN(ktime_get_boot_ns), \
FN(seq_printf), \
FN(seq_write), \
FN(sk_cgroup_id), \
FN(sk_ancestor_cgroup_id), \
FN(ringbuf_output), \
FN(ringbuf_reserve), \
FN(ringbuf_submit), \
FN(ringbuf_discard), \
FN(ringbuf_query), \
FN(csum_level), \
FN(skc_to_tcp6_sock), \
FN(skc_to_tcp_sock), \
FN(skc_to_tcp_timewait_sock), \
FN(skc_to_tcp_request_sock), \
FN(skc_to_udp6_sock), \
FN(get_task_stack), \
FN(load_hdr_opt), \
FN(store_hdr_opt), \
FN(reserve_hdr_opt), \
FN(inode_storage_get), \
FN(inode_storage_delete), \
FN(d_path), \
FN(copy_from_user), \
FN(snprintf_btf), \
FN(seq_printf_btf), \
FN(skb_cgroup_classid), \
FN(redirect_neigh), \
FN(per_cpu_ptr), \
FN(this_cpu_ptr), \
FN(redirect_peer), \
FN(task_storage_get), \
FN(task_storage_delete), \
FN(get_current_task_btf), \
FN(bprm_opts_set), \
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
FN(snprintf), \
FN(sys_bpf), \
FN(btf_find_by_name_kind), \
FN(sys_close), \
FN(timer_init), \
FN(timer_set_callback), \
FN(timer_start), \
FN(timer_cancel), \
FN(get_func_ip), \
FN(get_attach_cookie), \
FN(task_pt_regs), \
FN(get_branch_snapshot), \
FN(trace_vprintk), \
FN(skc_to_unix_sock), \
FN(kallsyms_lookup_name), \
FN(find_vma), \
FN(loop), \
FN(strncmp), \
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
FN(get_retval), \
FN(set_retval), \
FN(xdp_get_buff_len), \
FN(xdp_load_bytes), \
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
FN(skb_set_tstamp), \
FN(ima_file_hash), \
FN(kptr_xchg), \
FN(map_lookup_percpu_elem), \
FN(skc_to_mptcp_sock), \
FN(dynptr_from_mem), \
FN(ringbuf_reserve_dynptr), \
FN(ringbuf_submit_dynptr), \
FN(ringbuf_discard_dynptr), \
FN(dynptr_read), \
FN(dynptr_write), \
FN(dynptr_data), \
FN(tcp_raw_gen_syncookie_ipv4), \
FN(tcp_raw_gen_syncookie_ipv6), \
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
FN(ktime_get_tai_ns), \
FN(user_ringbuf_drain), \
#define ___BPF_FUNC_MAPPER(FN, ctx...) \
FN(unspec, 0, ##ctx) \
FN(map_lookup_elem, 1, ##ctx) \
FN(map_update_elem, 2, ##ctx) \
FN(map_delete_elem, 3, ##ctx) \
FN(probe_read, 4, ##ctx) \
FN(ktime_get_ns, 5, ##ctx) \
FN(trace_printk, 6, ##ctx) \
FN(get_prandom_u32, 7, ##ctx) \
FN(get_smp_processor_id, 8, ##ctx) \
FN(skb_store_bytes, 9, ##ctx) \
FN(l3_csum_replace, 10, ##ctx) \
FN(l4_csum_replace, 11, ##ctx) \
FN(tail_call, 12, ##ctx) \
FN(clone_redirect, 13, ##ctx) \
FN(get_current_pid_tgid, 14, ##ctx) \
FN(get_current_uid_gid, 15, ##ctx) \
FN(get_current_comm, 16, ##ctx) \
FN(get_cgroup_classid, 17, ##ctx) \
FN(skb_vlan_push, 18, ##ctx) \
FN(skb_vlan_pop, 19, ##ctx) \
FN(skb_get_tunnel_key, 20, ##ctx) \
FN(skb_set_tunnel_key, 21, ##ctx) \
FN(perf_event_read, 22, ##ctx) \
FN(redirect, 23, ##ctx) \
FN(get_route_realm, 24, ##ctx) \
FN(perf_event_output, 25, ##ctx) \
FN(skb_load_bytes, 26, ##ctx) \
FN(get_stackid, 27, ##ctx) \
FN(csum_diff, 28, ##ctx) \
FN(skb_get_tunnel_opt, 29, ##ctx) \
FN(skb_set_tunnel_opt, 30, ##ctx) \
FN(skb_change_proto, 31, ##ctx) \
FN(skb_change_type, 32, ##ctx) \
FN(skb_under_cgroup, 33, ##ctx) \
FN(get_hash_recalc, 34, ##ctx) \
FN(get_current_task, 35, ##ctx) \
FN(probe_write_user, 36, ##ctx) \
FN(current_task_under_cgroup, 37, ##ctx) \
FN(skb_change_tail, 38, ##ctx) \
FN(skb_pull_data, 39, ##ctx) \
FN(csum_update, 40, ##ctx) \
FN(set_hash_invalid, 41, ##ctx) \
FN(get_numa_node_id, 42, ##ctx) \
FN(skb_change_head, 43, ##ctx) \
FN(xdp_adjust_head, 44, ##ctx) \
FN(probe_read_str, 45, ##ctx) \
FN(get_socket_cookie, 46, ##ctx) \
FN(get_socket_uid, 47, ##ctx) \
FN(set_hash, 48, ##ctx) \
FN(setsockopt, 49, ##ctx) \
FN(skb_adjust_room, 50, ##ctx) \
FN(redirect_map, 51, ##ctx) \
FN(sk_redirect_map, 52, ##ctx) \
FN(sock_map_update, 53, ##ctx) \
FN(xdp_adjust_meta, 54, ##ctx) \
FN(perf_event_read_value, 55, ##ctx) \
FN(perf_prog_read_value, 56, ##ctx) \
FN(getsockopt, 57, ##ctx) \
FN(override_return, 58, ##ctx) \
FN(sock_ops_cb_flags_set, 59, ##ctx) \
FN(msg_redirect_map, 60, ##ctx) \
FN(msg_apply_bytes, 61, ##ctx) \
FN(msg_cork_bytes, 62, ##ctx) \
FN(msg_pull_data, 63, ##ctx) \
FN(bind, 64, ##ctx) \
FN(xdp_adjust_tail, 65, ##ctx) \
FN(skb_get_xfrm_state, 66, ##ctx) \
FN(get_stack, 67, ##ctx) \
FN(skb_load_bytes_relative, 68, ##ctx) \
FN(fib_lookup, 69, ##ctx) \
FN(sock_hash_update, 70, ##ctx) \
FN(msg_redirect_hash, 71, ##ctx) \
FN(sk_redirect_hash, 72, ##ctx) \
FN(lwt_push_encap, 73, ##ctx) \
FN(lwt_seg6_store_bytes, 74, ##ctx) \
FN(lwt_seg6_adjust_srh, 75, ##ctx) \
FN(lwt_seg6_action, 76, ##ctx) \
FN(rc_repeat, 77, ##ctx) \
FN(rc_keydown, 78, ##ctx) \
FN(skb_cgroup_id, 79, ##ctx) \
FN(get_current_cgroup_id, 80, ##ctx) \
FN(get_local_storage, 81, ##ctx) \
FN(sk_select_reuseport, 82, ##ctx) \
FN(skb_ancestor_cgroup_id, 83, ##ctx) \
FN(sk_lookup_tcp, 84, ##ctx) \
FN(sk_lookup_udp, 85, ##ctx) \
FN(sk_release, 86, ##ctx) \
FN(map_push_elem, 87, ##ctx) \
FN(map_pop_elem, 88, ##ctx) \
FN(map_peek_elem, 89, ##ctx) \
FN(msg_push_data, 90, ##ctx) \
FN(msg_pop_data, 91, ##ctx) \
FN(rc_pointer_rel, 92, ##ctx) \
FN(spin_lock, 93, ##ctx) \
FN(spin_unlock, 94, ##ctx) \
FN(sk_fullsock, 95, ##ctx) \
FN(tcp_sock, 96, ##ctx) \
FN(skb_ecn_set_ce, 97, ##ctx) \
FN(get_listener_sock, 98, ##ctx) \
FN(skc_lookup_tcp, 99, ##ctx) \
FN(tcp_check_syncookie, 100, ##ctx) \
FN(sysctl_get_name, 101, ##ctx) \
FN(sysctl_get_current_value, 102, ##ctx) \
FN(sysctl_get_new_value, 103, ##ctx) \
FN(sysctl_set_new_value, 104, ##ctx) \
FN(strtol, 105, ##ctx) \
FN(strtoul, 106, ##ctx) \
FN(sk_storage_get, 107, ##ctx) \
FN(sk_storage_delete, 108, ##ctx) \
FN(send_signal, 109, ##ctx) \
FN(tcp_gen_syncookie, 110, ##ctx) \
FN(skb_output, 111, ##ctx) \
FN(probe_read_user, 112, ##ctx) \
FN(probe_read_kernel, 113, ##ctx) \
FN(probe_read_user_str, 114, ##ctx) \
FN(probe_read_kernel_str, 115, ##ctx) \
FN(tcp_send_ack, 116, ##ctx) \
FN(send_signal_thread, 117, ##ctx) \
FN(jiffies64, 118, ##ctx) \
FN(read_branch_records, 119, ##ctx) \
FN(get_ns_current_pid_tgid, 120, ##ctx) \
FN(xdp_output, 121, ##ctx) \
FN(get_netns_cookie, 122, ##ctx) \
FN(get_current_ancestor_cgroup_id, 123, ##ctx) \
FN(sk_assign, 124, ##ctx) \
FN(ktime_get_boot_ns, 125, ##ctx) \
FN(seq_printf, 126, ##ctx) \
FN(seq_write, 127, ##ctx) \
FN(sk_cgroup_id, 128, ##ctx) \
FN(sk_ancestor_cgroup_id, 129, ##ctx) \
FN(ringbuf_output, 130, ##ctx) \
FN(ringbuf_reserve, 131, ##ctx) \
FN(ringbuf_submit, 132, ##ctx) \
FN(ringbuf_discard, 133, ##ctx) \
FN(ringbuf_query, 134, ##ctx) \
FN(csum_level, 135, ##ctx) \
FN(skc_to_tcp6_sock, 136, ##ctx) \
FN(skc_to_tcp_sock, 137, ##ctx) \
FN(skc_to_tcp_timewait_sock, 138, ##ctx) \
FN(skc_to_tcp_request_sock, 139, ##ctx) \
FN(skc_to_udp6_sock, 140, ##ctx) \
FN(get_task_stack, 141, ##ctx) \
FN(load_hdr_opt, 142, ##ctx) \
FN(store_hdr_opt, 143, ##ctx) \
FN(reserve_hdr_opt, 144, ##ctx) \
FN(inode_storage_get, 145, ##ctx) \
FN(inode_storage_delete, 146, ##ctx) \
FN(d_path, 147, ##ctx) \
FN(copy_from_user, 148, ##ctx) \
FN(snprintf_btf, 149, ##ctx) \
FN(seq_printf_btf, 150, ##ctx) \
FN(skb_cgroup_classid, 151, ##ctx) \
FN(redirect_neigh, 152, ##ctx) \
FN(per_cpu_ptr, 153, ##ctx) \
FN(this_cpu_ptr, 154, ##ctx) \
FN(redirect_peer, 155, ##ctx) \
FN(task_storage_get, 156, ##ctx) \
FN(task_storage_delete, 157, ##ctx) \
FN(get_current_task_btf, 158, ##ctx) \
FN(bprm_opts_set, 159, ##ctx) \
FN(ktime_get_coarse_ns, 160, ##ctx) \
FN(ima_inode_hash, 161, ##ctx) \
FN(sock_from_file, 162, ##ctx) \
FN(check_mtu, 163, ##ctx) \
FN(for_each_map_elem, 164, ##ctx) \
FN(snprintf, 165, ##ctx) \
FN(sys_bpf, 166, ##ctx) \
FN(btf_find_by_name_kind, 167, ##ctx) \
FN(sys_close, 168, ##ctx) \
FN(timer_init, 169, ##ctx) \
FN(timer_set_callback, 170, ##ctx) \
FN(timer_start, 171, ##ctx) \
FN(timer_cancel, 172, ##ctx) \
FN(get_func_ip, 173, ##ctx) \
FN(get_attach_cookie, 174, ##ctx) \
FN(task_pt_regs, 175, ##ctx) \
FN(get_branch_snapshot, 176, ##ctx) \
FN(trace_vprintk, 177, ##ctx) \
FN(skc_to_unix_sock, 178, ##ctx) \
FN(kallsyms_lookup_name, 179, ##ctx) \
FN(find_vma, 180, ##ctx) \
FN(loop, 181, ##ctx) \
FN(strncmp, 182, ##ctx) \
FN(get_func_arg, 183, ##ctx) \
FN(get_func_ret, 184, ##ctx) \
FN(get_func_arg_cnt, 185, ##ctx) \
FN(get_retval, 186, ##ctx) \
FN(set_retval, 187, ##ctx) \
FN(xdp_get_buff_len, 188, ##ctx) \
FN(xdp_load_bytes, 189, ##ctx) \
FN(xdp_store_bytes, 190, ##ctx) \
FN(copy_from_user_task, 191, ##ctx) \
FN(skb_set_tstamp, 192, ##ctx) \
FN(ima_file_hash, 193, ##ctx) \
FN(kptr_xchg, 194, ##ctx) \
FN(map_lookup_percpu_elem, 195, ##ctx) \
FN(skc_to_mptcp_sock, 196, ##ctx) \
FN(dynptr_from_mem, 197, ##ctx) \
FN(ringbuf_reserve_dynptr, 198, ##ctx) \
FN(ringbuf_submit_dynptr, 199, ##ctx) \
FN(ringbuf_discard_dynptr, 200, ##ctx) \
FN(dynptr_read, 201, ##ctx) \
FN(dynptr_write, 202, ##ctx) \
FN(dynptr_data, 203, ##ctx) \
FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \
FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \
FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \
FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \
FN(ktime_get_tai_ns, 208, ##ctx) \
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
/* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
*/
#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name),
#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN)
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
*/
#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y,
enum bpf_func_id {
__BPF_FUNC_MAPPER(__BPF_ENUM_FN)
___BPF_FUNC_MAPPER(__BPF_ENUM_FN)
__BPF_FUNC_MAX_ID,
};
#undef __BPF_ENUM_FN
@@ -5701,6 +5767,7 @@ enum {
BPF_F_ZERO_CSUM_TX = (1ULL << 1),
BPF_F_DONT_FRAGMENT = (1ULL << 2),
BPF_F_SEQ_NUMBER = (1ULL << 3),
BPF_F_NO_TUNNEL_KEY = (1ULL << 4),
};
/* BPF_FUNC_skb_get_tunnel_key flags. */
@@ -6262,6 +6329,10 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 order;
} cgroup;
struct {
__u32 tid;
__u32 pid;
} task;
};
} iter;
struct {
@@ -6381,6 +6452,7 @@ struct bpf_sock_ops {
* the outgoing header has not
* been written yet.
*/
__u64 skb_hwtstamp;
};
/* Definitions for bpf_sock_ops_cb_flags */
@@ -6823,6 +6895,16 @@ struct bpf_dynptr {
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_list_head {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_list_node {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.

114
include/uapi/linux/fcntl.h Normal file
View File

@@ -0,0 +1,114 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_FCNTL_H
#define _UAPI_LINUX_FCNTL_H
#include <asm/fcntl.h>
#include <linux/openat2.h>
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
/*
* Cancel a blocking posix lock; internal use only until we expose an
* asynchronous lock api to userspace:
*/
#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
/* Create a file descriptor with FD_CLOEXEC set. */
#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
/*
* Request nofications on a directory.
* See below for events that may be notified.
*/
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
/*
* Set and get of pipe page size array
*/
#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
/*
* Set/Get seals
*/
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
/*
* Types of seals
*/
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
/* (1U << 31) is reserved for signed error codes */
/*
* Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
* underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
* the specific file.
*/
#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
/*
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
#define RWH_WRITE_LIFE_LONG 4
#define RWH_WRITE_LIFE_EXTREME 5
/*
* The originally introduced spelling is remained from the first
* versions of the patch set that introduced the feature, see commit
* v4.13-rc1~212^2~51.
*/
#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
#define DN_MODIFY 0x00000002 /* File modified */
#define DN_CREATE 0x00000004 /* File created */
#define DN_DELETE 0x00000008 /* File removed */
#define DN_RENAME 0x00000010 /* File renamed */
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
/*
* The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
* meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
* unlinkat. The two functions do completely different things and therefore,
* the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
* faccessat would be undefined behavior and thus treating it equivalent to
* AT_EACCESS is valid undefined behavior.
*/
#define AT_FDCWD -100 /* Special value used to indicate
openat should use the current
working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
#define AT_EACCESS 0x200 /* Test access permitted for
effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
#endif /* _UAPI_LINUX_FCNTL_H */

View File

@@ -673,6 +673,7 @@ enum {
IFLA_XFRM_UNSPEC,
IFLA_XFRM_LINK,
IFLA_XFRM_IF_ID,
IFLA_XFRM_COLLECT_METADATA,
__IFLA_XFRM_MAX
};

View File

@@ -164,8 +164,6 @@ enum perf_event_sample_format {
PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24,
PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
@@ -204,6 +202,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -233,6 +233,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -253,9 +255,48 @@ enum {
PERF_BR_COND_RET = 10, /* conditional function return */
PERF_BR_ERET = 11, /* exception return */
PERF_BR_IRQ = 12, /* irq */
PERF_BR_SERROR = 13, /* system error */
PERF_BR_NO_TX = 14, /* not in transaction */
PERF_BR_EXTEND_ABI = 15, /* extend ABI */
PERF_BR_MAX,
};
/*
* Common branch speculation outcome classification
*/
enum {
PERF_BR_SPEC_NA = 0, /* Not available */
PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
PERF_BR_PRIV_UNKNOWN = 0,
PERF_BR_PRIV_USER = 1,
PERF_BR_PRIV_KERNEL = 2,
PERF_BR_PRIV_HV = 3,
};
#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1295,7 +1336,9 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0xa available */
/* 5-0x8 available */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
@@ -1313,7 +1356,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */
@@ -1363,6 +1406,7 @@ union perf_mem_data_src {
* abort: aborting a hardware transaction
* cycles: cycles from last branch (or 0 if not supported)
* type: branch type
* spec: branch speculation info (or 0 if not supported)
*/
struct perf_branch_entry {
__u64 from;
@@ -1373,7 +1417,10 @@ struct perf_branch_entry {
abort:1, /* transaction abort */
cycles:16, /* cycle count to last branch */
type:4, /* branch type */
reserved:40;
spec:2, /* branch speculation info */
new_type:4, /* additional branch type */
priv:3, /* privilege level */
reserved:31;
};
union perf_sample_weight {

View File

@@ -44,7 +44,7 @@ rm -rf elfutils
git clone git://sourceware.org/git/elfutils.git
(
cd elfutils
git checkout 83251d4091241acddbdcf16f814e3bc6ef3df49a
git checkout e9f3045caa5c4498f371383e5519151942d48b6d
git log --oneline -1
# ASan isn't compatible with -Wl,--no-undefined: https://github.com/google/sanitizers/issues/380

View File

@@ -42,6 +42,7 @@ PATH_MAP=( \
[tools/include/uapi/linux/bpf_common.h]=include/uapi/linux/bpf_common.h \
[tools/include/uapi/linux/bpf.h]=include/uapi/linux/bpf.h \
[tools/include/uapi/linux/btf.h]=include/uapi/linux/btf.h \
[tools/include/uapi/linux/fcntl.h]=include/uapi/linux/fcntl.h \
[tools/include/uapi/linux/if_link.h]=include/uapi/linux/if_link.h \
[tools/include/uapi/linux/if_xdp.h]=include/uapi/linux/if_xdp.h \
[tools/include/uapi/linux/netlink.h]=include/uapi/linux/netlink.h \
@@ -51,8 +52,8 @@ PATH_MAP=( \
[Documentation/bpf/libbpf]=docs \
)
LIBBPF_PATHS="${!PATH_MAP[@]} :^tools/lib/bpf/Makefile :^tools/lib/bpf/Build :^tools/lib/bpf/.gitignore :^tools/include/tools/libc_compat.h"
LIBBPF_VIEW_PATHS="${PATH_MAP[@]}"
LIBBPF_PATHS=("${!PATH_MAP[@]}" ":^tools/lib/bpf/Makefile" ":^tools/lib/bpf/Build" ":^tools/lib/bpf/.gitignore" ":^tools/include/tools/libc_compat.h")
LIBBPF_VIEW_PATHS=("${PATH_MAP[@]}")
LIBBPF_VIEW_EXCLUDE_REGEX='^src/(Makefile|Build|test_libbpf\.c|bpf_helper_defs\.h|\.gitignore)$|^docs/(\.gitignore|api\.rst|conf\.py)$|^docs/sphinx/.*'
LINUX_VIEW_EXCLUDE_REGEX='^include/tools/libc_compat.h$'
@@ -85,7 +86,9 @@ commit_desc()
# $2 - paths filter
commit_signature()
{
git show --pretty='("%s")|%aI|%b' --shortstat $1 -- ${2-.} | tr '\n' '|'
local ref=$1
shift
git show --pretty='("%s")|%aI|%b' --shortstat $ref -- "${@-.}" | tr '\n' '|'
}
# Cherry-pick commits touching libbpf-related files
@@ -104,7 +107,7 @@ cherry_pick_commits()
local libbpf_conflict_cnt
local desc
new_commits=$(git rev-list --no-merges --topo-order --reverse ${baseline_tag}..${tip_tag} ${LIBBPF_PATHS[@]})
new_commits=$(git rev-list --no-merges --topo-order --reverse ${baseline_tag}..${tip_tag} -- "${LIBBPF_PATHS[@]}")
for new_commit in ${new_commits}; do
desc="$(commit_desc ${new_commit})"
signature="$(commit_signature ${new_commit} "${LIBBPF_PATHS[@]}")"
@@ -138,7 +141,7 @@ cherry_pick_commits()
echo "Picking '${desc}'..."
if ! git cherry-pick ${new_commit} &>/dev/null; then
echo "Warning! Cherry-picking '${desc} failed, checking if it's non-libbpf files causing problems..."
libbpf_conflict_cnt=$(git diff --name-only --diff-filter=U -- ${LIBBPF_PATHS[@]} | wc -l)
libbpf_conflict_cnt=$(git diff --name-only --diff-filter=U -- "${LIBBPF_PATHS[@]}" | wc -l)
conflict_cnt=$(git diff --name-only | wc -l)
prompt_resolution=1
@@ -284,7 +287,7 @@ cd_to ${LIBBPF_REPO}
helpers_changes=$(git status --porcelain src/bpf_helper_defs.h | wc -l)
if ((${helpers_changes} == 1)); then
git add src/bpf_helper_defs.h
git commit -m "sync: auto-generate latest BPF helpers
git commit -s -m "sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.
" -- src/bpf_helper_defs.h
@@ -306,7 +309,7 @@ Baseline bpf-next commit: ${BASELINE_COMMIT}\n\
Checkpoint bpf-next commit: ${TIP_COMMIT}\n\
Baseline bpf commit: ${BPF_BASELINE_COMMIT}\n\
Checkpoint bpf commit: ${BPF_TIP_COMMIT}/" | \
git commit --file=-
git commit -s --file=-
echo "SUCCESS! ${COMMIT_CNT} commits synced."
@@ -316,10 +319,10 @@ cd_to ${LINUX_REPO}
git checkout -b ${VIEW_TAG} ${TIP_COMMIT}
FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --tree-filter "${LIBBPF_TREE_FILTER}" ${VIEW_TAG}^..${VIEW_TAG}
FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --subdirectory-filter __libbpf ${VIEW_TAG}^..${VIEW_TAG}
git ls-files -- ${LIBBPF_VIEW_PATHS[@]} | grep -v -E "${LINUX_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/linux-view.ls
git ls-files -- "${LIBBPF_VIEW_PATHS[@]}" | grep -v -E "${LINUX_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/linux-view.ls
cd_to ${LIBBPF_REPO}
git ls-files -- ${LIBBPF_VIEW_PATHS[@]} | grep -v -E "${LIBBPF_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/github-view.ls
git ls-files -- "${LIBBPF_VIEW_PATHS[@]}" | grep -v -E "${LIBBPF_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/github-view.ls
echo "Comparing list of files..."
diff -u ${TMP_DIR}/linux-view.ls ${TMP_DIR}/github-view.ls

View File

@@ -77,7 +77,8 @@ INSTALL = install
DESTDIR ?=
ifeq ($(filter-out %64 %64be %64eb %64le %64el s390x, $(shell uname -m)),)
HOSTARCH = $(firstword $(subst -, ,$(shell $(CC) -dumpmachine)))
ifeq ($(filter-out %64 %64be %64eb %64le %64el s390x, $(HOSTARCH)),)
LIBSUBDIR := lib64
else
LIBSUBDIR := lib

View File

@@ -935,58 +935,98 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
}
int bpf_prog_get_fd_by_id(__u32 id)
int bpf_prog_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts)
{
const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
attr.prog_id = id;
attr.open_flags = OPTS_GET(opts, open_flags, 0);
fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_map_get_fd_by_id(__u32 id)
int bpf_prog_get_fd_by_id(__u32 id)
{
return bpf_prog_get_fd_by_id_opts(id, NULL);
}
int bpf_map_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts)
{
const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
attr.map_id = id;
attr.open_flags = OPTS_GET(opts, open_flags, 0);
fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_btf_get_fd_by_id(__u32 id)
int bpf_map_get_fd_by_id(__u32 id)
{
return bpf_map_get_fd_by_id_opts(id, NULL);
}
int bpf_btf_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts)
{
const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
attr.btf_id = id;
attr.open_flags = OPTS_GET(opts, open_flags, 0);
fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_link_get_fd_by_id(__u32 id)
int bpf_btf_get_fd_by_id(__u32 id)
{
return bpf_btf_get_fd_by_id_opts(id, NULL);
}
int bpf_link_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts)
{
const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
union bpf_attr attr;
int fd;
if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
return libbpf_err(-EINVAL);
memset(&attr, 0, attr_sz);
attr.link_id = id;
attr.open_flags = OPTS_GET(opts, open_flags, 0);
fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
}
int bpf_link_get_fd_by_id(__u32 id)
{
return bpf_link_get_fd_by_id_opts(id, NULL);
}
int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
const size_t attr_sz = offsetofend(union bpf_attr, info);

View File

@@ -365,10 +365,26 @@ LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
struct bpf_get_fd_by_id_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 open_flags; /* permissions requested for the operation on fd */
size_t :0;
};
#define bpf_get_fd_by_id_opts__last_field open_flags
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
struct bpf_prog_query_opts {
@@ -393,8 +409,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
#ifdef __cplusplus
/* forward-declaring enums in C++ isn't compatible with pure C enums, so
* instead define bpf_enable_stats() as accepting int as an input
*/
LIBBPF_API int bpf_enable_stats(int type);
#else
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
#endif
struct bpf_prog_bind_opts {
size_t sz; /* size of this struct for forward/backward compatibility */

View File

@@ -29,6 +29,7 @@ struct tcp_request_sock;
struct udp6_sock;
struct unix_sock;
struct task_struct;
struct cgroup;
struct __sk_buff;
struct sk_msg_md;
struct xdp_md;
@@ -536,6 +537,9 @@ static long (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_k
* sending the packet. This flag was added for GRE
* encapsulation, but might be used with other protocols
* as well in the future.
* **BPF_F_NO_TUNNEL_KEY**
* Add a flag to tunnel metadata indicating that no tunnel
* key should be set in the resulting tunnel header.
*
* Here is a typical usage on the transmit path:
*
@@ -1209,14 +1213,19 @@ static long (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
* **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**,
* **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
* **TCP_BPF_RTO_MIN**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
*
* Returns
* 0 on success, or a negative error in case of failure.
@@ -1306,7 +1315,7 @@ static long (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32
* **XDP_REDIRECT** on success, or the value of the two lower bits
* of the *flags* argument on error.
*/
static long (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51;
static long (*bpf_redirect_map)(void *map, __u64 key, __u64 flags) = (void *) 51;
/*
* bpf_sk_redirect_map
@@ -1465,12 +1474,10 @@ static long (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct
* and **BPF_CGROUP_INET6_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
* It supports the following *level*\ s:
*
* * **IPPROTO_TCP**, which supports *optname*
* **TCP_CONGESTION**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* It supports the same set of *optname*\ s that is supported by
* the **bpf_setsockopt**\ () helper. The exceptions are
* **TCP_BPF_*** is **bpf_setsockopt**\ () only and
* **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only.
*
* Returns
* 0 on success, or a negative error in case of failure.
@@ -4484,7 +4491,7 @@ static void (*bpf_ringbuf_discard_dynptr)(struct bpf_dynptr *ptr, __u64 flags) =
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*/
static long (*bpf_dynptr_read)(void *dst, __u32 len, struct bpf_dynptr *src, __u32 offset, __u64 flags) = (void *) 201;
static long (*bpf_dynptr_read)(void *dst, __u32 len, const struct bpf_dynptr *src, __u32 offset, __u64 flags) = (void *) 201;
/*
* bpf_dynptr_write
@@ -4498,7 +4505,7 @@ static long (*bpf_dynptr_read)(void *dst, __u32 len, struct bpf_dynptr *src, __u
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
*/
static long (*bpf_dynptr_write)(struct bpf_dynptr *dst, __u32 offset, void *src, __u32 len, __u64 flags) = (void *) 202;
static long (*bpf_dynptr_write)(const struct bpf_dynptr *dst, __u32 offset, void *src, __u32 len, __u64 flags) = (void *) 202;
/*
* bpf_dynptr_data
@@ -4513,7 +4520,7 @@ static long (*bpf_dynptr_write)(struct bpf_dynptr *dst, __u32 offset, void *src,
* read-only, if the dynptr is invalid, or if the offset and length
* is out of bounds.
*/
static void *(*bpf_dynptr_data)(struct bpf_dynptr *ptr, __u32 offset, __u32 len) = (void *) 203;
static void *(*bpf_dynptr_data)(const struct bpf_dynptr *ptr, __u32 offset, __u32 len) = (void *) 203;
/*
* bpf_tcp_raw_gen_syncookie_ipv4
@@ -4626,7 +4633,7 @@ static __u64 (*bpf_ktime_get_tai_ns)(void) = (void *) 208;
* Drain samples from the specified user ring buffer, and invoke
* the provided callback for each such sample:
*
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
* long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
*
* If **callback_fn** returns 0, the helper will continue to try
* and drain the next sample, up to a maximum of
@@ -4662,4 +4669,50 @@ static __u64 (*bpf_ktime_get_tai_ns)(void) = (void *) 208;
*/
static long (*bpf_user_ringbuf_drain)(void *map, void *callback_fn, void *ctx, __u64 flags) = (void *) 209;
/*
* bpf_cgrp_storage_get
*
* Get a bpf_local_storage from the *cgroup*.
*
* Logically, it could be thought of as getting the value from
* a *map* with *cgroup* as the **key**. From this
* perspective, the usage is not much different from
* **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this
* helper enforces the key must be a cgroup struct and the map must also
* be a **BPF_MAP_TYPE_CGRP_STORAGE**.
*
* In reality, the local-storage value is embedded directly inside of the
* *cgroup* object itself, rather than being located in the
* **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is
* queried for some *map* on a *cgroup* object, the kernel will perform an
* O(n) iteration over all of the live local-storage values for that
* *cgroup* object until the local-storage value for the *map* is found.
*
* An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
* used such that a new bpf_local_storage will be
* created if one does not exist. *value* can be used
* together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
* the initial value of a bpf_local_storage. If *value* is
* **NULL**, the new bpf_local_storage will be zero initialized.
*
* Returns
* A bpf_local_storage pointer is returned on success.
*
* **NULL** if not found or there was an error in adding
* a new bpf_local_storage.
*/
static void *(*bpf_cgrp_storage_get)(void *map, struct cgroup *cgroup, void *value, __u64 flags) = (void *) 210;
/*
* bpf_cgrp_storage_delete
*
* Delete a bpf_local_storage from a *cgroup*.
*
* Returns
* 0 on success.
*
* **-ENOENT** if the bpf_local_storage cannot be found.
*/
static long (*bpf_cgrp_storage_delete)(void *map, struct cgroup *cgroup) = (void *) 211;

286
src/btf.c
View File

@@ -688,8 +688,21 @@ int btf__align_of(const struct btf *btf, __u32 id)
if (align <= 0)
return libbpf_err(align);
max_align = max(max_align, align);
/* if field offset isn't aligned according to field
* type's alignment, then struct must be packed
*/
if (btf_member_bitfield_size(t, i) == 0 &&
(m->offset % (8 * align)) != 0)
return 1;
}
/* if struct/union size isn't a multiple of its alignment,
* then struct must be packed
*/
if ((t->size % max_align) != 0)
return 1;
return max_align;
}
default:
@@ -990,6 +1003,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
err = 0;
if (!btf_data) {
pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path);
err = -ENOENT;
goto done;
}
@@ -1559,15 +1573,15 @@ struct btf_pipe {
static int btf_rewrite_str(__u32 *str_off, void *ctx)
{
struct btf_pipe *p = ctx;
void *mapped_off;
long mapped_off;
int off, err;
if (!*str_off) /* nothing to do for empty strings */
return 0;
if (p->str_off_map &&
hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
*str_off = (__u32)(long)mapped_off;
hashmap__find(p->str_off_map, *str_off, &mapped_off)) {
*str_off = mapped_off;
return 0;
}
@@ -1579,7 +1593,7 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx)
* performing expensive string comparisons.
*/
if (p->str_off_map) {
err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
err = hashmap__append(p->str_off_map, *str_off, off);
if (err)
return err;
}
@@ -1630,8 +1644,8 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
return 0;
}
static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
static size_t btf_dedup_identity_hash_fn(long key, void *ctx);
static bool btf_dedup_equal_fn(long k1, long k2, void *ctx);
int btf__add_btf(struct btf *btf, const struct btf *src_btf)
{
@@ -1724,7 +1738,8 @@ err_out:
memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len);
/* and now restore original strings section size; types data size
* wasn't modified, so doesn't need restoring, see big comment above */
* wasn't modified, so doesn't need restoring, see big comment above
*/
btf->hdr->str_len = old_strs_len;
hashmap__free(p.str_off_map);
@@ -2329,7 +2344,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
*/
int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
{
if (!value|| !value[0])
if (!value || !value[0])
return libbpf_err(-EINVAL);
return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
@@ -2881,6 +2896,7 @@ static int btf_dedup_strings(struct btf_dedup *d);
static int btf_dedup_prim_types(struct btf_dedup *d);
static int btf_dedup_struct_types(struct btf_dedup *d);
static int btf_dedup_ref_types(struct btf_dedup *d);
static int btf_dedup_resolve_fwds(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
@@ -2988,15 +3004,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
* Algorithm summary
* =================
*
* Algorithm completes its work in 6 separate passes:
* Algorithm completes its work in 7 separate passes:
*
* 1. Strings deduplication.
* 2. Primitive types deduplication (int, enum, fwd).
* 3. Struct/union types deduplication.
* 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
* 4. Resolve unambiguous forward declarations.
* 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func
* protos, and const/volatile/restrict modifiers).
* 5. Types compaction.
* 6. Types remapping.
* 6. Types compaction.
* 7. Types remapping.
*
* Algorithm determines canonical type descriptor, which is a single
* representative type for each truly unique type. This canonical type is the
@@ -3060,6 +3077,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
pr_debug("btf_dedup_struct_types failed:%d\n", err);
goto done;
}
err = btf_dedup_resolve_fwds(d);
if (err < 0) {
pr_debug("btf_dedup_resolve_fwds failed:%d\n", err);
goto done;
}
err = btf_dedup_ref_types(d);
if (err < 0) {
pr_debug("btf_dedup_ref_types failed:%d\n", err);
@@ -3126,12 +3148,11 @@ static long hash_combine(long h, long value)
}
#define for_each_dedup_cand(d, node, hash) \
hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
hashmap__for_each_key_entry(d->dedup_table, node, hash)
static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
{
return hashmap__append(d->dedup_table,
(void *)hash, (void *)(long)type_id);
return hashmap__append(d->dedup_table, hash, type_id);
}
static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@@ -3178,17 +3199,17 @@ static void btf_dedup_free(struct btf_dedup *d)
free(d);
}
static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
static size_t btf_dedup_identity_hash_fn(long key, void *ctx)
{
return (size_t)key;
return key;
}
static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
static size_t btf_dedup_collision_hash_fn(long key, void *ctx)
{
return 0;
}
static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
static bool btf_dedup_equal_fn(long k1, long k2, void *ctx)
{
return k1 == k2;
}
@@ -3404,23 +3425,17 @@ static long btf_hash_enum(struct btf_type *t)
{
long h;
/* don't hash vlen and enum members to support enum fwd resolving */
/* don't hash vlen, enum members and size to support enum fwd resolving */
h = hash_combine(0, t->name_off);
h = hash_combine(h, t->info & ~0xffff);
h = hash_combine(h, t->size);
return h;
}
/* Check structural equality of two ENUMs. */
static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2)
{
const struct btf_enum *m1, *m2;
__u16 vlen;
int i;
if (!btf_equal_common(t1, t2))
return false;
vlen = btf_vlen(t1);
m1 = btf_enum(t1);
m2 = btf_enum(t2);
@@ -3433,15 +3448,12 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
return true;
}
static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2)
static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2)
{
const struct btf_enum64 *m1, *m2;
__u16 vlen;
int i;
if (!btf_equal_common(t1, t2))
return false;
vlen = btf_vlen(t1);
m1 = btf_enum64(t1);
m2 = btf_enum64(t2);
@@ -3455,6 +3467,19 @@ static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2)
return true;
}
/* Check structural equality of two ENUMs or ENUM64s. */
static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
{
if (!btf_equal_common(t1, t2))
return false;
/* t1 & t2 kinds are identical because of btf_equal_common */
if (btf_kind(t1) == BTF_KIND_ENUM)
return btf_equal_enum_members(t1, t2);
else
return btf_equal_enum64_members(t1, t2);
}
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
return btf_is_any_enum(t) && btf_vlen(t) == 0;
@@ -3464,21 +3489,14 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
{
if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
return btf_equal_enum(t1, t2);
/* ignore vlen when comparing */
/* At this point either t1 or t2 or both are forward declarations, thus:
* - skip comparing vlen because it is zero for forward declarations;
* - skip comparing size to allow enum forward declarations
* to be compatible with enum64 full declarations;
* - skip comparing kind for the same reason.
*/
return t1->name_off == t2->name_off &&
(t1->info & ~0xffff) == (t2->info & ~0xffff) &&
t1->size == t2->size;
}
static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2)
{
if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
return btf_equal_enum64(t1, t2);
/* ignore vlen when comparing */
return t1->name_off == t2->name_off &&
(t1->info & ~0xffff) == (t2->info & ~0xffff) &&
t1->size == t2->size;
btf_is_any_enum(t1) && btf_is_any_enum(t2);
}
/*
@@ -3753,7 +3771,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_INT:
h = btf_hash_int_decl_tag(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_int_tag(t, cand)) {
new_id = cand_id;
@@ -3763,9 +3781,10 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_ENUM:
case BTF_KIND_ENUM64:
h = btf_hash_enum(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_enum(t, cand)) {
new_id = cand_id;
@@ -3783,32 +3802,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
}
break;
case BTF_KIND_ENUM64:
h = btf_hash_enum(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_enum64(t, cand)) {
new_id = cand_id;
break;
}
if (btf_compat_enum64(t, cand)) {
if (btf_is_enum_fwd(t)) {
/* resolve fwd to full enum */
new_id = cand_id;
break;
}
/* resolve canonical enum fwd to full enum */
d->map[cand_id] = type_id;
}
}
break;
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
@@ -3887,14 +3885,14 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
}
/* Check if given two types are identical ARRAY definitions */
static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
{
struct btf_type *t1, *t2;
t1 = btf_type_by_id(d->btf, id1);
t2 = btf_type_by_id(d->btf, id2);
if (!btf_is_array(t1) || !btf_is_array(t2))
return 0;
return false;
return btf_equal_array(t1, t2);
}
@@ -3918,7 +3916,9 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id
m1 = btf_members(t1);
m2 = btf_members(t2);
for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
if (m1->type != m2->type)
if (m1->type != m2->type &&
!btf_dedup_identical_arrays(d, m1->type, m2->type) &&
!btf_dedup_identical_structs(d, m1->type, m2->type))
return false;
}
return true;
@@ -4097,10 +4097,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_equal_int_tag(cand_type, canon_type);
case BTF_KIND_ENUM:
return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_ENUM64:
return btf_compat_enum64(cand_type, canon_type);
return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
case BTF_KIND_FLOAT:
@@ -4311,7 +4309,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_struct(t);
for_each_dedup_cand(d, hash_entry, h) {
__u32 cand_id = (__u32)(long)hash_entry->value;
__u32 cand_id = hash_entry->value;
int eq;
/*
@@ -4416,7 +4414,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
@@ -4433,7 +4431,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_int_decl_tag(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_int_tag(t, cand)) {
new_id = cand_id;
@@ -4457,7 +4455,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_array(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_array(t, cand)) {
new_id = cand_id;
@@ -4489,7 +4487,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_fnproto(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
cand_id = hash_entry->value;
cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_fnproto(t, cand)) {
new_id = cand_id;
@@ -4525,6 +4523,134 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
return 0;
}
/*
* Collect a map from type names to type ids for all canonical structs
* and unions. If the same name is shared by several canonical types
* use a special value 0 to indicate this fact.
*/
static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map)
{
__u32 nr_types = btf__type_cnt(d->btf);
struct btf_type *t;
__u32 type_id;
__u16 kind;
int err;
/*
* Iterate over base and split module ids in order to get all
* available structs in the map.
*/
for (type_id = 1; type_id < nr_types; ++type_id) {
t = btf_type_by_id(d->btf, type_id);
kind = btf_kind(t);
if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
continue;
/* Skip non-canonical types */
if (type_id != d->map[type_id])
continue;
err = hashmap__add(names_map, t->name_off, type_id);
if (err == -EEXIST)
err = hashmap__set(names_map, t->name_off, 0, NULL, NULL);
if (err)
return err;
}
return 0;
}
static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id)
{
struct btf_type *t = btf_type_by_id(d->btf, type_id);
enum btf_fwd_kind fwd_kind = btf_kflag(t);
__u16 cand_kind, kind = btf_kind(t);
struct btf_type *cand_t;
uintptr_t cand_id;
if (kind != BTF_KIND_FWD)
return 0;
/* Skip if this FWD already has a mapping */
if (type_id != d->map[type_id])
return 0;
if (!hashmap__find(names_map, t->name_off, &cand_id))
return 0;
/* Zero is a special value indicating that name is not unique */
if (!cand_id)
return 0;
cand_t = btf_type_by_id(d->btf, cand_id);
cand_kind = btf_kind(cand_t);
if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) ||
(cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION))
return 0;
d->map[type_id] = cand_id;
return 0;
}
/*
* Resolve unambiguous forward declarations.
*
* The lion's share of all FWD declarations is resolved during
* `btf_dedup_struct_types` phase when different type graphs are
* compared against each other. However, if in some compilation unit a
* FWD declaration is not a part of a type graph compared against
* another type graph that declaration's canonical type would not be
* changed. Example:
*
* CU #1:
*
* struct foo;
* struct foo *some_global;
*
* CU #2:
*
* struct foo { int u; };
* struct foo *another_global;
*
* After `btf_dedup_struct_types` the BTF looks as follows:
*
* [1] STRUCT 'foo' size=4 vlen=1 ...
* [2] INT 'int' size=4 ...
* [3] PTR '(anon)' type_id=1
* [4] FWD 'foo' fwd_kind=struct
* [5] PTR '(anon)' type_id=4
*
* This pass assumes that such FWD declarations should be mapped to
* structs or unions with identical name in case if the name is not
* ambiguous.
*/
static int btf_dedup_resolve_fwds(struct btf_dedup *d)
{
int i, err;
struct hashmap *names_map;
names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
if (IS_ERR(names_map))
return PTR_ERR(names_map);
err = btf_dedup_fill_unique_names_map(d, names_map);
if (err < 0)
goto exit;
for (i = 0; i < d->btf->nr_types; i++) {
err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i);
if (err < 0)
break;
}
exit:
hashmap__free(names_map);
return err;
}
/*
* Compact types.
*

View File

@@ -13,6 +13,7 @@
#include <ctype.h>
#include <endian.h>
#include <errno.h>
#include <limits.h>
#include <linux/err.h>
#include <linux/btf.h>
#include <linux/kernel.h>
@@ -117,14 +118,14 @@ struct btf_dump {
struct btf_dump_data *typed_dump;
};
static size_t str_hash_fn(const void *key, void *ctx)
static size_t str_hash_fn(long key, void *ctx)
{
return str_hash(key);
return str_hash((void *)key);
}
static bool str_equal_fn(const void *a, const void *b, void *ctx)
static bool str_equal_fn(long a, long b, void *ctx)
{
return strcmp(a, b) == 0;
return strcmp((void *)a, (void *)b) == 0;
}
static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
@@ -219,6 +220,17 @@ static int btf_dump_resize(struct btf_dump *d)
return 0;
}
static void btf_dump_free_names(struct hashmap *map)
{
size_t bkt;
struct hashmap_entry *cur;
hashmap__for_each_entry(map, cur, bkt)
free((void *)cur->pkey);
hashmap__free(map);
}
void btf_dump__free(struct btf_dump *d)
{
int i;
@@ -237,8 +249,8 @@ void btf_dump__free(struct btf_dump *d)
free(d->cached_names);
free(d->emit_queue);
free(d->decl_stack);
hashmap__free(d->type_names);
hashmap__free(d->ident_names);
btf_dump_free_names(d->type_names);
btf_dump_free_names(d->ident_names);
free(d);
}
@@ -822,14 +834,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
const struct btf_member *m;
int align, i, bit_sz;
int max_align = 1, align, i, bit_sz;
__u16 vlen;
align = btf__align_of(btf, id);
/* size of a non-packed struct has to be a multiple of its alignment*/
if (align && t->size % align)
return true;
m = btf_members(t);
vlen = btf_vlen(t);
/* all non-bitfield fields have to be naturally aligned */
@@ -838,8 +845,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
bit_sz = btf_member_bitfield_size(t, i);
if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
return true;
max_align = max(align, max_align);
}
/* size of a non-packed struct has to be a multiple of its alignment */
if (t->size % max_align != 0)
return true;
/*
* if original struct was marked as packed, but its layout is
* naturally aligned, we'll detect that it's not packed
@@ -847,44 +857,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
return false;
}
static int chip_away_bits(int total, int at_most)
{
return total % at_most ? : at_most;
}
static void btf_dump_emit_bit_padding(const struct btf_dump *d,
int cur_off, int m_off, int m_bit_sz,
int align, int lvl)
int cur_off, int next_off, int next_align,
bool in_bitfield, int lvl)
{
int off_diff = m_off - cur_off;
int ptr_bits = d->ptr_sz * 8;
const struct {
const char *name;
int bits;
} pads[] = {
{"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
};
int new_off, pad_bits, bits, i;
const char *pad_type;
if (off_diff <= 0)
/* no gap */
return;
if (m_bit_sz == 0 && off_diff < align * 8)
/* natural padding will take care of a gap */
return;
if (cur_off >= next_off)
return; /* no gap */
while (off_diff > 0) {
const char *pad_type;
int pad_bits;
/* For filling out padding we want to take advantage of
* natural alignment rules to minimize unnecessary explicit
* padding. First, we find the largest type (among long, int,
* short, or char) that can be used to force naturally aligned
* boundary. Once determined, we'll use such type to fill in
* the remaining padding gap. In some cases we can rely on
* compiler filling some gaps, but sometimes we need to force
* alignment to close natural alignment with markers like
* `long: 0` (this is always the case for bitfields). Note
* that even if struct itself has, let's say 4-byte alignment
* (i.e., it only uses up to int-aligned types), using `long:
* X;` explicit padding doesn't actually change struct's
* overall alignment requirements, but compiler does take into
* account that type's (long, in this example) natural
* alignment requirements when adding implicit padding. We use
* this fact heavily and don't worry about ruining correct
* struct alignment requirement.
*/
for (i = 0; i < ARRAY_SIZE(pads); i++) {
pad_bits = pads[i].bits;
pad_type = pads[i].name;
if (ptr_bits > 32 && off_diff > 32) {
pad_type = "long";
pad_bits = chip_away_bits(off_diff, ptr_bits);
} else if (off_diff > 16) {
pad_type = "int";
pad_bits = chip_away_bits(off_diff, 32);
} else if (off_diff > 8) {
pad_type = "short";
pad_bits = chip_away_bits(off_diff, 16);
} else {
pad_type = "char";
pad_bits = chip_away_bits(off_diff, 8);
new_off = roundup(cur_off, pad_bits);
if (new_off <= next_off)
break;
}
if (new_off > cur_off && new_off <= next_off) {
/* We need explicit `<type>: 0` aligning mark if next
* field is right on alignment offset and its
* alignment requirement is less strict than <type>'s
* alignment (so compiler won't naturally align to the
* offset we expect), or if subsequent `<type>: X`,
* will actually completely fit in the remaining hole,
* making compiler basically ignore `<type>: X`
* completely.
*/
if (in_bitfield ||
(new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
(new_off != next_off && next_off - new_off <= new_off - cur_off))
/* but for bitfields we'll emit explicit bit count */
btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
in_bitfield ? new_off - cur_off : 0);
cur_off = new_off;
}
/* Now we know we start at naturally aligned offset for a chosen
* padding type (long, int, short, or char), and so the rest is just
* a straightforward filling of remaining padding gap with full
* `<type>: sizeof(<type>);` markers, except for the last one, which
* might need smaller than sizeof(<type>) padding.
*/
while (cur_off != next_off) {
bits = min(next_off - cur_off, pad_bits);
if (bits == pad_bits) {
btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
cur_off += bits;
continue;
}
/* For the remainder padding that doesn't cover entire
* pad_type bit length, we pick the smallest necessary type.
* This is pure aesthetics, we could have just used `long`,
* but having smallest necessary one communicates better the
* scale of the padding gap.
*/
for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
pad_type = pads[i].name;
pad_bits = pads[i].bits;
if (pad_bits < bits)
continue;
btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
cur_off += bits;
break;
}
btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
off_diff -= pad_bits;
}
}
@@ -904,9 +967,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
{
const struct btf_member *m = btf_members(t);
bool is_struct = btf_is_struct(t);
int align, i, packed, off = 0;
bool packed, prev_bitfield = false;
int align, i, off = 0;
__u16 vlen = btf_vlen(t);
align = btf__align_of(d->btf, id);
packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
btf_dump_printf(d, "%s%s%s {",
@@ -916,37 +981,47 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
for (i = 0; i < vlen; i++, m++) {
const char *fname;
int m_off, m_sz;
int m_off, m_sz, m_align;
bool in_bitfield;
fname = btf_name_of(d, m->name_off);
m_sz = btf_member_bitfield_size(t, i);
m_off = btf_member_bit_offset(t, i);
align = packed ? 1 : btf__align_of(d->btf, m->type);
m_align = packed ? 1 : btf__align_of(d->btf, m->type);
btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
in_bitfield = prev_bitfield && m_sz != 0;
btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
btf_dump_printf(d, "\n%s", pfx(lvl + 1));
btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
if (m_sz) {
btf_dump_printf(d, ": %d", m_sz);
off = m_off + m_sz;
prev_bitfield = true;
} else {
m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
off = m_off + m_sz * 8;
prev_bitfield = false;
}
btf_dump_printf(d, ";");
}
/* pad at the end, if necessary */
if (is_struct) {
align = packed ? 1 : btf__align_of(d->btf, id);
btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
lvl + 1);
}
if (is_struct)
btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);
if (vlen)
/*
* Keep `struct empty {}` on a single line,
* only print newline when there are regular or padding fields.
*/
if (vlen || t->size) {
btf_dump_printf(d, "\n");
btf_dump_printf(d, "%s}", pfx(lvl));
btf_dump_printf(d, "%s}", pfx(lvl));
} else {
btf_dump_printf(d, "}");
}
if (packed)
btf_dump_printf(d, " __attribute__((packed))");
}
@@ -1058,6 +1133,43 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
else
btf_dump_emit_enum64_val(d, t, lvl, vlen);
btf_dump_printf(d, "\n%s}", pfx(lvl));
/* special case enums with special sizes */
if (t->size == 1) {
/* one-byte enums can be forced with mode(byte) attribute */
btf_dump_printf(d, " __attribute__((mode(byte)))");
} else if (t->size == 8 && d->ptr_sz == 8) {
/* enum can be 8-byte sized if one of the enumerator values
* doesn't fit in 32-bit integer, or by adding mode(word)
* attribute (but probably only on 64-bit architectures); do
* our best here to try to satisfy the contract without adding
* unnecessary attributes
*/
bool needs_word_mode;
if (btf_is_enum(t)) {
/* enum can't represent 64-bit values, so we need word mode */
needs_word_mode = true;
} else {
/* enum64 needs mode(word) if none of its values has
* non-zero upper 32-bits (which means that all values
* fit in 32-bit integers and won't cause compiler to
* bump enum to be 64-bit naturally
*/
int i;
needs_word_mode = true;
for (i = 0; i < vlen; i++) {
if (btf_enum64(t)[i].val_hi32 != 0) {
needs_word_mode = false;
break;
}
}
}
if (needs_word_mode)
btf_dump_printf(d, " __attribute__((mode(word)))");
}
}
static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
@@ -1520,11 +1632,22 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
const char *orig_name)
{
char *old_name, *new_name;
size_t dup_cnt = 0;
int err;
hashmap__find(name_map, orig_name, (void **)&dup_cnt);
new_name = strdup(orig_name);
if (!new_name)
return 1;
(void)hashmap__find(name_map, orig_name, &dup_cnt);
dup_cnt++;
hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL);
if (err)
free(new_name);
free(old_name);
return dup_cnt;
}
@@ -1963,7 +2086,7 @@ static int btf_dump_struct_data(struct btf_dump *d,
{
const struct btf_member *m = btf_members(t);
__u16 n = btf_vlen(t);
int i, err;
int i, err = 0;
/* note that we increment depth before calling btf_dump_print() below;
* this is intentional. btf_dump_data_newline() will not print a

View File

@@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map)
}
static bool hashmap_find_entry(const struct hashmap *map,
const void *key, size_t hash,
const long key, size_t hash,
struct hashmap_entry ***pprev,
struct hashmap_entry **entry)
{
@@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map,
return false;
}
int hashmap__insert(struct hashmap *map, const void *key, void *value,
enum hashmap_insert_strategy strategy,
const void **old_key, void **old_value)
int hashmap_insert(struct hashmap *map, long key, long value,
enum hashmap_insert_strategy strategy,
long *old_key, long *old_value)
{
struct hashmap_entry *entry;
size_t h;
int err;
if (old_key)
*old_key = NULL;
*old_key = 0;
if (old_value)
*old_value = NULL;
*old_value = 0;
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (strategy != HASHMAP_APPEND &&
@@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value,
return 0;
}
bool hashmap__find(const struct hashmap *map, const void *key, void **value)
bool hashmap_find(const struct hashmap *map, long key, long *value)
{
struct hashmap_entry *entry;
size_t h;
@@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value)
return true;
}
bool hashmap__delete(struct hashmap *map, const void *key,
const void **old_key, void **old_value)
bool hashmap_delete(struct hashmap *map, long key,
long *old_key, long *old_value)
{
struct hashmap_entry **pprev, *entry;
size_t h;

View File

@@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s)
return h;
}
typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
typedef size_t (*hashmap_hash_fn)(long key, void *ctx);
typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx);
/*
* Hashmap interface is polymorphic, keys and values could be either
* long-sized integers or pointers, this is achieved as follows:
* - interface functions that operate on keys and values are hidden
* behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert;
* - these auxiliary macros cast the key and value parameters as
* long or long *, so the user does not have to specify the casts explicitly;
* - for pointer parameters (e.g. old_key) the size of the pointed
* type is verified by hashmap_cast_ptr using _Static_assert;
* - when iterating using hashmap__for_each_* forms
* hasmap_entry->key should be used for integer keys and
* hasmap_entry->pkey should be used for pointer keys,
* same goes for values.
*/
struct hashmap_entry {
const void *key;
void *value;
union {
long key;
const void *pkey;
};
union {
long value;
void *pvalue;
};
struct hashmap_entry *next;
};
@@ -102,6 +122,13 @@ enum hashmap_insert_strategy {
HASHMAP_APPEND,
};
#define hashmap_cast_ptr(p) ({ \
_Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \
sizeof(*(p)) == sizeof(long), \
#p " pointee should be a long-sized integer or a pointer"); \
(long *)(p); \
})
/*
* hashmap__insert() adds key/value entry w/ various semantics, depending on
* provided strategy value. If a given key/value pair replaced already
@@ -109,42 +136,38 @@ enum hashmap_insert_strategy {
* through old_key and old_value to allow calling code do proper memory
* management.
*/
int hashmap__insert(struct hashmap *map, const void *key, void *value,
enum hashmap_insert_strategy strategy,
const void **old_key, void **old_value);
int hashmap_insert(struct hashmap *map, long key, long value,
enum hashmap_insert_strategy strategy,
long *old_key, long *old_value);
static inline int hashmap__add(struct hashmap *map,
const void *key, void *value)
{
return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
}
#define hashmap__insert(map, key, value, strategy, old_key, old_value) \
hashmap_insert((map), (long)(key), (long)(value), (strategy), \
hashmap_cast_ptr(old_key), \
hashmap_cast_ptr(old_value))
static inline int hashmap__set(struct hashmap *map,
const void *key, void *value,
const void **old_key, void **old_value)
{
return hashmap__insert(map, key, value, HASHMAP_SET,
old_key, old_value);
}
#define hashmap__add(map, key, value) \
hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL)
static inline int hashmap__update(struct hashmap *map,
const void *key, void *value,
const void **old_key, void **old_value)
{
return hashmap__insert(map, key, value, HASHMAP_UPDATE,
old_key, old_value);
}
#define hashmap__set(map, key, value, old_key, old_value) \
hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value))
static inline int hashmap__append(struct hashmap *map,
const void *key, void *value)
{
return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
}
#define hashmap__update(map, key, value, old_key, old_value) \
hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value))
bool hashmap__delete(struct hashmap *map, const void *key,
const void **old_key, void **old_value);
#define hashmap__append(map, key, value) \
hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL)
bool hashmap__find(const struct hashmap *map, const void *key, void **value);
bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value);
#define hashmap__delete(map, key, old_key, old_value) \
hashmap_delete((map), (long)(key), \
hashmap_cast_ptr(old_key), \
hashmap_cast_ptr(old_value))
bool hashmap_find(const struct hashmap *map, long key, long *value);
#define hashmap__find(map, key, value) \
hashmap_find((map), (long)(key), hashmap_cast_ptr(value))
/*
* hashmap__for_each_entry - iterate over all entries in hashmap

View File

@@ -164,6 +164,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
};
static const char * const prog_type_name[] = {
@@ -346,7 +347,8 @@ enum sec_def_flags {
SEC_ATTACHABLE = 2,
SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
/* attachment target is specified through BTF ID in either kernel or
* other BPF program's BTF object */
* other BPF program's BTF object
*/
SEC_ATTACH_BTF = 4,
/* BPF program type allows sleeping/blocking in kernel */
SEC_SLEEPABLE = 8,
@@ -487,7 +489,7 @@ struct bpf_map {
char *name;
/* real_name is defined for special internal maps (.rodata*,
* .data*, .bss, .kconfig) and preserves their original ELF section
* name. This is important to be be able to find corresponding BTF
* name. This is important to be able to find corresponding BTF
* DATASEC information.
*/
char *real_name;
@@ -597,7 +599,7 @@ struct elf_state {
size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct elf_sec_desc *secs;
int sec_cnt;
size_t sec_cnt;
int btf_maps_shndx;
__u32 btf_maps_sec_btf_id;
int text_shndx;
@@ -1408,6 +1410,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
if (!data) {
pr_warn("invalid license section in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
* go over allowed ELF data section buffer
*/
@@ -1421,7 +1427,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
__u32 kver;
if (size != sizeof(kver)) {
if (!data || size != sizeof(kver)) {
pr_warn("invalid kver section in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
@@ -1457,15 +1463,12 @@ static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32
return -ENOENT;
}
static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
Elf_Data *symbols = obj->efile.symbols;
const char *sname;
size_t si;
if (!name || !off)
return -EINVAL;
for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
Elf64_Sym *sym = elf_sym_by_idx(obj, si);
@@ -1479,15 +1482,13 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
sname = elf_sym_str(obj, sym->st_name);
if (!sname) {
pr_warn("failed to get sym name string for var %s\n", name);
return -EIO;
}
if (strcmp(name, sname) == 0) {
*off = sym->st_value;
return 0;
return ERR_PTR(-EIO);
}
if (strcmp(name, sname) == 0)
return sym;
}
return -ENOENT;
return ERR_PTR(-ENOENT);
}
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
@@ -1578,7 +1579,38 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name)
}
static int
bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
/* Internal BPF map is mmap()'able only if at least one of corresponding
* DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
* variable and it's not marked as __hidden (which turns it into, effectively,
* a STATIC variable).
*/
static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
{
const struct btf_type *t, *vt;
struct btf_var_secinfo *vsi;
int i, n;
if (!map->btf_value_type_id)
return false;
t = btf__type_by_id(obj->btf, map->btf_value_type_id);
if (!btf_is_datasec(t))
return false;
vsi = btf_var_secinfos(t);
for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
vt = btf__type_by_id(obj->btf, vsi->type);
if (!btf_is_var(vt))
continue;
if (btf_var(vt)->linkage != BTF_VAR_STATIC)
return true;
}
return false;
}
static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
@@ -1610,7 +1642,12 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
? BPF_F_RDONLY_PROG : 0;
def->map_flags |= BPF_F_MMAPABLE;
/* failures are fine because of maps like .rodata.str1.1 */
(void) map_fill_btf_type_info(obj, map);
if (map_is_mmapable(obj, map))
def->map_flags |= BPF_F_MMAPABLE;
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map->name, map->sec_idx, map->sec_offset, def->map_flags);
@@ -1627,9 +1664,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
return err;
}
/* failures are fine because of maps like .rodata.str1.1 */
(void) bpf_map_find_btf_info(obj, map);
if (data)
memcpy(map->mmaped, data, data_sz);
@@ -1830,12 +1864,20 @@ static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
return -ERANGE;
}
switch (ext->kcfg.sz) {
case 1: *(__u8 *)ext_val = value; break;
case 2: *(__u16 *)ext_val = value; break;
case 4: *(__u32 *)ext_val = value; break;
case 8: *(__u64 *)ext_val = value; break;
default:
return -EINVAL;
case 1:
*(__u8 *)ext_val = value;
break;
case 2:
*(__u16 *)ext_val = value;
break;
case 4:
*(__u32 *)ext_val = value;
break;
case 8:
*(__u64 *)ext_val = value;
break;
default:
return -EINVAL;
}
ext->is_set = true;
return 0;
@@ -2541,7 +2583,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
fill_map_from_def(map->inner_map, &inner_def);
}
err = bpf_map_find_btf_info(obj, map);
err = map_fill_btf_type_info(obj, map);
if (err)
return err;
@@ -2737,7 +2779,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
m->type = enum64_placeholder_id;
m->offset = 0;
}
}
}
}
return 0;
@@ -2846,57 +2888,89 @@ static int compare_vsi_off(const void *_a, const void *_b)
static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
struct btf_type *t)
{
__u32 size = 0, off = 0, i, vars = btf_vlen(t);
const char *name = btf__name_by_offset(btf, t->name_off);
const struct btf_type *t_var;
__u32 size = 0, i, vars = btf_vlen(t);
const char *sec_name = btf__name_by_offset(btf, t->name_off);
struct btf_var_secinfo *vsi;
const struct btf_var *var;
int ret;
bool fixup_offsets = false;
int err;
if (!name) {
if (!sec_name) {
pr_debug("No name found in string section for DATASEC kind.\n");
return -ENOENT;
}
/* .extern datasec size and var offsets were set correctly during
* extern collection step, so just skip straight to sorting variables
/* Extern-backing datasecs (.ksyms, .kconfig) have their size and
* variable offsets set at the previous step. Further, not every
* extern BTF VAR has corresponding ELF symbol preserved, so we skip
* all fixups altogether for such sections and go straight to sorting
* VARs within their DATASEC.
*/
if (t->size)
if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
goto sort_vars;
ret = find_elf_sec_sz(obj, name, &size);
if (ret || !size) {
pr_debug("Invalid size for section %s: %u bytes\n", name, size);
return -ENOENT;
/* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
* fix this up. But BPF static linker already fixes this up and fills
* all the sizes and offsets during static linking. So this step has
* to be optional. But the STV_HIDDEN handling is non-optional for any
* non-extern DATASEC, so the variable fixup loop below handles both
* functions at the same time, paying the cost of BTF VAR <-> ELF
* symbol matching just once.
*/
if (t->size == 0) {
err = find_elf_sec_sz(obj, sec_name, &size);
if (err || !size) {
pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
sec_name, size, err);
return -ENOENT;
}
t->size = size;
fixup_offsets = true;
}
t->size = size;
for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
const struct btf_type *t_var;
struct btf_var *var;
const char *var_name;
Elf64_Sym *sym;
t_var = btf__type_by_id(btf, vsi->type);
if (!t_var || !btf_is_var(t_var)) {
pr_debug("Non-VAR type seen in section %s\n", name);
pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
return -EINVAL;
}
var = btf_var(t_var);
if (var->linkage == BTF_VAR_STATIC)
if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
continue;
name = btf__name_by_offset(btf, t_var->name_off);
if (!name) {
pr_debug("No name found in string section for VAR kind\n");
var_name = btf__name_by_offset(btf, t_var->name_off);
if (!var_name) {
pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
sec_name, i);
return -ENOENT;
}
ret = find_elf_var_offset(obj, name, &off);
if (ret) {
pr_debug("No offset found in symbol table for VAR %s\n",
name);
sym = find_elf_var_sym(obj, var_name);
if (IS_ERR(sym)) {
pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
sec_name, var_name);
return -ENOENT;
}
vsi->offset = off;
if (fixup_offsets)
vsi->offset = sym->st_value;
/* if variable is a global/weak symbol, but has restricted
* (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
* as static. This follows similar logic for functions (BPF
* subprogs) and influences libbpf's further decisions about
* whether to make global data BPF array maps as
* BPF_F_MMAPABLE.
*/
if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
var->linkage = BTF_VAR_STATIC;
}
sort_vars:
@@ -2904,13 +2978,16 @@ sort_vars:
return 0;
}
static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
static int bpf_object_fixup_btf(struct bpf_object *obj)
{
int err = 0;
__u32 i, n = btf__type_cnt(btf);
int i, n, err = 0;
if (!obj->btf)
return 0;
n = btf__type_cnt(obj->btf);
for (i = 1; i < n; i++) {
struct btf_type *t = btf_type_by_id(btf, i);
struct btf_type *t = btf_type_by_id(obj->btf, i);
/* Loader needs to fix up some of the things compiler
* couldn't get its hands on while emitting BTF. This
@@ -2918,28 +2995,12 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
* the info from the ELF itself for this purpose.
*/
if (btf_is_datasec(t)) {
err = btf_fixup_datasec(obj, btf, t);
err = btf_fixup_datasec(obj, obj->btf, t);
if (err)
break;
return err;
}
}
return libbpf_err(err);
}
static int bpf_object__finalize_btf(struct bpf_object *obj)
{
int err;
if (!obj->btf)
return 0;
err = btf_finalize_data(obj, obj->btf);
if (err) {
pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
return err;
}
return 0;
}
@@ -3312,10 +3373,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
Elf64_Shdr *sh;
/* ELF section indices are 0-based, but sec #0 is special "invalid"
* section. e_shnum does include sec #0, so e_shnum is the necessary
* size of an array to keep all the sections.
* section. Since section count retrieved by elf_getshdrnum() does
* include sec #0, it is already the necessary size of an array to keep
* all the sections.
*/
obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;
if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
pr_warn("elf: failed to get the number of sections for %s: %s\n",
obj->path, elf_errmsg(-1));
return -LIBBPF_ERRNO__FORMAT;
}
obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
if (!obj->efile.secs)
return -ENOMEM;
@@ -3445,7 +3511,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->sec_type = SEC_RELO;
sec_desc->shdr = sh;
sec_desc->data = data;
} else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
} else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
str_has_pfx(name, BSS_SEC "."))) {
sec_desc->sec_type = SEC_BSS;
sec_desc->shdr = sh;
sec_desc->data = data;
@@ -3461,7 +3528,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
}
/* sort BPF programs by section name and in-section instruction offset
* for faster search */
* for faster search
*/
if (obj->nr_programs)
qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
@@ -3760,7 +3828,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -EINVAL;
}
ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
&ext->kcfg.is_signed);
&ext->kcfg.is_signed);
if (ext->kcfg.type == KCFG_UNKNOWN) {
pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
return -ENOTSUP;
@@ -4106,6 +4174,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
int l = 0, r = obj->nr_programs - 1, m;
struct bpf_program *prog;
if (!obj->nr_programs)
return NULL;
while (l < r) {
m = l + (r - l + 1) / 2;
prog = &obj->programs[m];
@@ -4223,7 +4294,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
return 0;
}
static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
{
int id;
@@ -4905,9 +4976,9 @@ bpf_object__reuse_map(struct bpf_map *map)
err = bpf_map__reuse_fd(map, pin_fd);
close(pin_fd);
if (err) {
if (err)
return err;
}
map->pinned = true;
pr_debug("reused pinned map at '%s'\n", map->pin_path);
@@ -5425,7 +5496,7 @@ static int load_module_btfs(struct bpf_object *obj)
}
err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
if (err)
goto err_out;
@@ -5541,21 +5612,16 @@ int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
}
static size_t bpf_core_hash_fn(const void *key, void *ctx)
static size_t bpf_core_hash_fn(const long key, void *ctx)
{
return (size_t)key;
return key;
}
static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
{
return k1 == k2;
}
static void *u32_as_hash_key(__u32 x)
{
return (void *)(uintptr_t)x;
}
static int record_relo_core(struct bpf_program *prog,
const struct bpf_core_relo *core_relo, int insn_idx)
{
@@ -5598,7 +5664,6 @@ static int bpf_core_resolve_relo(struct bpf_program *prog,
struct bpf_core_relo_res *targ_res)
{
struct bpf_core_spec specs_scratch[3] = {};
const void *type_key = u32_as_hash_key(relo->type_id);
struct bpf_core_cand_list *cands = NULL;
const char *prog_name = prog->name;
const struct btf_type *local_type;
@@ -5615,7 +5680,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog,
return -EINVAL;
if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
!hashmap__find(cand_cache, type_key, (void **)&cands)) {
!hashmap__find(cand_cache, local_id, &cands)) {
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
if (IS_ERR(cands)) {
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
@@ -5623,7 +5688,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog,
local_name, PTR_ERR(cands));
return PTR_ERR(cands);
}
err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
if (err) {
bpf_core_free_cands(cands);
return err;
@@ -5746,7 +5811,7 @@ out:
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
bpf_core_free_cands(entry->value);
bpf_core_free_cands(entry->pvalue);
}
hashmap__free(cand_cache);
}
@@ -6183,7 +6248,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* prog; each main prog can have a different set of
* subprograms appended (potentially in different order as
* well), so position of any subprog can be different for
* different main programs */
* different main programs
*/
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
@@ -7221,7 +7287,7 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
err = err ? : bpf_object__check_endianness(obj);
err = err ? : bpf_object__elf_collect(obj);
err = err ? : bpf_object__collect_externs(obj);
err = err ? : bpf_object__finalize_btf(obj);
err = err ? : bpf_object_fixup_btf(obj);
err = err ? : bpf_object__init_maps(obj, opts);
err = err ? : bpf_object_init_progs(obj, opts);
err = err ? : bpf_object__collect_relos(obj);
@@ -9837,7 +9903,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
char errmsg[STRERR_BUFSIZE];
int type, pfd;
if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
return -EINVAL;
memset(&attr, 0, attr_sz);
@@ -10941,7 +11007,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
usdt_provider, usdt_name, usdt_cookie);
usdt_provider, usdt_name, usdt_cookie);
err = libbpf_get_error(link);
if (err)
return libbpf_err_ptr(err);
@@ -11169,7 +11235,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf
}
*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
return libbpf_get_error(link);
return libbpf_get_error(*link);
}
/* Common logic for all BPF program types that attach to a btf_id */
@@ -12250,7 +12316,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
btf = bpf_object__btf(s->obj);
if (!btf) {
pr_warn("subskeletons require BTF at runtime (object %s)\n",
bpf_object__name(s->obj));
bpf_object__name(s->obj));
return libbpf_err(-errno);
}

View File

@@ -367,10 +367,14 @@ LIBBPF_1.0.0 {
libbpf_bpf_map_type_str;
libbpf_bpf_prog_type_str;
perf_buffer__buffer;
};
} LIBBPF_0.8.0;
LIBBPF_1.1.0 {
global:
bpf_btf_get_fd_by_id_opts;
bpf_link_get_fd_by_id_opts;
bpf_map_get_fd_by_id_opts;
bpf_prog_get_fd_by_id_opts;
user_ring_buffer__discard;
user_ring_buffer__free;
user_ring_buffer__new;

View File

@@ -39,14 +39,14 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
int libbpf_strerror(int err, char *buf, size_t size)
{
int ret;
if (!buf || !size)
return libbpf_err(-EINVAL);
err = err > 0 ? err : -err;
if (err < __LIBBPF_ERRNO__START) {
int ret;
ret = strerror_r(err, buf, size);
buf[size - 1] = '\0';
return libbpf_err_errno(ret);
@@ -56,12 +56,20 @@ int libbpf_strerror(int err, char *buf, size_t size)
const char *msg;
msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
snprintf(buf, size, "%s", msg);
ret = snprintf(buf, size, "%s", msg);
buf[size - 1] = '\0';
/* The length of the buf and msg is positive.
* A negative number may be returned only when the
* size exceeds INT_MAX. Not likely to appear.
*/
if (ret >= size)
return libbpf_err(-ERANGE);
return 0;
}
snprintf(buf, size, "Unknown libbpf error %d", err);
ret = snprintf(buf, size, "Unknown libbpf error %d", err);
buf[size - 1] = '\0';
if (ret >= size)
return libbpf_err(-ERANGE);
return libbpf_err(-ENOENT);
}

View File

@@ -221,6 +221,7 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_TASK_STORAGE:
case BPF_MAP_TYPE_CGRP_STORAGE:
btf_key_type_id = 1;
btf_value_type_id = 3;
value_size = 8;
@@ -234,7 +235,7 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_USER_RINGBUF:
key_size = 0;
value_size = 0;
max_entries = 4096;
max_entries = sysconf(_SC_PAGE_SIZE);
break;
case BPF_MAP_TYPE_STRUCT_OPS:
/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */

View File

@@ -32,7 +32,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
static int nla_ok(const struct nlattr *nla, int remaining)
{
return remaining >= sizeof(*nla) &&
return remaining >= (int)sizeof(*nla) &&
nla->nla_len >= sizeof(*nla) &&
nla->nla_len <= remaining;
}

View File

@@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
__u32 len = sizeof(info);
struct epoll_event *e;
struct ring *r;
__u64 mmap_sz;
void *tmp;
int err;
@@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
r->mask = info.max_entries - 1;
/* Map writable consumer page */
tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
map_fd, 0);
tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
@@ -128,9 +128,13 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
/* Map read-only producer page and data pages. We map twice as big
* data size to allow simple reading of samples that wrap around the
* end of a ring buffer. See kernel implementation for details.
* */
tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
MAP_SHARED, map_fd, rb->page_size);
*/
mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
if (mmap_sz != (__u64)(size_t)mmap_sz) {
pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
return libbpf_err(-E2BIG);
}
tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
ringbuf_unmap_ring(rb, r);
@@ -220,7 +224,7 @@ static inline int roundup_len(__u32 len)
return (len + 7) / 8 * 8;
}
static int64_t ringbuf_process_ring(struct ring* r)
static int64_t ringbuf_process_ring(struct ring *r)
{
int *len_ptr, len, err;
/* 64-bit to avoid overflow in case of extreme application behavior */
@@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
{
struct bpf_map_info info;
__u32 len = sizeof(info);
__u64 mmap_sz;
void *tmp;
struct epoll_event *rb_epoll;
int err;
@@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
* simple reading and writing of samples that wrap around the end of
* the buffer. See the kernel implementation for details.
*/
tmp = mmap(NULL, rb->page_size + 2 * info.max_entries,
PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size);
mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
if (mmap_sz != (__u64)(size_t)mmap_sz) {
pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries);
return -E2BIG;
}
tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
@@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
__u64 cons_pos, prod_pos;
struct ringbuf_hdr *hdr;
/* The top two bits are used as special flags */
if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT))
return errno = E2BIG, NULL;
/* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
* the kernel.
*/

View File

@@ -19,19 +19,19 @@ struct strset {
struct hashmap *strs_hash;
};
static size_t strset_hash_fn(const void *key, void *ctx)
static size_t strset_hash_fn(long key, void *ctx)
{
const struct strset *s = ctx;
const char *str = s->strs_data + (long)key;
const char *str = s->strs_data + key;
return str_hash(str);
}
static bool strset_equal_fn(const void *key1, const void *key2, void *ctx)
static bool strset_equal_fn(long key1, long key2, void *ctx)
{
const struct strset *s = ctx;
const char *str1 = s->strs_data + (long)key1;
const char *str2 = s->strs_data + (long)key2;
const char *str1 = s->strs_data + key1;
const char *str2 = s->strs_data + key2;
return strcmp(str1, str2) == 0;
}
@@ -67,7 +67,7 @@ struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t ini
/* hashmap__add() returns EEXIST if string with the same
* content already is in the hash map
*/
err = hashmap__add(hash, (void *)off, (void *)off);
err = hashmap__add(hash, off, off);
if (err == -EEXIST)
continue; /* duplicate */
if (err)
@@ -127,7 +127,7 @@ int strset__find_str(struct strset *set, const char *s)
new_off = set->strs_data_len;
memcpy(p, s, len);
if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off))
if (hashmap__find(set->strs_hash, new_off, &old_off))
return old_off;
return -ENOENT;
@@ -165,8 +165,8 @@ int strset__add_str(struct strset *set, const char *s)
* contents doesn't exist already (HASHMAP_ADD strategy). If such
* string exists, we'll get its offset in old_off (that's old_key).
*/
err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off,
HASHMAP_ADD, (const void **)&old_off, NULL);
err = hashmap__insert(set->strs_hash, new_off, new_off,
HASHMAP_ADD, &old_off, NULL);
if (err == -EEXIST)
return old_off; /* duplicated string, return existing offset */
if (err)

View File

@@ -873,31 +873,27 @@ static void bpf_link_usdt_dealloc(struct bpf_link *link)
free(usdt_link);
}
static size_t specs_hash_fn(const void *key, void *ctx)
static size_t specs_hash_fn(long key, void *ctx)
{
const char *s = key;
return str_hash(s);
return str_hash((char *)key);
}
static bool specs_equal_fn(const void *key1, const void *key2, void *ctx)
static bool specs_equal_fn(long key1, long key2, void *ctx)
{
const char *s1 = key1;
const char *s2 = key2;
return strcmp(s1, s2) == 0;
return strcmp((char *)key1, (char *)key2) == 0;
}
static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash,
struct bpf_link_usdt *link, struct usdt_target *target,
int *spec_id, bool *is_new)
{
void *tmp;
long tmp;
void *new_ids;
int err;
/* check if we already allocated spec ID for this spec string */
if (hashmap__find(specs_hash, target->spec_str, &tmp)) {
*spec_id = (long)tmp;
*spec_id = tmp;
*is_new = false;
return 0;
}
@@ -905,17 +901,17 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash
/* otherwise it's a new ID that needs to be set up in specs map and
* returned back to usdt_manager when USDT link is detached
*/
tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
if (!tmp)
new_ids = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
if (!new_ids)
return -ENOMEM;
link->spec_ids = tmp;
link->spec_ids = new_ids;
/* get next free spec ID, giving preference to free list, if not empty */
if (man->free_spec_cnt) {
*spec_id = man->free_spec_ids[man->free_spec_cnt - 1];
/* cache spec ID for current spec string for future lookups */
err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
err = hashmap__add(specs_hash, target->spec_str, *spec_id);
if (err)
return err;
@@ -928,7 +924,7 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash
*spec_id = man->next_free_spec_id;
/* cache spec ID for current spec string for future lookups */
err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
err = hashmap__add(specs_hash, target->spec_str, *spec_id);
if (err)
return err;
@@ -1225,26 +1221,32 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
{
char *reg_name = NULL;
char reg_name[16];
int arg_sz, len, reg_off;
long off;
if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", &arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
} else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) {
} else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) {
/* Memory dereference case without offset, e.g., 8@(%rsp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
} else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
@@ -1348,25 +1350,23 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
{
char *reg_name = NULL;
char reg_name[16];
int arg_sz, len, reg_off;
long off;
if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) {
if (sscanf(arg_str, " %d @ \[ %15[a-z0-9], %ld ] %n", &arg_sz, reg_name, &off, &len) == 3) {
/* Memory dereference case, e.g., -4@[sp, 96] */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
} else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) {
} else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", &arg_sz, reg_name, &len) == 2) {
/* Memory dereference case, e.g., -4@[sp] */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
@@ -1375,12 +1375,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
arg->arg_type = USDT_ARG_CONST;
arg->val_off = off;
arg->reg_off = 0;
} else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
} else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -8@x4 */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
@@ -1459,16 +1458,15 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
{
char *reg_name = NULL;
char reg_name[16];
int arg_sz, len, reg_off;
long off;
if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", &arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -8@-88(s0) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
@@ -1477,12 +1475,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
arg->arg_type = USDT_ARG_CONST;
arg->val_off = off;
arg->reg_off = 0;
} else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
} else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -8@a1 */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
free(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;

View File

@@ -1 +0,0 @@
ci