From c92a5d043ea513dbecde23bbb8109586990a6106 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Aug 2021 11:43:50 -0700 Subject: [PATCH 01/72] ci: don't hang on kernel crashes in qemu Backport of kernel-patches PR ([0]), done by @thefallentree. [0] https://github.com/kernel-patches/vmtest/pull/29 Signed-off-by: Andrii Nakryiko --- travis-ci/vmtest/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/travis-ci/vmtest/run.sh b/travis-ci/vmtest/run.sh index 5309e66..f09d81d 100755 --- a/travis-ci/vmtest/run.sh +++ b/travis-ci/vmtest/run.sh @@ -459,10 +459,10 @@ if kvm-ok ; then else accel="-cpu qemu64 -machine accel=tcg" fi -qemu-system-x86_64 -nodefaults -display none -serial mon:stdio \ +qemu-system-x86_64 -nodefaults -display none -serial mon:stdio -no-reboot \ ${accel} -smp "$(nproc)" -m 4G \ -drive file="$IMG",format=raw,index=1,media=disk,if=virtio,cache=none \ - -kernel "$vmlinuz" -append "root=/dev/vda rw console=ttyS0,115200$APPEND" + -kernel "$vmlinuz" -append "root=/dev/vda rw console=ttyS0,115200 kernel.panic=-1 $APPEND" sudo mount -o loop "$IMG" "$mnt" if exitstatus="$(cat "$mnt/exitstatus" 2>/dev/null)"; then printf '\nTests exit status: %s\n' "$exitstatus" >&2 From ed529685dbc5282ea320710d383a2be7cfe48397 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 19 Aug 2021 02:24:20 -0700 Subject: [PATCH 02/72] bpf: Migrate cgroup_bpf to internal cgroup_bpf_attach_type enum Add an enum (cgroup_bpf_attach_type) containing only valid cgroup_bpf attach types and a function to map bpf_attach_type values to the new enum. Inspired by netns_bpf_attach_type. Then, migrate cgroup_bpf to use cgroup_bpf_attach_type wherever possible. Functionality is unchanged as attach_type_to_prog_type switches in bpf/syscall.c were preventing non-cgroup programs from making use of the invalid cgroup_bpf array slots. As a result struct cgroup_bpf uses 504 fewer bytes relative to when its arrays were sized using MAX_BPF_ATTACH_TYPE. bpf_cgroup_storage is notably not migrated as struct bpf_cgroup_storage_key is part of uapi and contains a bpf_attach_type member which is not meant to be opaque. Similarly, bpf_cgroup_link continues to report its bpf_attach_type member to userspace via fdinfo and bpf_link_info. To ease disambiguation, bpf_attach_type variables are renamed from 'type' to 'atype' when changed to cgroup_bpf_attach_type. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210819092420.1984861-2-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4f7892..191f0b2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,7 +84,7 @@ struct bpf_lpm_trie_key { struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; union bpf_iter_link_info { From 1670e6100b849a16283a71f931806112a92c2dce Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 23 Aug 2021 19:43:49 -0700 Subject: [PATCH 03/72] bpf: Add bpf_task_pt_regs() helper The motivation behind this helper is to access userspace pt_regs in a kprobe handler. uprobe's ctx is the userspace pt_regs. kprobe's ctx is the kernelspace pt_regs. bpf_task_pt_regs() allows accessing userspace pt_regs in a kprobe handler. The final case (kernelspace pt_regs in uprobe) is pretty rare (usermode helper) so I think that can be solved later if necessary. More concretely, this helper is useful in doing BPF-based DWARF stack unwinding. Currently the kernel can only do framepointer based stack unwinds for userspace code. This is because the DWARF state machines are too fragile to be computed in kernelspace [0]. The idea behind DWARF-based stack unwinds w/ BPF is to copy a chunk of the userspace stack (while in prog context) and send it up to userspace for unwinding (probably with libunwind) [1]. This would effectively enable profiling applications with -fomit-frame-pointer using kprobes and uprobes. [0]: https://lkml.org/lkml/2012/2/10/356 [1]: https://github.com/danobi/bpf-dwarf-walk Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/e2718ced2d51ef4268590ab8562962438ab82815.1629772842.git.dxu@dxuuu.xyz --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 191f0b2..791f31d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4871,6 +4871,12 @@ union bpf_attr { * Return * Value specified by user at BPF link creation/attachment time * or 0, if it was not specified. + * + * long bpf_task_pt_regs(struct task_struct *task) + * Description + * Get the struct pt_regs associated with **task**. + * Return + * A pointer to struct pt_regs. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5048,6 +5054,7 @@ union bpf_attr { FN(timer_cancel), \ FN(get_func_ip), \ FN(get_attach_cookie), \ + FN(task_pt_regs), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper From 517762deca22624e9d27594e6868d0f7f8859cfa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Aug 2021 12:43:56 -0700 Subject: [PATCH 04/72] sync: auto-generate latest BPF helpers Latest changes to BPF helper definitions. --- src/bpf_helper_defs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/bpf_helper_defs.h b/src/bpf_helper_defs.h index 3e44468..00929fa 100644 --- a/src/bpf_helper_defs.h +++ b/src/bpf_helper_defs.h @@ -4033,4 +4033,14 @@ static __u64 (*bpf_get_func_ip)(void *ctx) = (void *) 173; */ static __u64 (*bpf_get_attach_cookie)(void *ctx) = (void *) 174; +/* + * bpf_task_pt_regs + * + * Get the struct pt_regs associated with **task**. + * + * Returns + * A pointer to struct pt_regs. + */ +static long (*bpf_task_pt_regs)(struct task_struct *task) = (void *) 175; + From d6e9681b0d633a23efbad76e5ab9c9833277a229 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Aug 2021 12:43:56 -0700 Subject: [PATCH 05/72] sync: latest libbpf changes from kernel Syncing latest libbpf commits from kernel repository. Baseline bpf-next commit: d20b41115ad53293201cc07ee429a38740cb056b Checkpoint bpf-next commit: 47bb27a20d6ea22cd092c1fc2bb4fcecac374838 Baseline bpf commit: 3776f3517ed94d40ff0e3851d7ce2ce17b63099f Checkpoint bpf commit: 3776f3517ed94d40ff0e3851d7ce2ce17b63099f Daniel Xu (1): bpf: Add bpf_task_pt_regs() helper Dave Marchevsky (1): bpf: Migrate cgroup_bpf to internal cgroup_bpf_attach_type enum include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) -- 2.30.2 --- CHECKPOINT-COMMIT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHECKPOINT-COMMIT b/CHECKPOINT-COMMIT index 79178c1..765d989 100644 --- a/CHECKPOINT-COMMIT +++ b/CHECKPOINT-COMMIT @@ -1 +1 @@ -d20b41115ad53293201cc07ee429a38740cb056b +47bb27a20d6ea22cd092c1fc2bb4fcecac374838 From 72fd44da537aa7c5819cf15ea6ddc15d641cc1d6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 30 Aug 2021 13:21:29 -0700 Subject: [PATCH 06/72] ci: blacklist few new selftests for 5.5 Blacklist netns_cookie, sockopt_qos_to_cc, task_pt_regs for 5.5. Signed-off-by: Andrii Nakryiko --- travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 b/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 index cdd3dc3..b3737e3 100644 --- a/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 +++ b/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 @@ -45,6 +45,7 @@ mmap # 5.5 kernel is too permissive with re-mmaping modify_return # fmod_ret support is missing module_attach # module BTF support missing (v5.11+) netcnt +netns_cookie # v5.15+ ns_current_pid_tgid # bpf_get_ns_current_pid_tgid() helper is missing pe_preserve_elems # v5.10+ perf_branches # bpf_read_branch_records() helper is missing @@ -74,10 +75,12 @@ socket_cookie # v5.12+ sockmap_basic # uses new socket fields, 5.8+ sockmap_listen # no listen socket supportin SOCKMAP sockopt_sk +sockopt_qos_to_cc # v5.15+ stacktrace_build_id # v5.9+ stack_var_off # v5.12+ syscall # v5.14+ task_local_storage # v5.12+ +task_pt_regs # v5.15+ tcp_hdr_options # v5.10+, new TCP header options feature in BPF tcpbpf_user # LINK_CREATE is missing tc_redirect # v5.14+ From 50e13993a1e98174fbe0f0b06c1de92f18d593c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 1 Sep 2021 13:48:12 +0200 Subject: [PATCH 07/72] libbpf: Don't crash on object files with no symbol tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If libbpf encounters an ELF file that has been stripped of its symbol table, it will crash in bpf_object__add_programs() when trying to dereference the obj->efile.symbols pointer. Fix this by erroring out of bpf_object__elf_collect() if it is not able able to find the symbol table. v2: - Move check into bpf_object__elf_collect() and add nice error message Fixes: 6245947c1b3c ("libbpf: Allow gaps in BPF program sections to support overriden weak functions") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210901114812.204720-1-toke@redhat.com --- src/libbpf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libbpf.c b/src/libbpf.c index 88d8825..8f579c6 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -2993,6 +2993,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } } + if (!obj->efile.symbols) { + pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", + obj->path); + return -ENOENT; + } + scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { idx++; From 860b201cd0a9ca35538d6d66f7d7e04bb6b1565a Mon Sep 17 00:00:00 2001 From: Matt Smith Date: Wed, 1 Sep 2021 12:44:37 -0700 Subject: [PATCH 08/72] libbpf: Change bpf_object_skeleton data field to const pointer This change was necessary to enforce the implied contract that bpf_object_skeleton->data should not be mutated. The data will be cast to `void *` during assignment to handle the case where a user is compiling with older libbpf headers to avoid a compiler warning of `const void *` data being cast to `void *` Signed-off-by: Matt Smith Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210901194439.3853238-2-alastorze@fb.com --- src/libbpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libbpf.h b/src/libbpf.h index f177d89..2f6f0e1 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -854,7 +854,7 @@ struct bpf_object_skeleton { size_t sz; /* size of this struct, for forward/backward compatibility */ const char *name; - void *data; + const void *data; size_t data_sz; struct bpf_object **obj; From 5579664205e42194e1921d69d0839f660c801a4d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Sep 2021 15:10:23 -0700 Subject: [PATCH 09/72] libbpf: Fix build with latest gcc/binutils with LTO After updating to binutils 2.35, the build began to fail with an assembler error. A bug was opened on the Red Hat Bugzilla a few days later for the same issue. Work around the problem by using the new `symver` attribute (introduced in GCC 10) as needed instead of assembler directives. This addresses Red Hat ([0]) and OpenSUSE ([1]) bug reports, as well as libbpf issue ([2]). [0]: https://bugzilla.redhat.com/show_bug.cgi?id=1863059 [1]: https://bugzilla.opensuse.org/show_bug.cgi?id=1188749 [2]: Closes: https://github.com/libbpf/libbpf/issues/338 Co-developed-by: Patrick McCarty Co-developed-by: Michal Suchanek Signed-off-by: Patrick McCarty Signed-off-by: Michal Suchanek Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210907221023.2660953-1-andrii@kernel.org --- src/libbpf_internal.h | 25 +++++++++++++++++++------ src/xsk.c | 4 ++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/libbpf_internal.h b/src/libbpf_internal.h index 533b021..4f6ff5c 100644 --- a/src/libbpf_internal.h +++ b/src/libbpf_internal.h @@ -90,17 +90,30 @@ /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. + * Starting with GNU C 10, use symver attribute instead of .symver assembler + * directive, which works better with GCC LTO builds. */ -#ifdef SHARED -# define COMPAT_VERSION(internal_name, api_name, version) \ +#if defined(SHARED) && defined(__GNUC__) && __GNUC__ >= 10 + +#define DEFAULT_VERSION(internal_name, api_name, version) \ + __attribute__((symver(#api_name "@@" #version))) +#define COMPAT_VERSION(internal_name, api_name, version) \ + __attribute__((symver(#api_name "@" #version))) + +#elif defined(SHARED) + +#define COMPAT_VERSION(internal_name, api_name, version) \ asm(".symver " #internal_name "," #api_name "@" #version); -# define DEFAULT_VERSION(internal_name, api_name, version) \ +#define DEFAULT_VERSION(internal_name, api_name, version) \ asm(".symver " #internal_name "," #api_name "@@" #version); -#else -# define COMPAT_VERSION(internal_name, api_name, version) -# define DEFAULT_VERSION(internal_name, api_name, version) \ + +#else /* !SHARED */ + +#define COMPAT_VERSION(internal_name, api_name, version) +#define DEFAULT_VERSION(internal_name, api_name, version) \ extern typeof(internal_name) api_name \ __attribute__((alias(#internal_name))); + #endif extern void libbpf_print(enum libbpf_print_level level, diff --git a/src/xsk.c b/src/xsk.c index e9b619a..a211169 100644 --- a/src/xsk.c +++ b/src/xsk.c @@ -281,6 +281,7 @@ out_mmap: return err; } +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -345,6 +346,7 @@ struct xsk_umem_config_v1 { __u32 frame_headroom; }; +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -358,8 +360,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, &config); } -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static enum xsk_prog get_xsk_prog(void) { From 7c26fe30f350f73b84fef4dc1b3722f130bc810c Mon Sep 17 00:00:00 2001 From: grantseltzer Date: Thu, 16 Sep 2021 14:35:37 -0400 Subject: [PATCH 10/72] Add enum to be displayed in documentation Signed-off-by: grantseltzer --- docs/api.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 781972d..4ad9f34 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -12,40 +12,40 @@ libbpf.h -------- .. doxygenfile:: libbpf.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum bpf.h ----- .. doxygenfile:: bpf.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum btf.h ----- .. doxygenfile:: btf.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum xsk.h ----- .. doxygenfile:: xsk.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum bpf_tracing.h ------------- .. doxygenfile:: bpf_tracing.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum bpf_core_read.h --------------- .. doxygenfile:: bpf_core_read.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum bpf_endian.h ------------ .. doxygenfile:: bpf_endian.h :project: libbpf - :sections: func define public-type + :sections: func define public-type enum From eda1ebe52057bf0bc758576d5ac97accfb8bdf93 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 13:05:16 -0700 Subject: [PATCH 11/72] ci: use test_progs -l option to generate the list of whitelisted tests Use this list of enabled tests as a whitelist, so that we don't have to keep updating BLACKLIST-5.5.0 anymore. I'll keep BLACKLIST-5.5.0 for now, because it serves as a nice historic log of which tests depend on which kernels. Signed-off-by: Andrii Nakryiko --- .../vmtest/configs/whitelist/WHITELIST-5.5.0 | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 diff --git a/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 b/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 new file mode 100644 index 0000000..4c22cd8 --- /dev/null +++ b/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 @@ -0,0 +1,55 @@ +attach_probe +autoload +bpf_verif_scale +cgroup_attach_autodetach +cgroup_attach_override +core_autosize +core_extern +core_read_macros +core_reloc +core_retro +cpu_mask +endian +fexit_stress +get_branch_snapshot +get_stackid_cannot_attach +global_data +global_data_init +global_func_args +hashmap +l4lb_all +linked_funcs +linked_maps +map_lock +obj_name +perf_buffer +perf_event_stackmap +pinning +pkt_md_access +probe_user +queue_stack_map +raw_tp_writable_reject_nbd_invalid +raw_tp_writable_test_run +rdonly_maps +section_names +signal_pending +skeleton +sockmap_ktls +sockopt +sockopt_inherit +sockopt_multi +spinlock +stacktrace_map +stacktrace_map_raw_tp +static_linked +subprogs +task_fd_query_rawtp +task_fd_query_tp +tc_bpf +tcp_estats +tcp_rtt +tp_attach_query +xdp +xdp_info +xdp_noinline +xdp_perf From 05d95ef6faf92de12333955ccbe1b36d94278053 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 13:30:16 -0700 Subject: [PATCH 12/72] makefile: add libbpf_version.h to the list of installed headers libbpf_version.h is a new header that is installed along other public libbpf API headers. Signed-off-by: Andrii Nakryiko --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 913082d..61b8190 100644 --- a/src/Makefile +++ b/src/Makefile @@ -51,7 +51,7 @@ endif HEADERS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ bpf_helpers.h bpf_helper_defs.h bpf_tracing.h \ - bpf_endian.h bpf_core_read.h skel_internal.h + bpf_endian.h bpf_core_read.h skel_internal.h libbpf_version.h UAPI_HEADERS := $(addprefix $(TOPDIR)/include/uapi/linux/,\ bpf.h bpf_common.h btf.h) From 514bb47ac519a434d595faf0db43c6298c66c48d Mon Sep 17 00:00:00 2001 From: Rocco Yue Date: Fri, 27 Aug 2021 23:04:12 +0800 Subject: [PATCH 13/72] ipv6: add IFLA_INET6_RA_MTU to expose mtu value The kernel provides a "/proc/sys/net/ipv6/conf//mtu" file, which can temporarily record the mtu value of the last received RA message when the RA mtu value is lower than the interface mtu, but this proc has following limitations: (1) when the interface mtu (/sys/class/net//mtu) is updeated, mtu6 (/proc/sys/net/ipv6/conf//mtu) will be updated to the value of interface mtu; (2) mtu6 (/proc/sys/net/ipv6/conf//mtu) only affect ipv6 connection, and not affect ipv4. Therefore, when the mtu option is carried in the RA message, there will be a problem that the user sometimes cannot obtain RA mtu value correctly by reading mtu6. After this patch set, if a RA message carries the mtu option, you can send a netlink msg which nlmsg_type is RTM_GETLINK, and then by parsing the attribute of IFLA_INET6_RA_MTU to get the mtu value carried in the RA message received on the inet6 device. In addition, you can also get a link notification when ra_mtu is updated so it doesn't have to poll. In this way, if the MTU values that the device receives from the network in the PCO IPv4 and the RA IPv6 procedures are different, the user can obtain the correct ipv6 ra_mtu value and compare the value of ra_mtu and ipv4 mtu, then the device can use the lower MTU value for both IPv4 and IPv6. Signed-off-by: Rocco Yue Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210827150412.9267-1-rocco.yue@mediatek.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index eb15f31..b3610fd 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -230,6 +230,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; From f8983f7fb04745abe513fd2dad57157497a1221f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 8 Sep 2021 14:32:26 -0700 Subject: [PATCH 14/72] libbpf: Add LIBBPF_DEPRECATED_SINCE macro for scheduling API deprecations Introduce a macro LIBBPF_DEPRECATED_SINCE(major, minor, message) to prepare the deprecation of two API functions. This macro marks functions as deprecated when libbpf's version reaches the values passed as an argument. As part of this change libbpf_version.h header is added with recorded major (LIBBPF_MAJOR_VERSION) and minor (LIBBPF_MINOR_VERSION) libbpf version macros. They are now part of libbpf public API and can be relied upon by user code. libbpf_version.h is installed system-wide along other libbpf public headers. Due to this new build-time auto-generated header, in-kernel applications relying on libbpf (resolve_btfids, bpftool, bpf_preload) are updated to include libbpf's output directory as part of a list of include search paths. Better fix would be to use libbpf's make_install target to install public API headers, but that clean up is left out as a future improvement. The build changes were tested by building kernel (with KBUILD_OUTPUT and O= specified explicitly), bpftool, libbpf, selftests/bpf, and resolve_btfids builds. No problems were detected. Note that because of the constraints of the C preprocessor we have to write a few lines of macro magic for each version used to prepare deprecation (0.6 for now). Also, use LIBBPF_DEPRECATED_SINCE() to schedule deprecation of btf__get_from_id() and btf__load(), which are replaced by btf__load_from_kernel_by_id() and btf__load_into_kernel(), respectively, starting from future libbpf v0.6. This is part of libbpf 1.0 effort ([0]). [0] Closes: https://github.com/libbpf/libbpf/issues/278 Co-developed-by: Quentin Monnet Co-developed-by: Andrii Nakryiko Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210908213226.1871016-1-andrii@kernel.org --- src/btf.h | 2 ++ src/libbpf_common.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/btf.h b/src/btf.h index 4a711f9..f2e2fab 100644 --- a/src/btf.h +++ b/src/btf.h @@ -50,9 +50,11 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, diff --git a/src/libbpf_common.h b/src/libbpf_common.h index 947d8bd..36ac77f 100644 --- a/src/libbpf_common.h +++ b/src/libbpf_common.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_COMMON_H #include +#include "libbpf_version.h" #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) @@ -17,6 +18,24 @@ #define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg))) +/* Mark a symbol as deprecated when libbpf version is >= {major}.{minor} */ +#define LIBBPF_DEPRECATED_SINCE(major, minor, msg) \ + __LIBBPF_MARK_DEPRECATED_ ## major ## _ ## minor \ + (LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg)) + +#define __LIBBPF_CURRENT_VERSION_GEQ(major, minor) \ + (LIBBPF_MAJOR_VERSION > (major) || \ + (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor))) + +/* Add checks for other versions below when planning deprecation of API symbols + * with the LIBBPF_DEPRECATED_SINCE macro. + */ +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6) +#define __LIBBPF_MARK_DEPRECATED_0_6(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_6(X) +#endif + /* Helper macro to declare and initialize libbpf options struct * * This dance with uninitialized declaration, followed by memset to zero, From 03f31a6aed6c483583ec430fedc407a0464093a0 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Fri, 10 Sep 2021 01:04:08 +0300 Subject: [PATCH 15/72] bpf: Add hardware timestamp field to __sk_buff BPF programs may want to know hardware timestamps if NIC supports such timestamping. Expose this data as hwtstamp field of __sk_buff the same way as gso_segs/gso_size. This field could be accessed from the same programs as tstamp field, but it's read-only field. Explicit test to deny access to padding data is added to bpf_skb_is_valid_access. Also update BPF_PROG_TEST_RUN tests of the feature. Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210909220409.8804-2-vfedorenko@novek.ru --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 791f31d..51cfd91 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5284,6 +5284,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; + __u32 :32; /* Padding, future use. */ + __u64 hwtstamp; }; struct bpf_tunnel_key { From 0c0f4a57daff2f3076249c9625c6ddbafa05d3a9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 10 Sep 2021 11:33:51 -0700 Subject: [PATCH 16/72] bpf: Introduce helper bpf_get_branch_snapshot Introduce bpf_get_branch_snapshot(), which allows tracing pogram to get branch trace from hardware (e.g. Intel LBR). To use the feature, the user need to create perf_event with proper branch_record filtering on each cpu, and then calls bpf_get_branch_snapshot in the bpf function. On Intel CPUs, VLBR event (raw event 0x1b00) can be use for this. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210910183352.3151445-3-songliubraving@fb.com --- include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 51cfd91..d213265 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4877,6 +4877,27 @@ union bpf_attr { * Get the struct pt_regs associated with **task**. * Return * A pointer to struct pt_regs. + * + * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) + * Description + * Get branch trace from hardware engines like Intel LBR. The + * hardware engine is stopped shortly after the helper is + * called. Therefore, the user need to filter branch entries + * based on the actual use case. To capture branch trace + * before the trigger point of the BPF program, the helper + * should be called at the beginning of the BPF program. + * + * The data is stored as struct perf_branch_entry into output + * buffer *entries*. *size* is the size of *entries* in bytes. + * *flags* is reserved for now and must be zero. + * + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-ENOENT** if architecture does not support branch records. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5055,6 +5076,7 @@ union bpf_attr { FN(get_func_ip), \ FN(get_attach_cookie), \ FN(task_pt_regs), \ + FN(get_branch_snapshot), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper From 8ade99a6f84d0d7e70965451533b8d76926ee391 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 13 Sep 2021 15:23:09 -0700 Subject: [PATCH 17/72] libbpf: Make libbpf_version.h non-auto-generated Turn previously auto-generated libbpf_version.h header into a normal header file. This prevents various tricky Makefile integration issues, simplifies the overall build process, but also allows to further extend it with some more versioning-related APIs in the future. To prevent accidental out-of-sync versions as defined by libbpf.map and libbpf_version.h, Makefile checks their consistency at build time. Simultaneously with this change bump libbpf.map to v0.6. Also undo adding libbpf's output directory into include path for kernel/bpf/preload, bpftool, and resolve_btfids, which is not necessary because libbpf_version.h is just a normal header like any other. Fixes: 0b46b7550560 ("libbpf: Add LIBBPF_DEPRECATED_SINCE macro for scheduling API deprecations") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210913222309.3220849-1-andrii@kernel.org --- src/libbpf.map | 3 +++ src/libbpf_version.h | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 src/libbpf_version.h diff --git a/src/libbpf.map b/src/libbpf.map index bbc53bb..78ea62c 100644 --- a/src/libbpf.map +++ b/src/libbpf.map @@ -386,3 +386,6 @@ LIBBPF_0.5.0 { btf_dump__dump_type_data; libbpf_set_strict_mode; } LIBBPF_0.4.0; + +LIBBPF_0.6.0 { +} LIBBPF_0.5.0; diff --git a/src/libbpf_version.h b/src/libbpf_version.h new file mode 100644 index 0000000..dd56d76 --- /dev/null +++ b/src/libbpf_version.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (C) 2021 Facebook */ +#ifndef __LIBBPF_VERSION_H +#define __LIBBPF_VERSION_H + +#define LIBBPF_MAJOR_VERSION 0 +#define LIBBPF_MINOR_VERSION 6 + +#endif /* __LIBBPF_VERSION_H */ From 749b3942a09300a570198bc27814405ab92607ae Mon Sep 17 00:00:00 2001 From: Rafael David Tinoco Date: Sun, 12 Sep 2021 03:48:44 -0300 Subject: [PATCH 18/72] libbpf: Introduce legacy kprobe events support Allow kprobe tracepoint events creation through legacy interface, as the kprobe dynamic PMUs support, used by default, was only created in v4.17. Store legacy kprobe name in struct bpf_perf_link, instead of creating a new "subclass" off of bpf_perf_link. This is ok as it's just two new fields, which are also going to be reused for legacy uprobe support in follow up patches. Signed-off-by: Rafael David Tinoco Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210912064844.3181742-1-rafaeldtinoco@gmail.com --- src/libbpf.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 4 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 8f579c6..9d99183 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -8993,9 +8993,57 @@ int bpf_link__unpin(struct bpf_link *link) return 0; } +static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) +{ + int fd, ret = 0; + pid_t p = getpid(); + char cmd[260], probename[128], probefunc[128]; + const char *file = "/sys/kernel/debug/tracing/kprobe_events"; + + if (retprobe) + snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, p); + else + snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, p); + + if (offset) + snprintf(probefunc, sizeof(probefunc), "%s+%zu", name, (size_t)offset); + + if (add) { + snprintf(cmd, sizeof(cmd), "%c:%s %s", + retprobe ? 'r' : 'p', + probename, + offset ? probefunc : name); + } else { + snprintf(cmd, sizeof(cmd), "-:%s", probename); + } + + fd = open(file, O_WRONLY | O_APPEND, 0); + if (!fd) + return -errno; + ret = write(fd, cmd, strlen(cmd)); + if (ret < 0) + ret = -errno; + close(fd); + + return ret; +} + +static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset) +{ + return poke_kprobe_events(true, name, retprobe, offset); +} + +static inline int remove_kprobe_event_legacy(const char *name, bool retprobe) +{ + return poke_kprobe_events(false, name, retprobe, 0); +} + struct bpf_link_perf { struct bpf_link link; int perf_event_fd; + /* legacy kprobe support: keep track of probe identifier and type */ + char *legacy_probe_name; + bool legacy_is_retprobe; }; static int bpf_link_perf_detach(struct bpf_link *link) @@ -9010,13 +9058,19 @@ static int bpf_link_perf_detach(struct bpf_link *link) close(perf_link->perf_event_fd); close(link->fd); - return libbpf_err(err); + /* legacy kprobe needs to be removed after perf event fd closure */ + if (perf_link->legacy_probe_name) + err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, + perf_link->legacy_is_retprobe); + + return err; } static void bpf_link_perf_dealloc(struct bpf_link *link) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + free(perf_link->legacy_probe_name); free(perf_link); } @@ -9130,6 +9184,18 @@ static int parse_uint_from_file(const char *file, const char *fmt) return ret; } +static int determine_kprobe_perf_type_legacy(const char *func_name, bool is_retprobe) +{ + char file[192]; + + snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s_libbpf_%d/id", + is_retprobe ? "kretprobes" : "kprobes", + func_name, getpid()); + + return parse_uint_from_file(file, "%d\n"); +} + static int determine_kprobe_perf_type(void) { const char *file = "/sys/bus/event_source/devices/kprobe/type"; @@ -9212,6 +9278,41 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } +static int perf_event_kprobe_open_legacy(bool retprobe, const char *name, uint64_t offset, int pid) +{ + struct perf_event_attr attr = {}; + char errmsg[STRERR_BUFSIZE]; + int type, pfd, err; + + err = add_kprobe_event_legacy(name, retprobe, offset); + if (err < 0) { + pr_warn("failed to add legacy kprobe event: %s\n", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + type = determine_kprobe_perf_type_legacy(name, retprobe); + if (type < 0) { + pr_warn("failed to determine legacy kprobe event id: %s\n", + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + return type; + } + attr.size = sizeof(attr); + attr.config = type; + attr.type = PERF_TYPE_TRACEPOINT; + + pfd = syscall(__NR_perf_event_open, &attr, + pid < 0 ? -1 : pid, /* pid */ + pid == -1 ? 0 : -1, /* cpu */ + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warn("legacy kprobe perf_event_open() failed: %s\n", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + return pfd; +} + struct bpf_link * bpf_program__attach_kprobe_opts(struct bpf_program *prog, const char *func_name, @@ -9219,9 +9320,10 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE]; + char *legacy_probe = NULL; struct bpf_link *link; unsigned long offset; - bool retprobe; + bool retprobe, legacy; int pfd, err; if (!OPTS_VALID(opts, bpf_kprobe_opts)) @@ -9231,8 +9333,19 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog, offset = OPTS_GET(opts, offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, - offset, -1 /* pid */, 0 /* ref_ctr_off */); + legacy = determine_kprobe_perf_type() < 0; + if (!legacy) { + pfd = perf_event_open_probe(false /* uprobe */, retprobe, + func_name, offset, + -1 /* pid */, 0 /* ref_ctr_off */); + } else { + legacy_probe = strdup(func_name); + if (!legacy_probe) + return libbpf_err_ptr(-ENOMEM); + + pfd = perf_event_kprobe_open_legacy(retprobe, func_name, + offset, -1 /* pid */); + } if (pfd < 0) { pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, @@ -9248,6 +9361,13 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return libbpf_err_ptr(err); } + if (legacy) { + struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + + perf_link->legacy_probe_name = legacy_probe; + perf_link->legacy_is_retprobe = retprobe; + } + return link; } From f3c744997f89dcd9520e58d29363af97c649f35e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 13 Sep 2021 18:47:31 -0700 Subject: [PATCH 19/72] libbpf: Ensure BPF prog types are set before relocations Refactor bpf_object__open() sequencing to perform BPF program type detection based on SEC() definitions before we get to relocations collection. This allows to have more information about BPF program by the time we get to, say, struct_ops relocation gathering. This, subsequently, simplifies struct_ops logic and removes the need to perform extra find_sec_def() resolution. With this patch libbpf will require all struct_ops BPF programs to be marked with SEC("struct_ops") or SEC("struct_ops/xxx") annotations. Real-world applications are already doing that through something like selftests's BPF_STRUCT_OPS() macro. This change streamlines libbpf's internal handling of SEC() definitions and is in the sprit of upcoming libbpf-1.0 section strictness changes ([0]). [0] https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0#stricter-and-more-uniform-bpf-program-section-name-sec-handling Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914014733.2768-3-andrii@kernel.org --- src/libbpf.c | 93 ++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 9d99183..6abfbf2 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -6373,12 +6373,37 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) static const struct bpf_sec_def *find_sec_def(const char *sec_name); +static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + prog->sec_def = find_sec_def(prog->sec_name); + if (!prog->sec_def) { + /* couldn't guess, but user might manually specify */ + pr_debug("prog '%s': unrecognized ELF section name '%s'\n", + prog->name, prog->sec_name); + continue; + } + + if (prog->sec_def->is_sleepable) + prog->prog_flags |= BPF_F_SLEEPABLE; + bpf_program__set_type(prog, prog->sec_def->prog_type); + bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + + if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || + prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); + } + + return 0; +} + static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { const char *obj_name, *kconfig, *btf_tmp_path; - struct bpf_program *prog; struct bpf_object *obj; char tmp_name[64]; int err; @@ -6436,31 +6461,13 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object__finalize_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); + err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); if (err) goto out; + bpf_object__elf_finish(obj); - bpf_object__for_each_program(prog, obj) { - prog->sec_def = find_sec_def(prog->sec_name); - if (!prog->sec_def) { - /* couldn't guess, but user might manually specify */ - pr_debug("prog '%s': unrecognized ELF section name '%s'\n", - prog->name, prog->sec_name); - continue; - } - - if (prog->sec_def->is_sleepable) - prog->prog_flags |= BPF_F_SLEEPABLE; - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, - prog->sec_def->expected_attach_type); - - if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || - prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) - prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - } - return obj; out: bpf_object__close(obj); @@ -8250,35 +8257,37 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - if (prog->type == BPF_PROG_TYPE_UNSPEC) { - const struct bpf_sec_def *sec_def; + /* prevent the use of BPF prog with invalid type */ + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { + pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", + map->name, prog->name); + return -EINVAL; + } - sec_def = find_sec_def(prog->sec_name); - if (sec_def && - sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { - /* for pr_warn */ - prog->type = sec_def->prog_type; - goto invalid_prog; - } - - prog->type = BPF_PROG_TYPE_STRUCT_OPS; + /* if we haven't yet processed this BPF program, record proper + * attach_btf_id and member_idx + */ + if (!prog->attach_btf_id) { prog->attach_btf_id = st_ops->type_id; prog->expected_attach_type = member_idx; - } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || - prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - goto invalid_prog; } + + /* struct_ops BPF prog can be re-used between multiple + * .struct_ops as long as it's the same struct_ops struct + * definition and the same function pointer field + */ + if (prog->attach_btf_id != st_ops->type_id || + prog->expected_attach_type != member_idx) { + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", + map->name, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, prog->expected_attach_type, name); + return -EINVAL; + } + st_ops->progs[member_idx] = prog; } return 0; - -invalid_prog: - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; } #define BTF_TRACE_PREFIX "btf_trace_" From 27d14b6e3b143634b05aeeeb752dfbdbccd6b658 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 13 Sep 2021 18:47:32 -0700 Subject: [PATCH 20/72] libbpf: Simplify BPF program auto-attach code Remove the need to explicitly pass bpf_sec_def for auto-attachable BPF programs, as it is already recorded at bpf_object__open() time for all recognized type of BPF programs. This further reduces number of explicit calls to find_sec_def(), simplifying further refactorings. No functional changes are done by this patch. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914014733.2768-4-andrii@kernel.org --- src/libbpf.c | 61 +++++++++++++++++++--------------------------------- 1 file changed, 22 insertions(+), 39 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 6abfbf2..e473fbd 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -218,8 +218,7 @@ struct reloc_desc { struct bpf_sec_def; -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, - struct bpf_program *prog); +typedef struct bpf_link *(*attach_fn_t)(struct bpf_program *prog); struct bpf_sec_def { const char *sec; @@ -7920,18 +7919,12 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, - struct bpf_program *prog); +static struct bpf_link *attach_kprobe(struct bpf_program *prog); +static struct bpf_link *attach_tp(struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(struct bpf_program *prog); +static struct bpf_link *attach_trace(struct bpf_program *prog); +static struct bpf_link *attach_lsm(struct bpf_program *prog); +static struct bpf_link *attach_iter(struct bpf_program *prog); static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -9391,8 +9384,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_kprobe(struct bpf_program *prog) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; @@ -9401,8 +9393,8 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, char *func; int n, err; - func_name = prog->sec_name + sec->len; - opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; + func_name = prog->sec_name + prog->sec_def->len; + opts.retprobe = strcmp(prog->sec_def->sec, "kretprobe/") == 0; n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { @@ -9565,8 +9557,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_tp(struct bpf_program *prog) { char *sec_name, *tp_cat, *tp_name; struct bpf_link *link; @@ -9576,7 +9567,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, return libbpf_err_ptr(-ENOMEM); /* extract "tp//" */ - tp_cat = sec_name + sec->len; + tp_cat = sec_name + prog->sec_def->len; tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); @@ -9620,10 +9611,9 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_raw_tp(struct bpf_program *prog) { - const char *tp_name = prog->sec_name + sec->len; + const char *tp_name = prog->sec_name + prog->sec_def->len; return bpf_program__attach_raw_tracepoint(prog, tp_name); } @@ -9668,14 +9658,12 @@ struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog) return bpf_program__attach_btf_id(prog); } -static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_trace(struct bpf_program *prog) { return bpf_program__attach_trace(prog); } -static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_lsm(struct bpf_program *prog) { return bpf_program__attach_lsm(prog); } @@ -9806,21 +9794,17 @@ bpf_program__attach_iter(struct bpf_program *prog, return link; } -static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_iter(struct bpf_program *prog) { return bpf_program__attach_iter(prog, NULL); } struct bpf_link *bpf_program__attach(struct bpf_program *prog) { - const struct bpf_sec_def *sec_def; - - sec_def = find_sec_def(prog->sec_name); - if (!sec_def || !sec_def->attach_fn) + if (!prog->sec_def || !prog->sec_def->attach_fn) return libbpf_err_ptr(-ESRCH); - return sec_def->attach_fn(sec_def, prog); + return prog->sec_def->attach_fn(prog); } static int bpf_link__detach_struct_ops(struct bpf_link *link) @@ -10899,16 +10883,15 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->prog_cnt; i++) { struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; - const struct bpf_sec_def *sec_def; if (!prog->load) continue; - sec_def = find_sec_def(prog->sec_name); - if (!sec_def || !sec_def->attach_fn) + /* auto-attaching not supported for this program */ + if (!prog->sec_def || !prog->sec_def->attach_fn) continue; - *link = sec_def->attach_fn(sec_def, prog); + *link = prog->sec_def->attach_fn(prog); err = libbpf_get_error(*link); if (err) { pr_warn("failed to auto-attach program '%s': %d\n", From d328ba7768bb69b22e2b9abfaf8791a9d2efe040 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 13 Sep 2021 18:47:33 -0700 Subject: [PATCH 21/72] libbpf: Minimize explicit iterator of section definition array Remove almost all the code that explicitly iterated BPF program section definitions in favor of using find_sec_def(). The only remaining user of section_defs is libbpf_get_type_names that has to iterate all of them to construct its result. Having one internal API entry point for section definitions will simplify further refactorings around libbpf's program section definitions parsing. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914014733.2768-5-andrii@kernel.org --- src/libbpf.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index e473fbd..62ed073 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -8438,22 +8438,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, __u32 attach_prog_fd = prog->attach_prog_fd; const char *name = prog->sec_name, *attach_name; const struct bpf_sec_def *sec = NULL; - int i, err = 0; + int err = 0; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - if (!section_defs[i].is_attach_btf) - continue; - if (strncmp(name, section_defs[i].sec, section_defs[i].len)) - continue; - - sec = §ion_defs[i]; - break; - } - - if (!sec) { + sec = find_sec_def(name); + if (!sec || !sec->is_attach_btf) { pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name); return -ESRCH; } @@ -8491,27 +8482,28 @@ int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type) { char *type_names; - int i; + const struct bpf_sec_def *sec_def; if (!name) return libbpf_err(-EINVAL); - for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - if (strncmp(name, section_defs[i].sec, section_defs[i].len)) - continue; - if (!section_defs[i].is_attachable) - return libbpf_err(-EINVAL); - *attach_type = section_defs[i].expected_attach_type; - return 0; - } - pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); - type_names = libbpf_get_type_names(true); - if (type_names != NULL) { - pr_debug("attachable section(type) names are:%s\n", type_names); - free(type_names); + sec_def = find_sec_def(name); + if (!sec_def) { + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_debug("attachable section(type) names are:%s\n", type_names); + free(type_names); + } + + return libbpf_err(-EINVAL); } - return libbpf_err(-EINVAL); + if (!sec_def->is_attachable) + return libbpf_err(-EINVAL); + + *attach_type = sec_def->expected_attach_type; + return 0; } int bpf_map__fd(const struct bpf_map *map) From a6188fc5b4861f5f746f27a1d71a34fc1ae8e284 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 14 Sep 2021 15:30:09 -0700 Subject: [PATCH 22/72] btf: Change BTF_KIND_* macros to enums Change BTF_KIND_* macros to enums so they are encoded in dwarf and appear in vmlinux.h. This will make it easier for bpf programs to use these constants without macro definitions. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210914223009.245307-1-yhs@fb.com --- include/uapi/linux/btf.h | 41 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index d27b170..10e4010 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -56,25 +56,28 @@ struct btf_type { #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) -#define BTF_KIND_UNKN 0 /* Unknown */ -#define BTF_KIND_INT 1 /* Integer */ -#define BTF_KIND_PTR 2 /* Pointer */ -#define BTF_KIND_ARRAY 3 /* Array */ -#define BTF_KIND_STRUCT 4 /* Struct */ -#define BTF_KIND_UNION 5 /* Union */ -#define BTF_KIND_ENUM 6 /* Enumeration */ -#define BTF_KIND_FWD 7 /* Forward */ -#define BTF_KIND_TYPEDEF 8 /* Typedef */ -#define BTF_KIND_VOLATILE 9 /* Volatile */ -#define BTF_KIND_CONST 10 /* Const */ -#define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_FUNC 12 /* Function */ -#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_VAR 14 /* Variable */ -#define BTF_KIND_DATASEC 15 /* Section */ -#define BTF_KIND_FLOAT 16 /* Floating point */ -#define BTF_KIND_MAX BTF_KIND_FLOAT -#define NR_BTF_KINDS (BTF_KIND_MAX + 1) +enum { + BTF_KIND_UNKN = 0, /* Unknown */ + BTF_KIND_INT = 1, /* Integer */ + BTF_KIND_PTR = 2, /* Pointer */ + BTF_KIND_ARRAY = 3, /* Array */ + BTF_KIND_STRUCT = 4, /* Struct */ + BTF_KIND_UNION = 5, /* Union */ + BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_FWD = 7, /* Forward */ + BTF_KIND_TYPEDEF = 8, /* Typedef */ + BTF_KIND_VOLATILE = 9, /* Volatile */ + BTF_KIND_CONST = 10, /* Const */ + BTF_KIND_RESTRICT = 11, /* Restrict */ + BTF_KIND_FUNC = 12, /* Function */ + BTF_KIND_FUNC_PROTO = 13, /* Function Proto */ + BTF_KIND_VAR = 14, /* Variable */ + BTF_KIND_DATASEC = 15, /* Section */ + BTF_KIND_FLOAT = 16, /* Floating point */ + + NR_BTF_KINDS, + BTF_KIND_MAX = NR_BTF_KINDS - 1, +}; /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. From fad7357469711ebb3b3bce43eb4d454d5b31bc90 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 14 Sep 2021 15:30:15 -0700 Subject: [PATCH 23/72] bpf: Support for new btf kind BTF_KIND_TAG LLVM14 added support for a new C attribute ([1]) __attribute__((btf_tag("arbitrary_str"))) This attribute will be emitted to dwarf ([2]) and pahole will convert it to BTF. Or for bpf target, this attribute will be emitted to BTF directly ([3], [4]). The attribute is intended to provide additional information for - struct/union type or struct/union member - static/global variables - static/global function or function parameter. For linux kernel, the btf_tag can be applied in various places to specify user pointer, function pre- or post- condition, function allow/deny in certain context, etc. Such information will be encoded in vmlinux BTF and can be used by verifier. The btf_tag can also be applied to bpf programs to help global verifiable functions, e.g., specifying preconditions, etc. This patch added basic parsing and checking support in kernel for new BTF_KIND_TAG kind. [1] https://reviews.llvm.org/D106614 [2] https://reviews.llvm.org/D106621 [3] https://reviews.llvm.org/D106622 [4] https://reviews.llvm.org/D109560 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210914223015.245546-1-yhs@fb.com --- include/uapi/linux/btf.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 10e4010..642b6ec 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and VAR. + * FUNC, FUNC_PROTO, VAR and TAG. * "type" is a type_id referring to another type. */ union { @@ -74,6 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ + BTF_KIND_TAG = 17, /* Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -173,4 +174,15 @@ struct btf_var_secinfo { __u32 size; }; +/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe + * additional information related to the tag applied location. + * If component_idx == -1, the tag is applied to a struct, union, + * variable or function. Otherwise, it is applied to a struct/union + * member or a func argument, and component_idx indicates which member + * or argument (0 ... vlen-1). + */ +struct btf_tag { + __s32 component_idx; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ From 966ba8918dc16723586560282215a0d09390c5a2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 14 Sep 2021 15:30:20 -0700 Subject: [PATCH 24/72] libbpf: Rename btf_{hash,equal}_int to btf_{hash,equal}_int_tag This patch renames functions btf_{hash,equal}_int() to btf_{hash,equal}_int_tag() so they can be reused for BTF_KIND_TAG support. There is no functionality change for this patch. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210914223020.245829-1-yhs@fb.com --- src/btf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/btf.c b/src/btf.c index 77dc24d..7cb6ebf 100644 --- a/src/btf.c +++ b/src/btf.c @@ -3256,8 +3256,8 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } -/* Calculate type signature hash of INT. */ -static long btf_hash_int(struct btf_type *t) +/* Calculate type signature hash of INT or TAG. */ +static long btf_hash_int_tag(struct btf_type *t) { __u32 info = *(__u32 *)(t + 1); long h; @@ -3267,8 +3267,8 @@ static long btf_hash_int(struct btf_type *t) return h; } -/* Check structural equality of two INTs. */ -static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +/* Check structural equality of two INTs or TAGs. */ +static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) { __u32 info1, info2; @@ -3535,7 +3535,7 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_common(t); break; case BTF_KIND_INT: - h = btf_hash_int(t); + h = btf_hash_int_tag(t); break; case BTF_KIND_ENUM: h = btf_hash_enum(t); @@ -3593,11 +3593,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) return 0; case BTF_KIND_INT: - h = btf_hash_int(t); + h = btf_hash_int_tag(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); - if (btf_equal_int(t, cand)) { + if (btf_equal_int_tag(t, cand)) { new_id = cand_id; break; } @@ -3881,7 +3881,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, switch (cand_kind) { case BTF_KIND_INT: - return btf_equal_int(cand_type, canon_type); + return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: if (d->opts.dont_resolve_fwds) From 7e7f59d658d47c55e791bda2f6f9eb7f51352b13 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 14 Sep 2021 15:30:25 -0700 Subject: [PATCH 25/72] libbpf: Add support for BTF_KIND_TAG Add BTF_KIND_TAG support for parsing and dedup. Also added sanitization for BTF_KIND_TAG. If BTF_KIND_TAG is not supported in the kernel, sanitize it to INTs. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210914223025.246687-1-yhs@fb.com --- src/btf.c | 68 +++++++++++++++++++++++++++++++++++++++++++ src/btf.h | 15 ++++++++++ src/btf_dump.c | 3 ++ src/libbpf.c | 31 ++++++++++++++++++-- src/libbpf.map | 2 ++ src/libbpf_internal.h | 2 ++ 6 files changed, 118 insertions(+), 3 deletions(-) diff --git a/src/btf.c b/src/btf.c index 7cb6ebf..6ad63e4 100644 --- a/src/btf.c +++ b/src/btf.c @@ -304,6 +304,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); + case BTF_KIND_TAG: + return base_size + sizeof(struct btf_tag); default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -376,6 +378,9 @@ static int btf_bswap_type_rest(struct btf_type *t) v->size = bswap_32(v->size); } return 0; + case BTF_KIND_TAG: + btf_tag(t)->component_idx = bswap_32(btf_tag(t)->component_idx); + return 0; default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -586,6 +591,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_VAR: + case BTF_KIND_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -2440,6 +2446,48 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return 0; } +/* + * Append new BTF_KIND_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + struct btf_type *t; + int sz, value_off; + + if (!value || !value[0] || component_idx < -1) + return libbpf_err(-EINVAL); + + if (validate_type_id(ref_type_id)) + return libbpf_err(-EINVAL); + + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + sz = sizeof(struct btf_type) + sizeof(struct btf_tag); + t = btf_add_type_mem(btf, sz); + if (!t) + return libbpf_err(-ENOMEM); + + value_off = btf__add_str(btf, value); + if (value_off < 0) + return value_off; + + t->name_off = value_off; + t->info = btf_type_info(BTF_KIND_TAG, 0, false); + t->type = ref_type_id; + btf_tag(t)->component_idx = component_idx; + + return btf_commit_type(btf, sz); +} + struct btf_ext_sec_setup_param { __u32 off; __u32 len; @@ -3535,6 +3583,7 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_common(t); break; case BTF_KIND_INT: + case BTF_KIND_TAG: h = btf_hash_int_tag(t); break; case BTF_KIND_ENUM: @@ -3590,6 +3639,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FUNC_PROTO: case BTF_KIND_VAR: case BTF_KIND_DATASEC: + case BTF_KIND_TAG: return 0; case BTF_KIND_INT: @@ -4210,6 +4260,23 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) } break; + case BTF_KIND_TAG: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_int_tag(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = btf_type_by_id(d->btf, cand_id); + if (btf_equal_int_tag(t, cand)) { + new_id = cand_id; + break; + } + } + break; + case BTF_KIND_ARRAY: { struct btf_array *info = btf_array(t); @@ -4482,6 +4549,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: + case BTF_KIND_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/src/btf.h b/src/btf.h index f2e2fab..659ea8a 100644 --- a/src/btf.h +++ b/src/btf.h @@ -143,6 +143,10 @@ LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz); +/* tag construction API */ +LIBBPF_API int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; @@ -330,6 +334,11 @@ static inline bool btf_is_float(const struct btf_type *t) return btf_kind(t) == BTF_KIND_FLOAT; } +static inline bool btf_is_tag(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_TAG; +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); @@ -398,6 +407,12 @@ btf_var_secinfos(const struct btf_type *t) return (struct btf_var_secinfo *)(t + 1); } +struct btf_tag; +static inline struct btf_tag *btf_tag(const struct btf_type *t) +{ + return (struct btf_tag *)(t + 1); +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/btf_dump.c b/src/btf_dump.c index e4b483f..ad6df97 100644 --- a/src/btf_dump.c +++ b/src/btf_dump.c @@ -316,6 +316,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: + case BTF_KIND_TAG: d->type_states[t->type].referenced = 1; break; @@ -583,6 +584,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DATASEC: + case BTF_KIND_TAG: d->type_states[id].order_state = ORDERED; return 0; @@ -2215,6 +2217,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TAG: err = btf_dump_unsupported_data(d, t, id); break; case BTF_KIND_INT: diff --git a/src/libbpf.c b/src/libbpf.c index 62ed073..62a43c4 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -195,6 +195,8 @@ enum kern_feature_id { FEAT_BTF_FLOAT, /* BPF perf link support */ FEAT_PERF_LINK, + /* BTF_KIND_TAG support */ + FEAT_BTF_TAG, __FEAT_CNT, }; @@ -1986,6 +1988,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; + case BTF_KIND_TAG: return "tag"; default: return "unknown"; } } @@ -2485,8 +2488,9 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); + bool has_tag = kernel_supports(obj, FEAT_BTF_TAG); - return !has_func || !has_datasec || !has_func_global || !has_float; + return !has_func || !has_datasec || !has_func_global || !has_float || !has_tag; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2495,14 +2499,15 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); + bool has_tag = kernel_supports(obj, FEAT_BTF_TAG); struct btf_type *t; int i, j, vlen; for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - if (!has_datasec && btf_is_var(t)) { - /* replace VAR with INT */ + if ((!has_datasec && btf_is_var(t)) || (!has_tag && btf_is_tag(t))) { + /* replace VAR/TAG with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* * using size = 1 is the safest choice, 4 will be too @@ -4212,6 +4217,23 @@ static int probe_kern_btf_float(void) strs, sizeof(strs))); } +static int probe_kern_btf_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + static int probe_kern_array_mmap(void) { struct bpf_create_map_attr attr = { @@ -4428,6 +4450,9 @@ static struct kern_feature_desc { [FEAT_PERF_LINK] = { "BPF perf link support", probe_perf_link, }, + [FEAT_BTF_TAG] = { + "BTF_KIND_TAG support", probe_kern_btf_tag, + }, }; static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) diff --git a/src/libbpf.map b/src/libbpf.map index 78ea62c..9e649cf 100644 --- a/src/libbpf.map +++ b/src/libbpf.map @@ -388,4 +388,6 @@ LIBBPF_0.5.0 { } LIBBPF_0.4.0; LIBBPF_0.6.0 { + global: + btf__add_tag; } LIBBPF_0.5.0; diff --git a/src/libbpf_internal.h b/src/libbpf_internal.h index 4f6ff5c..ceb0c98 100644 --- a/src/libbpf_internal.h +++ b/src/libbpf_internal.h @@ -69,6 +69,8 @@ #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) +#define BTF_TYPE_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) From 627cbb395bc04df89507fff62bbb8bf44268f320 Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Tue, 14 Sep 2021 22:19:52 -0400 Subject: [PATCH 26/72] libbpf: Add sphinx code documentation comments This adds comments above five functions in btf.h which document their uses. These comments are of a format that doxygen and sphinx can pick up and render. These are rendered by libbpf.readthedocs.org Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210915021951.117186-1-grantseltzer@gmail.com --- src/btf.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/btf.h b/src/btf.h index 659ea8a..2cfe313 100644 --- a/src/btf.h +++ b/src/btf.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (c) 2018 Facebook */ +/*! \file */ #ifndef __LIBBPF_BTF_H #define __LIBBPF_BTF_H @@ -30,11 +31,80 @@ enum btf_endianness { BTF_BIG_ENDIAN = 1, }; +/** + * @brief **btf__free()** frees all data of a BTF object + * @param btf BTF object to free + */ LIBBPF_API void btf__free(struct btf *btf); +/** + * @brief **btf__new()** creates a new instance of a BTF object from the raw + * bytes of an ELF's BTF section + * @param data raw bytes + * @param size number of bytes passed in `data` + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new(const void *data, __u32 size); + +/** + * @brief **btf__new_split()** create a new instance of a BTF object from the + * provided raw data bytes. It takes another BTF instance, **base_btf**, which + * serves as a base BTF, which is extended by types in a newly created BTF + * instance + * @param data raw bytes + * @param size length of raw bytes + * @param base_btf the base BTF object + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * If *base_btf* is NULL, `btf__new_split()` is equivalent to `btf__new()` and + * creates non-split BTF. + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf); + +/** + * @brief **btf__new_empty()** creates an empty BTF object. Use + * `btf__add_*()` to populate such BTF object. + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new_empty(void); + +/** + * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an + * ELF BTF section except with a base BTF on top of which split BTF should be + * based + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * If *base_btf* is NULL, `btf__new_empty_split()` is equivalent to + * `btf__new_empty()` and creates non-split BTF. + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); From 42da89eb16fda7bbf208bb92b17c4e0945ddfc66 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 13:19:37 -0700 Subject: [PATCH 27/72] sync: auto-generate latest BPF helpers Latest changes to BPF helper definitions. --- src/bpf_helper_defs.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/bpf_helper_defs.h b/src/bpf_helper_defs.h index 00929fa..2ee6ab5 100644 --- a/src/bpf_helper_defs.h +++ b/src/bpf_helper_defs.h @@ -4043,4 +4043,29 @@ static __u64 (*bpf_get_attach_cookie)(void *ctx) = (void *) 174; */ static long (*bpf_task_pt_regs)(struct task_struct *task) = (void *) 175; +/* + * bpf_get_branch_snapshot + * + * Get branch trace from hardware engines like Intel LBR. The + * hardware engine is stopped shortly after the helper is + * called. Therefore, the user need to filter branch entries + * based on the actual use case. To capture branch trace + * before the trigger point of the BPF program, the helper + * should be called at the beginning of the BPF program. + * + * The data is stored as struct perf_branch_entry into output + * buffer *entries*. *size* is the size of *entries* in bytes. + * *flags* is reserved for now and must be zero. + * + * + * Returns + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-ENOENT** if architecture does not support branch records. + */ +static long (*bpf_get_branch_snapshot)(void *entries, __u32 size, __u64 flags) = (void *) 176; + From 24dbcb3a307531b0cd07f81bae5b8c402e4ca7ff Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 13:19:37 -0700 Subject: [PATCH 28/72] sync: latest libbpf changes from kernel Syncing latest libbpf commits from kernel repository. Baseline bpf-next commit: 47bb27a20d6ea22cd092c1fc2bb4fcecac374838 Checkpoint bpf-next commit: 69cd823956ba8ce266a901170b1060db8073bddd Baseline bpf commit: 3776f3517ed94d40ff0e3851d7ce2ce17b63099f Checkpoint bpf commit: bc23f724481759d0fac61dfb5ce979af2190bbe0 Andrii Nakryiko (4): libbpf: Make libbpf_version.h non-auto-generated libbpf: Ensure BPF prog types are set before relocations libbpf: Simplify BPF program auto-attach code libbpf: Minimize explicit iterator of section definition array Grant Seltzer (1): libbpf: Add sphinx code documentation comments Quentin Monnet (1): libbpf: Add LIBBPF_DEPRECATED_SINCE macro for scheduling API deprecations Rafael David Tinoco (1): libbpf: Introduce legacy kprobe events support Rocco Yue (1): ipv6: add IFLA_INET6_RA_MTU to expose mtu value Song Liu (1): bpf: Introduce helper bpf_get_branch_snapshot Vadim Fedorenko (1): bpf: Add hardware timestamp field to __sk_buff Yonghong Song (4): btf: Change BTF_KIND_* macros to enums bpf: Support for new btf kind BTF_KIND_TAG libbpf: Rename btf_{hash,equal}_int to btf_{hash,equal}_int_tag libbpf: Add support for BTF_KIND_TAG include/uapi/linux/bpf.h | 24 +++ include/uapi/linux/btf.h | 55 ++++-- include/uapi/linux/if_link.h | 1 + src/btf.c | 84 +++++++- src/btf.h | 87 +++++++++ src/btf_dump.c | 3 + src/libbpf.c | 359 ++++++++++++++++++++++++----------- src/libbpf.map | 5 + src/libbpf_common.h | 19 ++ src/libbpf_internal.h | 2 + src/libbpf_version.h | 9 + 11 files changed, 505 insertions(+), 143 deletions(-) create mode 100644 src/libbpf_version.h -- 2.30.2 --- BPF-CHECKPOINT-COMMIT | 2 +- CHECKPOINT-COMMIT | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BPF-CHECKPOINT-COMMIT b/BPF-CHECKPOINT-COMMIT index e49628c..84dc5c2 100644 --- a/BPF-CHECKPOINT-COMMIT +++ b/BPF-CHECKPOINT-COMMIT @@ -1 +1 @@ -3776f3517ed94d40ff0e3851d7ce2ce17b63099f +bc23f724481759d0fac61dfb5ce979af2190bbe0 diff --git a/CHECKPOINT-COMMIT b/CHECKPOINT-COMMIT index 765d989..1ac683c 100644 --- a/CHECKPOINT-COMMIT +++ b/CHECKPOINT-COMMIT @@ -1 +1 @@ -47bb27a20d6ea22cd092c1fc2bb4fcecac374838 +69cd823956ba8ce266a901170b1060db8073bddd From 980777cc16db75d5628a537c892aefc2640bb242 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Sep 2021 10:47:02 -0700 Subject: [PATCH 29/72] vmtest: temporarily blacklist spammy selftests There is a problem in bpf-next tree which causes get_stack_raw_tp and few other selftests to produce tons of kernel warnings, timing out and failing CI test runs. Blacklist until bpf tree, which has a fix, is merged into bpf-next. Signed-off-by: Andrii Nakryiko --- travis-ci/vmtest/configs/blacklist/BLACKLIST-latest | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest b/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest index 688dcae..2718997 100644 --- a/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest +++ b/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest @@ -1 +1,5 @@ # TEMPORARY +get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge +stacktrace_build_id_nmi +stacktrace_build_id +task_fd_query_rawtp From 7d365e49f3912a0aa2e808f5c1a01338c08101f2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 29 Sep 2021 09:57:09 -0700 Subject: [PATCH 30/72] ci: use -a/-d with exact string match for black/white-listing Doing substring matches allows accidental new tests to be enabled, when they are not supposed to be. E.g., whitelisting "xdp" allows new "xdpwall" test on 5.5.0, which wasn't supposed to happen. Cc: Yucong Sun Signed-off-by: Andrii Nakryiko --- travis-ci/vmtest/run_selftests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/travis-ci/vmtest/run_selftests.sh b/travis-ci/vmtest/run_selftests.sh index b2d4b5b..6195a76 100755 --- a/travis-ci/vmtest/run_selftests.sh +++ b/travis-ci/vmtest/run_selftests.sh @@ -7,12 +7,12 @@ source $(cd $(dirname $0) && pwd)/helpers.sh test_progs() { if [[ "${KERNEL}" != '4.9.0' ]]; then travis_fold start test_progs "Testing test_progs" - ./test_progs ${BLACKLIST:+-b$BLACKLIST} ${WHITELIST:+-t$WHITELIST} + ./test_progs ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST} travis_fold end test_progs fi travis_fold start test_progs-no_alu32 "Testing test_progs-no_alu32" - ./test_progs-no_alu32 ${BLACKLIST:+-b$BLACKLIST} ${WHITELIST:+-t$WHITELIST} + ./test_progs-no_alu32 ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST} travis_fold end test_progs-no_alu32 } From 1eba3a647092bb0122af699f86e10d2ba4182675 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 15 Sep 2021 01:54:00 +0200 Subject: [PATCH 31/72] bpf: Update bpf_get_smp_processor_id() documentation BPF programs run with migration disabled regardless of preemption, as they are protected by migrate_disable(). Update the uapi documentation accordingly. Signed-off-by: Matteo Croce Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210914235400.59427-1-mcroce@linux.microsoft.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d213265..3e9785f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1629,7 +1629,7 @@ union bpf_attr { * u32 bpf_get_smp_processor_id(void) * Description * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the + * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. * Return From 0a76ce13953149772a76ce33ed4b022a3ed20c33 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 18:58:30 -0700 Subject: [PATCH 32/72] libbpf: Use pre-setup sec_def in libbpf_find_attach_btf_id() Don't perform another search for sec_def inside libbpf_find_attach_btf_id(), as each recognized bpf_program already has prog->sec_def set. Also remove unnecessary NULL check for prog->sec_name, as it can never be NULL. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210916015836.1248906-2-andrii@kernel.org --- src/libbpf.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 62a43c4..5ba11b2 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -8461,19 +8461,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, { enum bpf_attach_type attach_type = prog->expected_attach_type; __u32 attach_prog_fd = prog->attach_prog_fd; - const char *name = prog->sec_name, *attach_name; - const struct bpf_sec_def *sec = NULL; + const char *attach_name; int err = 0; - if (!name) - return -EINVAL; - - sec = find_sec_def(name); - if (!sec || !sec->is_attach_btf) { - pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name); + if (!prog->sec_def || !prog->sec_def->is_attach_btf) { + pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", + prog->sec_name); return -ESRCH; } - attach_name = name + sec->len; + attach_name = prog->sec_name + prog->sec_def->len; /* BPF program's BTF ID */ if (attach_prog_fd) { From e856a7d560b935f52690aa3df88148fbb913e79a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 18:58:32 -0700 Subject: [PATCH 33/72] libbpf: Deprecated bpf_object_open_opts.relaxed_core_relocs It's relevant and hasn't been doing anything for a long while now. Deprecated it. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210916015836.1248906-4-andrii@kernel.org --- src/libbpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libbpf.h b/src/libbpf.h index 2f6f0e1..7111e8d 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -83,6 +83,7 @@ struct bpf_object_open_opts { * Non-relocatable instructions are replaced with invalid ones to * prevent accidental errors. * */ + LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect") bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be From 599b999f5d4700a77aa8876066e1e95985d53ddb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 18:58:33 -0700 Subject: [PATCH 34/72] libbpf: Allow skipping attach_func_name in bpf_program__set_attach_target() Allow to use bpf_program__set_attach_target to only set target attach program FD, while letting libbpf to use target attach function name from SEC() definition. This might be useful for some scenarios where bpf_object contains multiple related freplace BPF programs intended to replace different sub-programs in target BPF program. In such case all programs will have the same attach_prog_fd, but different attach_func_name. It's convenient to specify such target function names declaratively in SEC() definitions, but attach_prog_fd is a dynamic runtime setting. To simplify such scenario, allow bpf_program__set_attach_target() to delay BTF ID resolution till the BPF program load time by providing NULL attach_func_name. In that case the behavior will be similar to using bpf_object_open_opts.attach_prog_fd (which is marked deprecated since v0.7), but has the benefit of allowing more control by user in what is attached to what. Such setup allows having BPF programs attached to different target attach_prog_fd with target functions still declaratively recorded in BPF source code in SEC() definitions. Selftests changes in the next patch should make this more obvious. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210916015836.1248906-5-andrii@kernel.org --- src/libbpf.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/libbpf.c b/src/libbpf.c index 5ba11b2..552d05a 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -10643,18 +10643,29 @@ int bpf_program__set_attach_target(struct bpf_program *prog, { int btf_obj_fd = 0, btf_id = 0, err; - if (!prog || attach_prog_fd < 0 || !attach_func_name) + if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); if (prog->obj->loaded) return libbpf_err(-EINVAL); + if (attach_prog_fd && !attach_func_name) { + /* remember attach_prog_fd and let bpf_program__load() find + * BTF ID during the program load + */ + prog->attach_prog_fd = attach_prog_fd; + return 0; + } + if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { + if (!attach_func_name) + return libbpf_err(-EINVAL); + /* load btf_vmlinux, if not yet */ err = bpf_object__load_vmlinux_btf(prog->obj, true); if (err) From 3e7c04669e90534c174555d4320746351148b879 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 18:58:35 -0700 Subject: [PATCH 35/72] libbpf: Schedule open_opts.attach_prog_fd deprecation since v0.7 bpf_object_open_opts.attach_prog_fd makes a pretty strong assumption that bpf_object contains either only single freplace BPF program or all of BPF programs in BPF object are freplaces intended to replace different subprograms of the same target BPF program. This seems both a bit confusing, too assuming, and limiting. We've had bpf_program__set_attach_target() API which allows more fine-grained control over this, on a per-program level. As such, mark open_opts.attach_prog_fd as deprecated starting from v0.7, so that we have one more universal way of setting freplace targets. With previous change to allow NULL attach_func_name argument, and especially combined with BPF skeleton, arguable bpf_program__set_attach_target() is a more convenient and explicit API as well. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210916015836.1248906-7-andrii@kernel.org --- src/libbpf.c | 3 +++ src/libbpf.h | 2 ++ src/libbpf_common.h | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/src/libbpf.c b/src/libbpf.c index 552d05a..6aeeb0e 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -6415,9 +6415,12 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object bpf_program__set_type(prog, prog->sec_def->prog_type); bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); +#pragma GCC diagnostic pop } return 0; diff --git a/src/libbpf.h b/src/libbpf.h index 7111e8d..52b7ee0 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -90,6 +90,8 @@ struct bpf_object_open_opts { * auto-pinned to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; + + LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program") __u32 attach_prog_fd; /* Additional kernel config content that augments and overrides * system Kconfig for CONFIG_xxx externs. diff --git a/src/libbpf_common.h b/src/libbpf_common.h index 36ac77f..aaa1efb 100644 --- a/src/libbpf_common.h +++ b/src/libbpf_common.h @@ -35,6 +35,11 @@ #else #define __LIBBPF_MARK_DEPRECATED_0_6(X) #endif +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7) +#define __LIBBPF_MARK_DEPRECATED_0_7(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_7(X) +#endif /* Helper macro to declare and initialize libbpf options struct * From b177fac2e20c9fb69ce12ffafb36c6899e98c253 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 15 Sep 2021 18:58:36 -0700 Subject: [PATCH 36/72] libbpf: Constify all high-level program attach APIs Attach APIs shouldn't need to modify bpf_program/bpf_map structs, so change all struct bpf_program and struct bpf_map pointers to const pointers. This is completely backwards compatible with no functional change. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210916015836.1248906-8-andrii@kernel.org --- src/libbpf.c | 68 ++++++++++++++++++++++++++-------------------------- src/libbpf.h | 36 ++++++++++++++-------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 6aeeb0e..da65a16 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -220,7 +220,7 @@ struct reloc_desc { struct bpf_sec_def; -typedef struct bpf_link *(*attach_fn_t)(struct bpf_program *prog); +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog); struct bpf_sec_def { const char *sec; @@ -7947,12 +7947,12 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(struct bpf_program *prog); -static struct bpf_link *attach_tp(struct bpf_program *prog); -static struct bpf_link *attach_raw_tp(struct bpf_program *prog); -static struct bpf_link *attach_trace(struct bpf_program *prog); -static struct bpf_link *attach_lsm(struct bpf_program *prog); -static struct bpf_link *attach_iter(struct bpf_program *prog); +static struct bpf_link *attach_kprobe(const struct bpf_program *prog); +static struct bpf_link *attach_tp(const struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(const struct bpf_program *prog); +static struct bpf_link *attach_trace(const struct bpf_program *prog); +static struct bpf_link *attach_lsm(const struct bpf_program *prog); +static struct bpf_link *attach_iter(const struct bpf_program *prog); static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -9092,7 +9092,7 @@ static void bpf_link_perf_dealloc(struct bpf_link *link) free(perf_link); } -struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, +struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { char errmsg[STRERR_BUFSIZE]; @@ -9167,7 +9167,7 @@ err_out: return libbpf_err_ptr(err); } -struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) +struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) { return bpf_program__attach_perf_event_opts(prog, pfd, NULL); } @@ -9332,7 +9332,7 @@ static int perf_event_kprobe_open_legacy(bool retprobe, const char *name, uint64 } struct bpf_link * -bpf_program__attach_kprobe_opts(struct bpf_program *prog, +bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts) { @@ -9389,7 +9389,7 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog, return link; } -struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, const char *func_name) { @@ -9400,7 +9400,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(struct bpf_program *prog) +static struct bpf_link *attach_kprobe(const struct bpf_program *prog) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; @@ -9432,7 +9432,7 @@ static struct bpf_link *attach_kprobe(struct bpf_program *prog) } LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, +bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { @@ -9472,7 +9472,7 @@ bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, return link; } -struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset) @@ -9532,7 +9532,7 @@ static int perf_event_open_tracepoint(const char *tp_category, return pfd; } -struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, const char *tp_category, const char *tp_name, const struct bpf_tracepoint_opts *opts) @@ -9566,14 +9566,14 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog, return link; } -struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, const char *tp_category, const char *tp_name) { return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(struct bpf_program *prog) +static struct bpf_link *attach_tp(const struct bpf_program *prog) { char *sec_name, *tp_cat, *tp_name; struct bpf_link *link; @@ -9597,7 +9597,7 @@ static struct bpf_link *attach_tp(struct bpf_program *prog) return link; } -struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name) { char errmsg[STRERR_BUFSIZE]; @@ -9627,7 +9627,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return link; } -static struct bpf_link *attach_raw_tp(struct bpf_program *prog) +static struct bpf_link *attach_raw_tp(const struct bpf_program *prog) { const char *tp_name = prog->sec_name + prog->sec_def->len; @@ -9635,7 +9635,7 @@ static struct bpf_link *attach_raw_tp(struct bpf_program *prog) } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; @@ -9664,28 +9664,28 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) return (struct bpf_link *)link; } -struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { return bpf_program__attach_btf_id(prog); } -struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog) +struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { return bpf_program__attach_btf_id(prog); } -static struct bpf_link *attach_trace(struct bpf_program *prog) +static struct bpf_link *attach_trace(const struct bpf_program *prog) { return bpf_program__attach_trace(prog); } -static struct bpf_link *attach_lsm(struct bpf_program *prog) +static struct bpf_link *attach_lsm(const struct bpf_program *prog) { return bpf_program__attach_lsm(prog); } static struct bpf_link * -bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, +bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id, const char *target_name) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, @@ -9721,24 +9721,24 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, } struct bpf_link * -bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) +bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) { return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); } struct bpf_link * -bpf_program__attach_netns(struct bpf_program *prog, int netns_fd) +bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) { return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); } -struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex) +struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in LINK_CREATE */ return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); } -struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name) { @@ -9771,7 +9771,7 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, } struct bpf_link * -bpf_program__attach_iter(struct bpf_program *prog, +bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); @@ -9810,12 +9810,12 @@ bpf_program__attach_iter(struct bpf_program *prog, return link; } -static struct bpf_link *attach_iter(struct bpf_program *prog) +static struct bpf_link *attach_iter(const struct bpf_program *prog) { return bpf_program__attach_iter(prog, NULL); } -struct bpf_link *bpf_program__attach(struct bpf_program *prog) +struct bpf_link *bpf_program__attach(const struct bpf_program *prog) { if (!prog->sec_def || !prog->sec_def->attach_fn) return libbpf_err_ptr(-ESRCH); @@ -9833,7 +9833,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link) return 0; } -struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) +struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) { struct bpf_struct_ops *st_ops; struct bpf_link *link; @@ -10918,7 +10918,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) if (!prog->sec_def || !prog->sec_def->attach_fn) continue; - *link = prog->sec_def->attach_fn(prog); + *link = bpf_program__attach(prog); err = libbpf_get_error(*link); if (err) { pr_warn("failed to auto-attach program '%s': %d\n", diff --git a/src/libbpf.h b/src/libbpf.h index 52b7ee0..c90e3d7 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -246,7 +246,7 @@ LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * -bpf_program__attach(struct bpf_program *prog); +bpf_program__attach(const struct bpf_program *prog); struct bpf_perf_event_opts { /* size of this struct, for forward/backward compatiblity */ @@ -257,10 +257,10 @@ struct bpf_perf_event_opts { #define bpf_perf_event_opts__last_field bpf_cookie LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); +bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, +bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts); struct bpf_kprobe_opts { @@ -277,10 +277,10 @@ struct bpf_kprobe_opts { #define bpf_kprobe_opts__last_field retprobe LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, +bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, const char *func_name); LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe_opts(struct bpf_program *prog, +bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts); @@ -300,11 +300,11 @@ struct bpf_uprobe_opts { #define bpf_uprobe_opts__last_field retprobe LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, +bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, +bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); @@ -317,35 +317,35 @@ struct bpf_tracepoint_opts { #define bpf_tracepoint_opts__last_field bpf_cookie LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint(struct bpf_program *prog, +bpf_program__attach_tracepoint(const struct bpf_program *prog, const char *tp_category, const char *tp_name); LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint_opts(struct bpf_program *prog, +bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, const char *tp_category, const char *tp_name, const struct bpf_tracepoint_opts *opts); LIBBPF_API struct bpf_link * -bpf_program__attach_raw_tracepoint(struct bpf_program *prog, +bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); LIBBPF_API struct bpf_link * -bpf_program__attach_trace(struct bpf_program *prog); +bpf_program__attach_trace(const struct bpf_program *prog); LIBBPF_API struct bpf_link * -bpf_program__attach_lsm(struct bpf_program *prog); +bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * -bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd); +bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); LIBBPF_API struct bpf_link * -bpf_program__attach_netns(struct bpf_program *prog, int netns_fd); +bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd); LIBBPF_API struct bpf_link * -bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); +bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); LIBBPF_API struct bpf_link * -bpf_program__attach_freplace(struct bpf_program *prog, +bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name); struct bpf_map; -LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); +LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -355,7 +355,7 @@ struct bpf_iter_attach_opts { #define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * -bpf_program__attach_iter(struct bpf_program *prog, +bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); struct bpf_insn; From 029273039d466f4629213dd281800541cf93c9d3 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 17 Sep 2021 11:29:05 -0700 Subject: [PATCH 37/72] bpf: Add bpf_trace_vprintk helper This helper is meant to be "bpf_trace_printk, but with proper vararg support". Follow bpf_snprintf's example and take a u64 pseudo-vararg array. Write to /sys/kernel/debug/tracing/trace_pipe using the same mechanism as bpf_trace_printk. The functionality of this helper was requested in the libbpf issue tracker [0]. [0] Closes: https://github.com/libbpf/libbpf/issues/315 Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210917182911.2426606-4-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3e9785f..98ca79a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4898,6 +4898,16 @@ union bpf_attr { * **-EINVAL** if *flags* is not zero. * * **-ENOENT** if architecture does not support branch records. + * + * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 + * to format and can handle more format args as a result. + * + * Arguments are to be used as in **bpf_seq_printf**\ () helper. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5077,6 +5087,7 @@ union bpf_attr { FN(get_attach_cookie), \ FN(task_pt_regs), \ FN(get_branch_snapshot), \ + FN(trace_vprintk), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper From 18922504c3366d5738322466a53c172c025097a0 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 17 Sep 2021 11:29:06 -0700 Subject: [PATCH 38/72] libbpf: Modify bpf_printk to choose helper based on arg count Instead of being a thin wrapper which calls into bpf_trace_printk, libbpf's bpf_printk convenience macro now chooses between bpf_trace_printk and bpf_trace_vprintk. If the arg count (excluding format string) is >3, use bpf_trace_vprintk, otherwise use the older helper. The motivation behind this added complexity - instead of migrating entirely to bpf_trace_vprintk - is to maintain good developer experience for users compiling against new libbpf but running on older kernels. Users who are passing <=3 args to bpf_printk will see no change in their bytecode. __bpf_vprintk functions similarly to BPF_SEQ_PRINTF and BPF_SNPRINTF macros elsewhere in the file - it allows use of bpf_trace_vprintk without manual conversion of varargs to u64 array. Previous implementation of bpf_printk macro is moved to __bpf_printk for use by the new implementation. This does change behavior of bpf_printk calls with >3 args in the "new libbpf, old kernels" scenario. Before this patch, attempting to use 4 args to bpf_printk results in a compile-time error. After this patch, using bpf_printk with 4 args results in a trace_vprintk helper call being emitted and a load-time failure on older kernels. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210917182911.2426606-5-davemarchevsky@fb.com --- src/bpf_helpers.h | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/bpf_helpers.h b/src/bpf_helpers.h index b9987c3..55a3087 100644 --- a/src/bpf_helpers.h +++ b/src/bpf_helpers.h @@ -14,14 +14,6 @@ #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] -/* Helper macro to print out debug messages */ -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names @@ -224,4 +216,41 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +#define __bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* + * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments + * instead of an array of u64. + */ +#define __bpf_vprintk(fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_trace_vprintk(___fmt, sizeof(___fmt), \ + ___param, sizeof(___param)); \ +}) + +/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args + * Otherwise use __bpf_vprintk + */ +#define ___bpf_pick_printk(...) \ + ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ + __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ + __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ + __bpf_printk /*1*/, __bpf_printk /*0*/) + +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args) + #endif From 989d7189cdcf43480c7d1f15a13ad13333e0877d Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 17 Sep 2021 11:29:07 -0700 Subject: [PATCH 39/72] libbpf: Use static const fmt string in __bpf_printk The __bpf_printk convenience macro was using a 'char' fmt string holder as it predates support for globals in libbpf. Move to more efficient 'static const char', but provide a fallback to the old way via BPF_NO_GLOBAL_DATA so users on old kernels can still use the macro. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210917182911.2426606-6-davemarchevsky@fb.com --- src/bpf_helpers.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bpf_helpers.h b/src/bpf_helpers.h index 55a3087..963b106 100644 --- a/src/bpf_helpers.h +++ b/src/bpf_helpers.h @@ -216,9 +216,15 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +#ifdef BPF_NO_GLOBAL_DATA +#define BPF_PRINTK_FMT_MOD +#else +#define BPF_PRINTK_FMT_MOD static const +#endif + #define __bpf_printk(fmt, ...) \ ({ \ - char ____fmt[] = fmt; \ + BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ bpf_trace_printk(____fmt, sizeof(____fmt), \ ##__VA_ARGS__); \ }) From e1966114cc7dfb607452d970624a5be314a5a822 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Fri, 17 Sep 2021 11:29:11 -0700 Subject: [PATCH 40/72] bpf: Clarify data_len param in bpf_snprintf and bpf_seq_printf comments Since the data_len in these two functions is a byte len of the preceding u64 *data array, it must always be a multiple of 8. If this isn't the case both helpers error out, so let's make the requirement explicit so users don't need to infer it. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210917182911.2426606-10-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 98ca79a..6fc59d6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4046,7 +4046,7 @@ union bpf_attr { * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes. + * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -4751,7 +4751,8 @@ union bpf_attr { * Each format specifier in **fmt** corresponds to one u64 element * in the **data** array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* - * array. The *data_len* is the size of *data* in bytes. + * array. The *data_len* is the size of *data* in bytes - must be + * a multiple of 8. * * Formats **%s** and **%p{i,I}{4,6}** require to read kernel * memory. Reading kernel memory may fail due to either invalid From cdf14ff22b5b7e0f46ee3870ce9a1acc029d8144 Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Fri, 17 Sep 2021 23:14:58 -0400 Subject: [PATCH 41/72] libbpf: Add doc comments in libbpf.h This adds comments above functions in libbpf.h which document their uses. These comments are of a format that doxygen and sphinx can pick up and render. These are rendered by libbpf.readthedocs.org These doc comments are for: - bpf_object__find_map_by_name() - bpf_map__fd() - bpf_map__is_internal() - libbpf_get_error() - libbpf_num_possible_cpus() Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210918031457.36204-1-grantseltzer@gmail.com --- src/libbpf.h | 65 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/src/libbpf.h b/src/libbpf.h index c90e3d7..d0bedd6 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -481,9 +481,13 @@ struct bpf_map_def { unsigned int map_flags; }; -/* - * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, - * so no need to worry about a name clash. +/** + * @brief **bpf_object__find_map_by_name()** returns BPF map of + * the given name, if it exists within the passed BPF object + * @param obj BPF object + * @param name name of the BPF map + * @return BPF map instance, if such map exists within the BPF object; + * or NULL otherwise. */ LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); @@ -509,7 +513,12 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); -/* get/set map FD */ +/** + * @brief **bpf_map__fd()** gets the file descriptor of the passed + * BPF map + * @param map the BPF map instance + * @return the file descriptor; or -EINVAL in case of an error + */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); /* get map definition */ @@ -550,6 +559,14 @@ LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); + +/** + * @brief **bpf_map__is_internal()** tells the caller whether or not the + * passed map is a special map created by libbpf automatically for things like + * global variables, __ksym externs, Kconfig values, etc + * @param map the bpf_map + * @return true, if the map is an internal map; false, otherwise + */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); @@ -561,6 +578,38 @@ LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); +/** + * @brief **libbpf_get_error()** extracts the error code from the passed + * pointer + * @param ptr pointer returned from libbpf API function + * @return error code; or 0 if no error occured + * + * Many libbpf API functions which return pointers have logic to encode error + * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()** + * should be used on the return value from these functions immediately after + * calling the API function, with no intervening calls that could clobber the + * `errno` variable. Consult the individual functions documentation to verify + * if this logic applies should be used. + * + * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` + * is enabled, NULL is returned on error instead. + * + * If ptr is NULL, then errno should be already set by the failing + * API, because libbpf never returns NULL on success and it now always + * sets errno on error. + * + * Example usage: + * + * struct perf_buffer *pb; + * + * pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts); + * err = libbpf_get_error(pb); + * if (err) { + * pb = NULL; + * fprintf(stderr, "failed to open perf buffer: %d\n", err); + * goto cleanup; + * } + */ LIBBPF_API long libbpf_get_error(const void *ptr); struct bpf_prog_load_attr { @@ -825,9 +874,10 @@ bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); LIBBPF_API void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); -/* - * A helper function to get the number of possible CPUs before looking up - * per-CPU maps. Negative errno is returned on failure. +/** + * @brief **libbpf_num_possible_cpus()** is a helper function to get the + * number of possible CPUs that the host kernel supports and expects. + * @return number of possible CPUs; or error code on failure * * Example usage: * @@ -837,7 +887,6 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); * } * long values[ncpus]; * bpf_map_lookup_elem(per_cpu_map_fd, key, values); - * */ LIBBPF_API int libbpf_num_possible_cpus(void); From 246b61780b052c2ad61eab196147698c525cb0b2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Sep 2021 14:00:33 -0700 Subject: [PATCH 42/72] libbpf: Fix memory leak in legacy kprobe attach logic In some error scenarios legacy_probe string won't be free()'d. Fix this. This was reported by Coverity static analysis. Fixes: ca304b40c20d ("libbpf: Introduce legacy kprobe events support") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210921210036.1545557-2-andrii@kernel.org --- src/libbpf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index da65a16..6d2f12d 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -9365,10 +9365,11 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, offset, -1 /* pid */); } if (pfd < 0) { + err = pfd; pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(pfd); + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); @@ -9377,7 +9378,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to attach to %s '%s': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(err); + goto err_out; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -9387,6 +9388,9 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, } return link; +err_out: + free(legacy_probe); + return libbpf_err_ptr(err); } struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, From 9ecc0dcc190ab04523e727aaa6e470aa143aec5b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Sep 2021 14:00:35 -0700 Subject: [PATCH 43/72] libbpf: Refactor and simplify legacy kprobe code Refactor legacy kprobe handling code to follow the same logic as uprobe legacy logic added in the next patchs: - add append_to_file() helper that makes it simpler to work with tracefs file-based interface for creating and deleting probes; - move out probe/event name generation outside of the code that adds/removes it, which simplifies bookkeeping significantly; - change the probe name format to start with "libbpf_" prefix and include offset within kernel function; - switch 'unsigned long' to 'size_t' for specifying kprobe offsets, which is consistent with how uprobes define that, simplifies printf()-ing internally, and also avoids unnecessary complications on architectures where sizeof(long) != sizeof(void *). This patch also implicitly fixes the problem with invalid open() error handling present in poke_kprobe_events(), which (the function) this patch removes. Fixes: ca304b40c20d ("libbpf: Introduce legacy kprobe events support") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210921210036.1545557-4-andrii@kernel.org --- src/libbpf.c | 159 ++++++++++++++++++++++++++++----------------------- src/libbpf.h | 2 +- 2 files changed, 88 insertions(+), 73 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 6d2f12d..aa842f0 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -9011,59 +9011,17 @@ int bpf_link__unpin(struct bpf_link *link) return 0; } -static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) -{ - int fd, ret = 0; - pid_t p = getpid(); - char cmd[260], probename[128], probefunc[128]; - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - if (retprobe) - snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, p); - else - snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, p); - - if (offset) - snprintf(probefunc, sizeof(probefunc), "%s+%zu", name, (size_t)offset); - - if (add) { - snprintf(cmd, sizeof(cmd), "%c:%s %s", - retprobe ? 'r' : 'p', - probename, - offset ? probefunc : name); - } else { - snprintf(cmd, sizeof(cmd), "-:%s", probename); - } - - fd = open(file, O_WRONLY | O_APPEND, 0); - if (!fd) - return -errno; - ret = write(fd, cmd, strlen(cmd)); - if (ret < 0) - ret = -errno; - close(fd); - - return ret; -} - -static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset) -{ - return poke_kprobe_events(true, name, retprobe, offset); -} - -static inline int remove_kprobe_event_legacy(const char *name, bool retprobe) -{ - return poke_kprobe_events(false, name, retprobe, 0); -} - struct bpf_link_perf { struct bpf_link link; int perf_event_fd; /* legacy kprobe support: keep track of probe identifier and type */ char *legacy_probe_name; + bool legacy_is_kprobe; bool legacy_is_retprobe; }; +static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); + static int bpf_link_perf_detach(struct bpf_link *link) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -9077,9 +9035,12 @@ static int bpf_link_perf_detach(struct bpf_link *link) close(link->fd); /* legacy kprobe needs to be removed after perf event fd closure */ - if (perf_link->legacy_probe_name) - err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, - perf_link->legacy_is_retprobe); + if (perf_link->legacy_probe_name) { + if (perf_link->legacy_is_kprobe) { + err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, + perf_link->legacy_is_retprobe); + } + } return err; } @@ -9202,18 +9163,6 @@ static int parse_uint_from_file(const char *file, const char *fmt) return ret; } -static int determine_kprobe_perf_type_legacy(const char *func_name, bool is_retprobe) -{ - char file[192]; - - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s_libbpf_%d/id", - is_retprobe ? "kretprobes" : "kprobes", - func_name, getpid()); - - return parse_uint_from_file(file, "%d\n"); -} - static int determine_kprobe_perf_type(void) { const char *file = "/sys/bus/event_source/devices/kprobe/type"; @@ -9296,21 +9245,79 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } -static int perf_event_kprobe_open_legacy(bool retprobe, const char *name, uint64_t offset, int pid) +static int append_to_file(const char *file, const char *fmt, ...) +{ + int fd, n, err = 0; + va_list ap; + + fd = open(file, O_WRONLY | O_APPEND, 0); + if (fd < 0) + return -errno; + + va_start(ap, fmt); + n = vdprintf(fd, fmt, ap); + va_end(ap); + + if (n < 0) + err = -errno; + + close(fd); + return err; +} + +static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, + const char *kfunc_name, size_t offset) +{ + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); +} + +static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, + const char *kfunc_name, size_t offset) +{ + const char *file = "/sys/kernel/debug/tracing/kprobe_events"; + + return append_to_file(file, "%c:%s/%s %s+0x%zx", + retprobe ? 'r' : 'p', + retprobe ? "kretprobes" : "kprobes", + probe_name, kfunc_name, offset); +} + +static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) +{ + const char *file = "/sys/kernel/debug/tracing/kprobe_events"; + + return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name); +} + +static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) +{ + char file[256]; + + snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s/id", + retprobe ? "kretprobes" : "kprobes", probe_name); + + return parse_uint_from_file(file, "%d\n"); +} + +static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, + const char *kfunc_name, size_t offset, int pid) { struct perf_event_attr attr = {}; char errmsg[STRERR_BUFSIZE]; int type, pfd, err; - err = add_kprobe_event_legacy(name, retprobe, offset); + err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); if (err < 0) { - pr_warn("failed to add legacy kprobe event: %s\n", + pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", + kfunc_name, offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return err; } - type = determine_kprobe_perf_type_legacy(name, retprobe); + type = determine_kprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { - pr_warn("failed to determine legacy kprobe event id: %s\n", + pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", + kfunc_name, offset, libbpf_strerror_r(type, errmsg, sizeof(errmsg))); return type; } @@ -9340,7 +9347,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, char errmsg[STRERR_BUFSIZE]; char *legacy_probe = NULL; struct bpf_link *link; - unsigned long offset; + size_t offset; bool retprobe, legacy; int pfd, err; @@ -9357,17 +9364,23 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { + char probe_name[256]; + + gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); + legacy_probe = strdup(func_name); if (!legacy_probe) return libbpf_err_ptr(-ENOMEM); - pfd = perf_event_kprobe_open_legacy(retprobe, func_name, + pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, offset, -1 /* pid */); } if (pfd < 0) { - err = pfd; - pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", func_name, + err = -errno; + pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", + func_name, offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); goto err_out; } @@ -9375,8 +9388,9 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, err = libbpf_get_error(link); if (err) { close(pfd); - pr_warn("prog '%s': failed to attach to %s '%s': %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", func_name, + pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", + func_name, offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); goto err_out; } @@ -9384,6 +9398,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); perf_link->legacy_probe_name = legacy_probe; + perf_link->legacy_is_kprobe = true; perf_link->legacy_is_retprobe = retprobe; } diff --git a/src/libbpf.h b/src/libbpf.h index d0bedd6..e35490c 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -269,7 +269,7 @@ struct bpf_kprobe_opts { /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; /* function's offset to install kprobe to */ - unsigned long offset; + size_t offset; /* kprobe is return probe */ bool retprobe; size_t :0; From 09b528e8472ed22c7c5246e8631c66411c38043f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Sep 2021 14:00:36 -0700 Subject: [PATCH 44/72] libbpf: Add legacy uprobe attaching support Similarly to recently added legacy kprobe attach interface support through tracefs, support attaching uprobes using the legacy interface if host kernel doesn't support newer FD-based interface. For uprobes event name consists of "libbpf_" prefix, PID, sanitized binary path and offset within that binary. Structuraly the code is aligned with kprobe logic refactoring in previous patch. struct bpf_link_perf is re-used and all the same legacy_probe_name and legacy_is_retprobe fields are used to ensure proper cleanup on bpf_link__destroy(). Users should be aware, though, that on old kernels which don't support FD-based interface for kprobe/uprobe attachment, if the application crashes before bpf_link__destroy() is called, uprobe legacy events will be left in tracefs. This is the same limitation as with legacy kprobe interfaces. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210921210036.1545557-5-andrii@kernel.org --- src/libbpf.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 122 insertions(+), 8 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index aa842f0..ef5db34 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -9021,6 +9021,7 @@ struct bpf_link_perf { }; static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); +static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); static int bpf_link_perf_detach(struct bpf_link *link) { @@ -9034,11 +9035,14 @@ static int bpf_link_perf_detach(struct bpf_link *link) close(perf_link->perf_event_fd); close(link->fd); - /* legacy kprobe needs to be removed after perf event fd closure */ + /* legacy uprobe/kprobe needs to be removed after perf event fd closure */ if (perf_link->legacy_probe_name) { if (perf_link->legacy_is_kprobe) { err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, perf_link->legacy_is_retprobe); + } else { + err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, + perf_link->legacy_is_retprobe); } } @@ -9450,17 +9454,96 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog) return link; } +static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, + const char *binary_path, uint64_t offset) +{ + int i; + + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); + + /* sanitize binary_path in the probe name */ + for (i = 0; buf[i]; i++) { + if (!isalnum(buf[i])) + buf[i] = '_'; + } +} + +static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, + const char *binary_path, size_t offset) +{ + const char *file = "/sys/kernel/debug/tracing/uprobe_events"; + + return append_to_file(file, "%c:%s/%s %s:0x%zx", + retprobe ? 'r' : 'p', + retprobe ? "uretprobes" : "uprobes", + probe_name, binary_path, offset); +} + +static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) +{ + const char *file = "/sys/kernel/debug/tracing/uprobe_events"; + + return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); +} + +static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) +{ + char file[512]; + + snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s/id", + retprobe ? "uretprobes" : "uprobes", probe_name); + + return parse_uint_from_file(file, "%d\n"); +} + +static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, + const char *binary_path, size_t offset, int pid) +{ + struct perf_event_attr attr; + int type, pfd, err; + + err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); + if (err < 0) { + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", + binary_path, (size_t)offset, err); + return err; + } + type = determine_uprobe_perf_type_legacy(probe_name, retprobe); + if (type < 0) { + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", + binary_path, offset, err); + return type; + } + + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(attr); + attr.config = type; + attr.type = PERF_TYPE_TRACEPOINT; + + pfd = syscall(__NR_perf_event_open, &attr, + pid < 0 ? -1 : pid, /* pid */ + pid == -1 ? 0 : -1, /* cpu */ + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); + return err; + } + return pfd; +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; + char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; - bool retprobe; + bool retprobe, legacy; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -9469,15 +9552,35 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, - func_offset, pid, ref_ctr_off); + legacy = determine_uprobe_perf_type() < 0; + if (!legacy) { + pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, + func_offset, pid, ref_ctr_off); + } else { + char probe_name[512]; + + if (ref_ctr_off) + return libbpf_err_ptr(-EINVAL); + + gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), + binary_path, func_offset); + + legacy_probe = strdup(probe_name); + if (!legacy_probe) + return libbpf_err_ptr(-ENOMEM); + + pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, + binary_path, func_offset, pid); + } if (pfd < 0) { + err = -errno; pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(pfd); + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto err_out; } + link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { @@ -9486,9 +9589,20 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(err); + goto err_out; + } + if (legacy) { + struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + + perf_link->legacy_probe_name = legacy_probe; + perf_link->legacy_is_kprobe = false; + perf_link->legacy_is_retprobe = retprobe; } return link; +err_out: + free(legacy_probe); + return libbpf_err_ptr(err); + } struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, From cb12e83136646e8ba698df72d5e341a82eeabee2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 27 Sep 2021 22:58:10 +0200 Subject: [PATCH 45/72] libbpf: Ignore STT_SECTION symbols in 'maps' section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When parsing legacy map definitions, libbpf would error out when encountering an STT_SECTION symbol. This becomes a problem because some versions of binutils will produce SECTION symbols for every section when processing an ELF file, so BPF files run through 'strip' will end up with such symbols, making libbpf refuse to load them. There's not really any reason why erroring out is strictly necessary, so change libbpf to just ignore SECTION symbols when parsing the ELF. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210927205810.715656-1-toke@redhat.com --- src/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index ef5db34..453148f 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -1869,6 +1869,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) continue; if (sym.st_shndx != obj->efile.maps_shndx) continue; + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) + continue; map = bpf_object__add_map(obj); if (IS_ERR(map)) @@ -1881,8 +1883,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return -LIBBPF_ERRNO__FORMAT; } - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION - || GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (GELF_ST_BIND(sym.st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; } From bb833f81295fa6b380695605dd7d8197ce2b1941 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:37 -0700 Subject: [PATCH 46/72] libbpf: Add "tc" SEC_DEF which is a better name for "classifier" As argued in [0], add "tc" ELF section definition for SCHED_CLS BPF program type. "classifier" is a misleading terminology and should be migrated away from. [0] https://lore.kernel.org/bpf/270e27b1-e5be-5b1c-b343-51bd644d0747@iogearbox.net/ Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210928161946.2512801-2-andrii@kernel.org --- src/libbpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libbpf.c b/src/libbpf.c index 453148f..0bcd0a4 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -7968,6 +7968,7 @@ static const struct bpf_sec_def section_defs[] = { .attach_fn = attach_kprobe), BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("tc", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), SEC_DEF("tracepoint/", TRACEPOINT, .attach_fn = attach_tp), From fba5f024019b58dc914dd539cf72ef228a994cf4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:41 -0700 Subject: [PATCH 47/72] libbpf: Refactor internal sec_def handling to enable pluggability Refactor internals of libbpf to allow adding custom SEC() handling logic easily from outside of libbpf. To that effect, each SEC()-handling registration sets mandatory program type/expected attach type for a given prefix and can provide three callbacks called at different points of BPF program lifetime: - init callback for right after bpf_program is initialized and prog_type/expected_attach_type is set. This happens during bpf_object__open() step, close to the very end of constructing bpf_object, so all the libbpf APIs for querying and updating bpf_program properties should be available; - pre-load callback is called right before BPF_PROG_LOAD command is called in the kernel. This callbacks has ability to set both bpf_program properties, as well as program load attributes, overriding and augmenting the standard libbpf handling of them; - optional auto-attach callback, which makes a given SEC() handler support auto-attachment of a BPF program through bpf_program__attach() API and/or BPF skeletons __attach() method. Each callbacks gets a `long cookie` parameter passed in, which is specified during SEC() handling. This can be used by callbacks to lookup whatever additional information is necessary. This is not yet completely ready to be exposed to the outside world, mainly due to non-public nature of struct bpf_prog_load_params. Instead of making it part of public API, we'll wait until the planned low-level libbpf API improvements for BPF_PROG_LOAD and other typical bpf() syscall APIs, at which point we'll have a public, probably OPTS-based, way to fully specify BPF program load parameters, which will be used as an interface for custom pre-load callbacks. But this change itself is already a good first step to unify the BPF program hanling logic even within the libbpf itself. As one example, all the extra per-program type handling (sleepable bit, attach_btf_id resolution, unsetting optional expected attach type) is now more obvious and is gathered in one place. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20210928161946.2512801-6-andrii@kernel.org --- src/libbpf.c | 129 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 0bcd0a4..d4d5653 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -220,7 +220,9 @@ struct reloc_desc { struct bpf_sec_def; -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog); +typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); +typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); struct bpf_sec_def { const char *sec; @@ -231,7 +233,11 @@ struct bpf_sec_def { bool is_attachable; bool is_attach_btf; bool is_sleepable; + + init_fn_t init_fn; + preload_fn_t preload_fn; attach_fn_t attach_fn; + long cookie; }; /* @@ -6095,6 +6101,44 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program return 0; } +static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); + +/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ +static int libbpf_preload_prog(struct bpf_program *prog, + struct bpf_prog_load_params *attr, long cookie) +{ + /* old kernels might not support specifying expected_attach_type */ + if (prog->sec_def->is_exp_attach_type_optional && + !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) + attr->expected_attach_type = 0; + + if (prog->sec_def->is_sleepable) + attr->prog_flags |= BPF_F_SLEEPABLE; + + if ((prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + int btf_obj_fd = 0, btf_type_id = 0, err; + + err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); + if (err) + return err; + + /* cache resolved BTF FD and BTF type ID in the prog */ + prog->attach_btf_obj_fd = btf_obj_fd; + prog->attach_btf_id = btf_type_id; + + /* but by now libbpf common logic is not utilizing + * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because + * this callback is called after attrs were populated by + * libbpf, so this callback has to update attr explicitly here + */ + attr->attach_btf_obj_fd = btf_obj_fd; + attr->attach_btf_id = btf_type_id; + } + return 0; +} + static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) @@ -6103,7 +6147,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL; - int btf_fd, ret; + int btf_fd, ret, err; if (prog->type == BPF_PROG_TYPE_UNSPEC) { /* @@ -6119,22 +6163,15 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, return -EINVAL; load_attr.prog_type = prog->type; - /* old kernels might not support specifying expected_attach_type */ - if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def && - prog->sec_def->is_exp_attach_type_optional) - load_attr.expected_attach_type = 0; - else - load_attr.expected_attach_type = prog->expected_attach_type; + load_attr.expected_attach_type = prog->expected_attach_type; if (kernel_supports(prog->obj, FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insn_cnt = insns_cnt; load_attr.license = license; load_attr.attach_btf_id = prog->attach_btf_id; - if (prog->attach_prog_fd) - load_attr.attach_prog_fd = prog->attach_prog_fd; - else - load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; @@ -6153,6 +6190,16 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + /* adjust load_attr if sec_def provides custom preload callback */ + if (prog->sec_def && prog->sec_def->preload_fn) { + err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie); + if (err < 0) { + pr_warn("prog '%s': failed to prepare load attributes: %d\n", + prog->name, err); + return err; + } + } + if (prog->obj->gen_loader) { bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, prog - prog->obj->programs); @@ -6268,8 +6315,6 @@ static int bpf_program__record_externs(struct bpf_program *prog) return 0; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); - int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i; @@ -6279,19 +6324,6 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) return libbpf_err(-EINVAL); } - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { - int btf_obj_fd = 0, btf_type_id = 0; - - err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); - if (err) - return libbpf_err(err); - - prog->attach_btf_obj_fd = btf_obj_fd; - prog->attach_btf_id = btf_type_id; - } - if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warn("Internal error: can't load program '%s'\n", @@ -6401,6 +6433,7 @@ static const struct bpf_sec_def *find_sec_def(const char *sec_name); static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) { struct bpf_program *prog; + int err; bpf_object__for_each_program(prog, obj) { prog->sec_def = find_sec_def(prog->sec_name); @@ -6411,8 +6444,6 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - if (prog->sec_def->is_sleepable) - prog->prog_flags |= BPF_F_SLEEPABLE; bpf_program__set_type(prog, prog->sec_def->prog_type); bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); @@ -6422,6 +6453,18 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); #pragma GCC diagnostic pop + + /* sec_def can have custom callback which should be called + * after bpf_program is initialized to adjust its properties + */ + if (prog->sec_def->init_fn) { + err = prog->sec_def->init_fn(prog, prog->sec_def->cookie); + if (err < 0) { + pr_warn("prog '%s': failed to initialize: %d\n", + prog->name, err); + return err; + } + } } return 0; @@ -7919,6 +7962,7 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, .is_exp_attach_type_optional = eatype_optional, \ .is_attachable = attachable, \ .is_attach_btf = attach_btf, \ + .preload_fn = libbpf_preload_prog, \ } /* Programs that can NOT be attached. */ @@ -7945,15 +7989,16 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, .sec = sec_pfx, \ .len = sizeof(sec_pfx) - 1, \ .prog_type = BPF_PROG_TYPE_##ptype, \ + .preload_fn = libbpf_preload_prog, \ __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog); -static struct bpf_link *attach_tp(const struct bpf_program *prog); -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog); -static struct bpf_link *attach_trace(const struct bpf_program *prog); -static struct bpf_link *attach_lsm(const struct bpf_program *prog); -static struct bpf_link *attach_iter(const struct bpf_program *prog); +static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -9425,7 +9470,7 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog) +static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; @@ -9708,7 +9753,7 @@ struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_program *prog) +static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) { char *sec_name, *tp_cat, *tp_name; struct bpf_link *link; @@ -9762,7 +9807,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog) +static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) { const char *tp_name = prog->sec_name + prog->sec_def->len; @@ -9809,12 +9854,12 @@ struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) return bpf_program__attach_btf_id(prog); } -static struct bpf_link *attach_trace(const struct bpf_program *prog) +static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie) { return bpf_program__attach_trace(prog); } -static struct bpf_link *attach_lsm(const struct bpf_program *prog) +static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie) { return bpf_program__attach_lsm(prog); } @@ -9945,7 +9990,7 @@ bpf_program__attach_iter(const struct bpf_program *prog, return link; } -static struct bpf_link *attach_iter(const struct bpf_program *prog) +static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie) { return bpf_program__attach_iter(prog, NULL); } @@ -9955,7 +10000,7 @@ struct bpf_link *bpf_program__attach(const struct bpf_program *prog) if (!prog->sec_def || !prog->sec_def->attach_fn) return libbpf_err_ptr(-ESRCH); - return prog->sec_def->attach_fn(prog); + return prog->sec_def->attach_fn(prog, prog->sec_def->cookie); } static int bpf_link__detach_struct_ops(struct bpf_link *link) From 3c5c62097ef15e97961461f327d8c704a250e68d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:42 -0700 Subject: [PATCH 48/72] libbpf: Reduce reliance of attach_fns on sec_def internals Move closer to not relying on bpf_sec_def internals that won't be part of public API, when pluggable SEC() handlers will be allowed. Drop pre-calculated prefix length, and in various helpers don't rely on this prefix length availability. Also minimize reliance on knowing bpf_sec_def's prefix for few places where section prefix shortcuts are supported (e.g., tp vs tracepoint, raw_tp vs raw_tracepoint). Given checking some string for having a given string-constant prefix is such a common operation and so annoying to be done with pure C code, add a small macro helper, str_has_pfx(), and reuse it throughout libbpf.c where prefix comparison is performed. With __builtin_constant_p() it's possible to have a convenient helper that checks some string for having a given prefix, where prefix is either string literal (or compile-time known string due to compiler optimization) or just a runtime string pointer, which is quite convenient and saves a lot of typing and string literal duplication. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20210928161946.2512801-7-andrii@kernel.org --- src/libbpf.c | 41 +++++++++++++++++++++++------------------ src/libbpf_internal.h | 7 +++++++ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index d4d5653..f87ffd4 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -226,7 +226,6 @@ typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long coo struct bpf_sec_def { const char *sec; - size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; bool is_exp_attach_type_optional; @@ -1671,7 +1670,7 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, void *ext_val; __u64 num; - if (strncmp(buf, "CONFIG_", 7)) + if (!str_has_pfx(buf, "CONFIG_")) return 0; sep = strchr(buf, '='); @@ -2920,7 +2919,7 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) static bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ - return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; + return str_has_pfx(name, ".debug_"); } static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) @@ -2942,7 +2941,7 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) if (is_sec_name_dwarf(name)) return true; - if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { + if (str_has_pfx(name, ".rel")) { name += sizeof(".rel") - 1; /* DWARF section relocations */ if (is_sec_name_dwarf(name)) @@ -6891,8 +6890,7 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, if (err) return err; pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && - strncmp(ext->name, "CONFIG_", 7) == 0) { + } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { need_config = true; } else if (ext->type == EXT_KSYM) { if (ext->ksym.type_id) @@ -7956,7 +7954,6 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, attachable, attach_btf) \ { \ .sec = string, \ - .len = sizeof(string) - 1, \ .prog_type = ptype, \ .expected_attach_type = eatype, \ .is_exp_attach_type_optional = eatype_optional, \ @@ -7987,7 +7984,6 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, #define SEC_DEF(sec_pfx, ptype, ...) { \ .sec = sec_pfx, \ - .len = sizeof(sec_pfx) - 1, \ .prog_type = BPF_PROG_TYPE_##ptype, \ .preload_fn = libbpf_preload_prog, \ __VA_ARGS__ \ @@ -8162,10 +8158,8 @@ static const struct bpf_sec_def *find_sec_def(const char *sec_name) int i, n = ARRAY_SIZE(section_defs); for (i = 0; i < n; i++) { - if (strncmp(sec_name, - section_defs[i].sec, section_defs[i].len)) - continue; - return §ion_defs[i]; + if (str_has_pfx(sec_name, section_defs[i].sec)) + return §ion_defs[i]; } return NULL; } @@ -8519,7 +8513,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, prog->sec_name); return -ESRCH; } - attach_name = prog->sec_name + prog->sec_def->len; + attach_name = prog->sec_name + strlen(prog->sec_def->sec); /* BPF program's BTF ID */ if (attach_prog_fd) { @@ -9479,8 +9473,11 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cooki char *func; int n, err; - func_name = prog->sec_name + prog->sec_def->len; - opts.retprobe = strcmp(prog->sec_def->sec, "kretprobe/") == 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); + if (opts.retprobe) + func_name = prog->sec_name + sizeof("kretprobe/") - 1; + else + func_name = prog->sec_name + sizeof("kprobe/") - 1; n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { @@ -9762,8 +9759,11 @@ static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) if (!sec_name) return libbpf_err_ptr(-ENOMEM); - /* extract "tp//" */ - tp_cat = sec_name + prog->sec_def->len; + /* extract "tp//" or "tracepoint//" */ + if (str_has_pfx(prog->sec_name, "tp/")) + tp_cat = sec_name + sizeof("tp/") - 1; + else + tp_cat = sec_name + sizeof("tracepoint/") - 1; tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); @@ -9809,7 +9809,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) { - const char *tp_name = prog->sec_name + prog->sec_def->len; + const char *tp_name; + + if (str_has_pfx(prog->sec_name, "raw_tp/")) + tp_name = prog->sec_name + sizeof("raw_tp/") - 1; + else + tp_name = prog->sec_name + sizeof("raw_tracepoint/") - 1; return bpf_program__attach_raw_tracepoint(prog, tp_name); } diff --git a/src/libbpf_internal.h b/src/libbpf_internal.h index ceb0c98..ec79400 100644 --- a/src/libbpf_internal.h +++ b/src/libbpf_internal.h @@ -89,6 +89,13 @@ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +/* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is + * a string literal known at compilation time or char * pointer known only at + * runtime. + */ +#define str_has_pfx(str, pfx) \ + (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) + /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. From 62ea715f717344562102f790fac556a926a94bc8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:43 -0700 Subject: [PATCH 49/72] libbpf: Refactor ELF section handler definitions Refactor ELF section handler definitions table to use a set of flags and unified SEC_DEF() macro. This allows for more succinct and table-like set of definitions, and allows to more easily extend the logic without adding more verbosity (this is utilized in later patches in the series). This approach is also making libbpf-internal program pre-load callback not rely on bpf_sec_def definition, which demonstrates that future pluggable ELF section handlers will be able to achieve similar level of integration without libbpf having to expose extra types and APIs. For starters, update SEC_DEF() definitions and make them more succinct. Also convert BPF_PROG_SEC() and BPF_APROG_COMPAT() definitions to a common SEC_DEF() use. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20210928161946.2512801-8-andrii@kernel.org --- src/libbpf.c | 195 ++++++++++++++++++++++----------------------------- 1 file changed, 84 insertions(+), 111 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index f87ffd4..954c135 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -224,19 +224,35 @@ typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); +/* stored as sec_def->cookie for all libbpf-supported SEC()s */ +enum sec_def_flags { + SEC_NONE = 0, + /* expected_attach_type is optional, if kernel doesn't support that */ + SEC_EXP_ATTACH_OPT = 1, + /* legacy, only used by libbpf_get_type_names() and + * libbpf_attach_type_by_name(), not used by libbpf itself at all. + * This used to be associated with cgroup (and few other) BPF programs + * that were attachable through BPF_PROG_ATTACH command. Pretty + * meaningless nowadays, though. + */ + SEC_ATTACHABLE = 2, + SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, + /* attachment target is specified through BTF ID in either kernel or + * other BPF program's BTF object */ + SEC_ATTACH_BTF = 4, + /* BPF program type allows sleeping/blocking in kernel */ + SEC_SLEEPABLE = 8, +}; + struct bpf_sec_def { const char *sec; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; - bool is_exp_attach_type_optional; - bool is_attachable; - bool is_attach_btf; - bool is_sleepable; + long cookie; init_fn_t init_fn; preload_fn_t preload_fn; attach_fn_t attach_fn; - long cookie; }; /* @@ -6100,26 +6116,30 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program return 0; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); +static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, + int *btf_obj_fd, int *btf_type_id); /* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ static int libbpf_preload_prog(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie) { + enum sec_def_flags def = cookie; + /* old kernels might not support specifying expected_attach_type */ - if (prog->sec_def->is_exp_attach_type_optional && - !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) + if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) attr->expected_attach_type = 0; - if (prog->sec_def->is_sleepable) + if (def & SEC_SLEEPABLE) attr->prog_flags |= BPF_F_SLEEPABLE; if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; + const char *attach_name; - err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); + attach_name = strchr(prog->sec_name, '/') + 1; + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -7956,15 +7976,14 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, .sec = string, \ .prog_type = ptype, \ .expected_attach_type = eatype, \ - .is_exp_attach_type_optional = eatype_optional, \ - .is_attachable = attachable, \ - .is_attach_btf = attach_btf, \ + .cookie = (long) ( \ + (eatype_optional ? SEC_EXP_ATTACH_OPT : 0) | \ + (attachable ? SEC_ATTACHABLE : 0) | \ + (attach_btf ? SEC_ATTACH_BTF : 0) \ + ), \ .preload_fn = libbpf_preload_prog, \ } -/* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) - /* Programs that can be attached. */ #define BPF_APROG_SEC(string, ptype, atype) \ BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0) @@ -7977,14 +7996,11 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, #define BPF_PROG_BTF(string, ptype, eatype) \ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1) -/* Programs that can be attached but attach type can't be identified by section - * name. Kept for backward compatibility. - */ -#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) - -#define SEC_DEF(sec_pfx, ptype, ...) { \ +#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ + .expected_attach_type = atype, \ + .cookie = (long)(flags), \ .preload_fn = libbpf_preload_prog, \ __VA_ARGS__ \ } @@ -7997,93 +8013,50 @@ static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); static const struct bpf_sec_def section_defs[] = { - BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE), BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT), - SEC_DEF("kprobe/", KPROBE, - .attach_fn = attach_kprobe), - BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - SEC_DEF("kretprobe/", KPROBE, - .attach_fn = attach_kprobe), - BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), - BPF_PROG_SEC("tc", BPF_PROG_TYPE_SCHED_CLS), - BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - SEC_DEF("tracepoint/", TRACEPOINT, - .attach_fn = attach_tp), - SEC_DEF("tp/", TRACEPOINT, - .attach_fn = attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, - .attach_fn = attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, - .attach_fn = attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, - .expected_attach_type = BPF_TRACE_RAW_TP, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fentry/", TRACING, - .expected_attach_type = BPF_TRACE_FENTRY, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fmod_ret/", TRACING, - .expected_attach_type = BPF_MODIFY_RETURN, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fexit/", TRACING, - .expected_attach_type = BPF_TRACE_FEXIT, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fentry.s/", TRACING, - .expected_attach_type = BPF_TRACE_FENTRY, - .is_attach_btf = true, - .is_sleepable = true, - .attach_fn = attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, - .expected_attach_type = BPF_MODIFY_RETURN, - .is_attach_btf = true, - .is_sleepable = true, - .attach_fn = attach_trace), - SEC_DEF("fexit.s/", TRACING, - .expected_attach_type = BPF_TRACE_FEXIT, - .is_attach_btf = true, - .is_sleepable = true, - .attach_fn = attach_trace), - SEC_DEF("freplace/", EXT, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("lsm/", LSM, - .is_attach_btf = true, - .expected_attach_type = BPF_LSM_MAC, - .attach_fn = attach_lsm), - SEC_DEF("lsm.s/", LSM, - .is_attach_btf = true, - .is_sleepable = true, - .expected_attach_type = BPF_LSM_MAC, - .attach_fn = attach_lsm), - SEC_DEF("iter/", TRACING, - .expected_attach_type = BPF_TRACE_ITER, - .is_attach_btf = true, - .attach_fn = attach_iter), - SEC_DEF("syscall", SYSCALL, - .is_sleepable = true), + SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), + SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), + SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), + SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_XDP_CPUMAP), BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, BPF_XDP), - BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), - BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), - BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), - BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), - BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), + SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), + SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), + SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), + SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), + SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS), BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_EGRESS), - BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE), BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK, @@ -8102,7 +8075,7 @@ static const struct bpf_sec_def section_defs[] = { BPF_SK_SKB_STREAM_PARSER), BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_VERDICT), - BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB), + SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT), BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, @@ -8139,16 +8112,14 @@ static const struct bpf_sec_def section_defs[] = { BPF_CGROUP_GETSOCKOPT), BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT), - BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), + SEC_DEF("struct_ops", STRUCT_OPS, 0, SEC_NONE), BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP), }; #undef BPF_PROG_SEC_IMPL -#undef BPF_PROG_SEC #undef BPF_APROG_SEC #undef BPF_EAPROG_SEC -#undef BPF_APROG_COMPAT #undef SEC_DEF #define MAX_TYPE_NAME_SIZE 32 @@ -8176,8 +8147,15 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - if (attach_type && !section_defs[i].is_attachable) - continue; + const struct bpf_sec_def *sec_def = §ion_defs[i]; + + if (attach_type) { + if (sec_def->preload_fn != libbpf_preload_prog) + continue; + + if (!(sec_def->cookie & SEC_ATTACHABLE)) + continue; + } if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); @@ -8501,20 +8479,13 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, return -ESRCH; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id) +static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, + int *btf_obj_fd, int *btf_type_id) { enum bpf_attach_type attach_type = prog->expected_attach_type; __u32 attach_prog_fd = prog->attach_prog_fd; - const char *attach_name; int err = 0; - if (!prog->sec_def || !prog->sec_def->is_attach_btf) { - pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", - prog->sec_name); - return -ESRCH; - } - attach_name = prog->sec_name + strlen(prog->sec_def->sec); - /* BPF program's BTF ID */ if (attach_prog_fd) { err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); @@ -8564,7 +8535,9 @@ int libbpf_attach_type_by_name(const char *name, return libbpf_err(-EINVAL); } - if (!sec_def->is_attachable) + if (sec_def->preload_fn != libbpf_preload_prog) + return libbpf_err(-EINVAL); + if (!(sec_def->cookie & SEC_ATTACHABLE)) return libbpf_err(-EINVAL); *attach_type = sec_def->expected_attach_type; From 20c1aa11b189f3323d154720994f73eec5529404 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:44 -0700 Subject: [PATCH 50/72] libbpf: Complete SEC() table unification for BPF_APROG_SEC/BPF_EAPROG_SEC Complete SEC() table refactoring towards unified form by rewriting BPF_APROG_SEC and BPF_EAPROG_SEC definitions with SEC_DEF(SEC_ATTACHABLE_OPT) (for optional expected_attach_type) and SEC_DEF(SEC_ATTACHABLE) (mandatory expected_attach_type), respectively. Drop BPF_APROG_SEC, BPF_EAPROG_SEC, and BPF_PROG_SEC_IMPL macros after that, leaving SEC_DEF() macro as the only one used. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20210928161946.2512801-9-andrii@kernel.org --- src/libbpf.c | 136 +++++++++++++-------------------------------------- 1 file changed, 35 insertions(+), 101 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 954c135..4ba67df 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -7970,32 +7970,6 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \ - attachable, attach_btf) \ - { \ - .sec = string, \ - .prog_type = ptype, \ - .expected_attach_type = eatype, \ - .cookie = (long) ( \ - (eatype_optional ? SEC_EXP_ATTACH_OPT : 0) | \ - (attachable ? SEC_ATTACHABLE : 0) | \ - (attach_btf ? SEC_ATTACH_BTF : 0) \ - ), \ - .preload_fn = libbpf_preload_prog, \ - } - -/* Programs that can be attached. */ -#define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0) - -/* Programs that must specify expected attach type at load time. */ -#define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0) - -/* Programs that use BTF to identify attach point */ -#define BPF_PROG_BTF(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1) - #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -8014,10 +7988,8 @@ static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie) static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), - BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT, - BPF_SK_REUSEPORT_SELECT_OR_MIGRATE), - BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT, - BPF_SK_REUSEPORT_SELECT), + SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), + SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), @@ -8041,87 +8013,49 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), - BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, - BPF_XDP_DEVMAP), - BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, - BPF_XDP_CPUMAP), - BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, - BPF_XDP), + SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), - BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, - BPF_CGROUP_INET_INGRESS), - BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, - BPF_CGROUP_INET_EGRESS), + SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), - BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_CREATE), - BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_RELEASE), - BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_CREATE), - BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET4_POST_BIND), - BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET6_POST_BIND), - BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE, - BPF_CGROUP_DEVICE), - BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS, - BPF_CGROUP_SOCK_OPS), - BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB, - BPF_SK_SKB_STREAM_PARSER), - BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, - BPF_SK_SKB_STREAM_VERDICT), + SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), + SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), - BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, - BPF_SK_MSG_VERDICT), - BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, - BPF_LIRC_MODE2), - BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR, - BPF_FLOW_DISSECTOR), - BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_BIND), - BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_BIND), - BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_CONNECT), - BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_CONNECT), - BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP4_SENDMSG), - BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP6_SENDMSG), - BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP4_RECVMSG), - BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP6_RECVMSG), - BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_GETPEERNAME), - BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_GETPEERNAME), - BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_GETSOCKNAME), - BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_GETSOCKNAME), - BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, - BPF_CGROUP_SYSCTL), - BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_CGROUP_GETSOCKOPT), - BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_CGROUP_SETSOCKOPT), + SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), + SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), SEC_DEF("struct_ops", STRUCT_OPS, 0, SEC_NONE), - BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, - BPF_SK_LOOKUP), + SEC_DEF("sk_lookup/", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), }; -#undef BPF_PROG_SEC_IMPL -#undef BPF_APROG_SEC -#undef BPF_EAPROG_SEC -#undef SEC_DEF - #define MAX_TYPE_NAME_SIZE 32 static const struct bpf_sec_def *find_sec_def(const char *sec_name) From 823881b4f629442dcc6e14c1d90fdaa21f9a7365 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:45 -0700 Subject: [PATCH 51/72] libbpf: Add opt-in strict BPF program section name handling logic Implement strict ELF section name handling for BPF programs. It utilizes `libbpf_set_strict_mode()` framework and adds new flag: LIBBPF_STRICT_SEC_NAME. If this flag is set, libbpf will enforce exact section name matching for a lot of program types that previously allowed just partial prefix match. E.g., if previously SEC("xdp_whatever_i_want") was allowed, now in strict mode only SEC("xdp") will be accepted, which makes SEC("") definitions cleaner and more structured. SEC() now won't be used as yet another way to uniquely encode BPF program identifier (for that C function name is better and is guaranteed to be unique within bpf_object). Now SEC() is strictly BPF program type and, depending on program type, extra load/attach parameter specification. Libbpf completely supports multiple BPF programs in the same ELF section, so multiple BPF programs of the same type/specification easily co-exist together within the same bpf_object scope. Additionally, a new (for now internal) convention is introduced: section name that can be a stand-alone exact BPF program type specificator, but also could have extra parameters after '/' delimiter. An example of such section is "struct_ops", which can be specified by itself, but also allows to specify the intended operation to be attached to, e.g., "struct_ops/dctcp_init". Note, that "struct_ops_some_op" is not allowed. Such section definition is specified as "struct_ops+". This change is part of libbpf 1.0 effort ([0], [1]). [0] Closes: https://github.com/libbpf/libbpf/issues/271 [1] https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0#stricter-and-more-uniform-bpf-program-section-name-sec-handling Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20210928161946.2512801-10-andrii@kernel.org --- src/libbpf.c | 136 +++++++++++++++++++++++++++++--------------- src/libbpf_legacy.h | 9 +++ 2 files changed, 99 insertions(+), 46 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 4ba67df..3e1f621 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -242,6 +242,8 @@ enum sec_def_flags { SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, + /* allow non-strict prefix matching */ + SEC_SLOPPY_PFX = 16, }; struct bpf_sec_def { @@ -7987,16 +7989,16 @@ static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); static const struct bpf_sec_def section_defs[] = { - SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), - SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), - SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), + SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), @@ -8015,44 +8017,44 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), - SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), - SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), - SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), - SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), - SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), - SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), - SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), - SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), - SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), - SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), - SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), - SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), - SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), - SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), - SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), - SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), - SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), - SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), - SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), - SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), - SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), - SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), - SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), - SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), - SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), - SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), - SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), - SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), - SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), - SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), - SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), - SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), - SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), - SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), - SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), - SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), - SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), - SEC_DEF("struct_ops", STRUCT_OPS, 0, SEC_NONE), + SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), SEC_DEF("sk_lookup/", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), }; @@ -8060,11 +8062,53 @@ static const struct bpf_sec_def section_defs[] = { static const struct bpf_sec_def *find_sec_def(const char *sec_name) { - int i, n = ARRAY_SIZE(section_defs); + const struct bpf_sec_def *sec_def; + enum sec_def_flags sec_flags; + int i, n = ARRAY_SIZE(section_defs), len; + bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME; for (i = 0; i < n; i++) { - if (str_has_pfx(sec_name, section_defs[i].sec)) - return §ion_defs[i]; + sec_def = §ion_defs[i]; + sec_flags = sec_def->cookie; + len = strlen(sec_def->sec); + + /* "type/" always has to have proper SEC("type/extras") form */ + if (sec_def->sec[len - 1] == '/') { + if (str_has_pfx(sec_name, sec_def->sec)) + return sec_def; + continue; + } + + /* "type+" means it can be either exact SEC("type") or + * well-formed SEC("type/extras") with proper '/' separator + */ + if (sec_def->sec[len - 1] == '+') { + len--; + /* not even a prefix */ + if (strncmp(sec_name, sec_def->sec, len) != 0) + continue; + /* exact match or has '/' separator */ + if (sec_name[len] == '\0' || sec_name[len] == '/') + return sec_def; + continue; + } + + /* SEC_SLOPPY_PFX definitions are allowed to be just prefix + * matches, unless strict section name mode + * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the + * match has to be exact. + */ + if ((sec_flags & SEC_SLOPPY_PFX) && !strict) { + if (str_has_pfx(sec_name, sec_def->sec)) + return sec_def; + continue; + } + + /* Definitions not marked SEC_SLOPPY_PFX (e.g., + * SEC("syscall")) are exact matches in both modes. + */ + if (strcmp(sec_name, sec_def->sec) == 0) + return sec_def; } return NULL; } diff --git a/src/libbpf_legacy.h b/src/libbpf_legacy.h index df0d03d..74e6f86 100644 --- a/src/libbpf_legacy.h +++ b/src/libbpf_legacy.h @@ -46,6 +46,15 @@ enum libbpf_strict_mode { */ LIBBPF_STRICT_DIRECT_ERRS = 0x02, + /* + * Enforce strict BPF program section (SEC()) names. + * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were + * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become + * unrecognized by libbpf and would have to be just SEC("xdp") and + * SEC("xdp") and SEC("perf_event"). + */ + LIBBPF_STRICT_SEC_NAME = 0x04, + __LIBBPF_STRICT_LAST, }; From 3fe8ee2edb5983ddf0990d0199ccd0ff555cba53 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 09:19:46 -0700 Subject: [PATCH 52/72] selftests/bpf: Switch sk_lookup selftests to strict SEC("sk_lookup") use Update "sk_lookup/" definition to be a stand-alone type specifier, with backwards-compatible prefix match logic in non-libbpf-1.0 mode. Currently in selftests all the "sk_lookup/" uses just use for duplicated unique name encoding, which is redundant as BPF program's name (C function name) uniquely and descriptively identifies the intended use for such BPF programs. With libbpf's SEC_DEF("sk_lookup") definition updated, switch existing sk_lookup programs to use "unqualified" SEC("sk_lookup") section names, with no random text after it. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20210928161946.2512801-11-andrii@kernel.org --- src/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libbpf.c b/src/libbpf.c index 3e1f621..1c859b3 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -8055,7 +8055,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), - SEC_DEF("sk_lookup/", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), + SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), }; #define MAX_TYPE_NAME_SIZE 32 From 2cfeea135cd73f02a89754b3bc1aeded9a221d75 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 24 Sep 2021 08:07:25 +0530 Subject: [PATCH 53/72] libbpf: Fix segfault in static linker for objects without BTF When a BPF object is compiled without BTF info (without -g), trying to link such objects using bpftool causes a SIGSEGV due to btf__get_nr_types accessing obj->btf which is NULL. Fix this by checking for the NULL pointer, and return error. Reproducer: $ cat a.bpf.c extern int foo(void); int bar(void) { return foo(); } $ cat b.bpf.c int foo(void) { return 0; } $ clang -O2 -target bpf -c a.bpf.c $ clang -O2 -target bpf -c b.bpf.c $ bpftool gen obj out a.bpf.o b.bpf.o Segmentation fault (core dumped) After fix: $ bpftool gen obj out a.bpf.o b.bpf.o libbpf: failed to find BTF info for object 'a.bpf.o' Error: failed to link 'a.bpf.o': Unknown error -22 (-22) Fixes: a46349227cd8 (libbpf: Add linker extern resolution support for functions and global variables) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210924023725.70228-1-memxor@gmail.com --- src/linker.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/linker.c b/src/linker.c index 10911a8..2df880c 100644 --- a/src/linker.c +++ b/src/linker.c @@ -1649,11 +1649,17 @@ static bool btf_is_non_static(const struct btf_type *t) static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name, int *out_btf_sec_id, int *out_btf_id) { - int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0; + int i, j, n, m, btf_id = 0; const struct btf_type *t; const struct btf_var_secinfo *vi; const char *name; + if (!obj->btf) { + pr_warn("failed to find BTF info for object '%s'\n", obj->filename); + return -EINVAL; + } + + n = btf__get_nr_types(obj->btf); for (i = 1; i <= n; i++) { t = btf__type_by_id(obj->btf, i); From 151e3cb31464d072d59e7f98cbbcf93f2fa6bf9c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 23:29:22 -0700 Subject: [PATCH 54/72] sync: auto-generate latest BPF helpers Latest changes to BPF helper definitions. --- src/bpf_helper_defs.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/bpf_helper_defs.h b/src/bpf_helper_defs.h index 2ee6ab5..dcb23fb 100644 --- a/src/bpf_helper_defs.h +++ b/src/bpf_helper_defs.h @@ -190,7 +190,7 @@ static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7; * bpf_get_smp_processor_id * * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the + * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. * @@ -3012,7 +3012,7 @@ static __u64 (*bpf_ktime_get_boot_ns)(void) = (void *) 125; * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes. + * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -3873,7 +3873,8 @@ static long (*bpf_for_each_map_elem)(void *map, void *callback_fn, void *callbac * Each format specifier in **fmt** corresponds to one u64 element * in the **data** array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* - * array. The *data_len* is the size of *data* in bytes. + * array. The *data_len* is the size of *data* in bytes - must be + * a multiple of 8. * * Formats **%s** and **%p{i,I}{4,6}** require to read kernel * memory. Reading kernel memory may fail due to either invalid @@ -4068,4 +4069,18 @@ static long (*bpf_task_pt_regs)(struct task_struct *task) = (void *) 175; */ static long (*bpf_get_branch_snapshot)(void *entries, __u32 size, __u64 flags) = (void *) 176; +/* + * bpf_trace_vprintk + * + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 + * to format and can handle more format args as a result. + * + * Arguments are to be used as in **bpf_seq_printf**\ () helper. + * + * Returns + * The number of bytes written to the buffer, or a negative error + * in case of failure. + */ +static long (*bpf_trace_vprintk)(const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 177; + From e671a47bc279c6a73d1fee5aa522b0979fd1a33a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Sep 2021 23:29:22 -0700 Subject: [PATCH 55/72] sync: latest libbpf changes from kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syncing latest libbpf commits from kernel repository. Baseline bpf-next commit: 69cd823956ba8ce266a901170b1060db8073bddd Checkpoint bpf-next commit: 38261f369fb905552ebdd3feb9699c0788fd3371 Baseline bpf commit: bc23f724481759d0fac61dfb5ce979af2190bbe0 Checkpoint bpf commit: 571fa247ab411f3233eeaaf837c6e646a513b9f8 Andrii Nakryiko (15): libbpf: Use pre-setup sec_def in libbpf_find_attach_btf_id() libbpf: Deprecated bpf_object_open_opts.relaxed_core_relocs libbpf: Allow skipping attach_func_name in bpf_program__set_attach_target() libbpf: Schedule open_opts.attach_prog_fd deprecation since v0.7 libbpf: Constify all high-level program attach APIs libbpf: Fix memory leak in legacy kprobe attach logic libbpf: Refactor and simplify legacy kprobe code libbpf: Add legacy uprobe attaching support libbpf: Add "tc" SEC_DEF which is a better name for "classifier" libbpf: Refactor internal sec_def handling to enable pluggability libbpf: Reduce reliance of attach_fns on sec_def internals libbpf: Refactor ELF section handler definitions libbpf: Complete SEC() table unification for BPF_APROG_SEC/BPF_EAPROG_SEC libbpf: Add opt-in strict BPF program section name handling logic selftests/bpf: Switch sk_lookup selftests to strict SEC("sk_lookup") use Dave Marchevsky (4): bpf: Add bpf_trace_vprintk helper libbpf: Modify bpf_printk to choose helper based on arg count libbpf: Use static const fmt string in __bpf_printk bpf: Clarify data_len param in bpf_snprintf and bpf_seq_printf comments Grant Seltzer (1): libbpf: Add doc comments in libbpf.h Kumar Kartikeya Dwivedi (1): libbpf: Fix segfault in static linker for objects without BTF Matteo Croce (1): bpf: Update bpf_get_smp_processor_id() documentation Toke Høiland-Jørgensen (1): libbpf: Ignore STT_SECTION symbols in 'maps' section include/uapi/linux/bpf.h | 18 +- src/bpf_helpers.h | 51 ++- src/libbpf.c | 882 +++++++++++++++++++++++---------------- src/libbpf.h | 106 +++-- src/libbpf_common.h | 5 + src/libbpf_internal.h | 7 + src/libbpf_legacy.h | 9 + src/linker.c | 8 +- 8 files changed, 679 insertions(+), 407 deletions(-) -- 2.30.2 --- BPF-CHECKPOINT-COMMIT | 2 +- CHECKPOINT-COMMIT | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BPF-CHECKPOINT-COMMIT b/BPF-CHECKPOINT-COMMIT index 84dc5c2..88e7f95 100644 --- a/BPF-CHECKPOINT-COMMIT +++ b/BPF-CHECKPOINT-COMMIT @@ -1 +1 @@ -bc23f724481759d0fac61dfb5ce979af2190bbe0 +571fa247ab411f3233eeaaf837c6e646a513b9f8 diff --git a/CHECKPOINT-COMMIT b/CHECKPOINT-COMMIT index 1ac683c..3e8a513 100644 --- a/CHECKPOINT-COMMIT +++ b/CHECKPOINT-COMMIT @@ -1 +1 @@ -69cd823956ba8ce266a901170b1060db8073bddd +38261f369fb905552ebdd3feb9699c0788fd3371 From b0feb9b4d5b77fccafe476091dbfe6e5a1e4646e Mon Sep 17 00:00:00 2001 From: Sergei Iudin Date: Fri, 1 Oct 2021 12:58:36 -0700 Subject: [PATCH 56/72] Remove caching of pahole test result Initial idea was to run it hourly, but when it runs hourly it produce a lot of useless noise and we had to switch it do daily. When we run it daily caching make much less sense and only make debugging more complex. --- .github/workflows/pahole.yml | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/.github/workflows/pahole.yml b/.github/workflows/pahole.yml index 379da76..a4d09b8 100644 --- a/.github/workflows/pahole.yml +++ b/.github/workflows/pahole.yml @@ -14,27 +14,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/setup - - name: Get current pahole sha - id: current_sha - run: - git ls-remote https://git.kernel.org/pub/scm/devel/pahole/pahole.git $STAGING | awk '{print "::set-output name=sha::" $1}' - - name: Get latest result for this sha - id: latest - uses: pat-s/always-upload-cache@v2 - with: - path: last_tested_pahole - key: ${{ steps.current_sha.outputs.sha }} - - name: Return cached test result - run: exit `cat last_tested_pahole || echo 1` # if file is empty that mean previous run timed out of canceled, returning failure - if: steps.latest.outputs.cache-hit == 'true' - uses: ./.github/actions/vmtest with: kernel: LATEST pahole: $STAGING - if: steps.latest.outputs.cache-hit != 'true' - - name: Save success - run: echo 0 > last_tested_pahole - if: steps.latest.outputs.cache-hit != 'true' - - name: Save failure - run: echo 1 > last_tested_pahole - if: ${{ failure() && steps.latest.outputs.cache-hit != 'true' }} From f49c65d2161b58b8d015f6a1515f0e3616fa2b8b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 27 Sep 2021 20:29:37 +0530 Subject: [PATCH 57/72] libbpf: Make gen_loader data aligned. Align gen_loader data to 8 byte boundary to make sure union bpf_attr, bpf_insns and other structs are aligned. Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210927145941.1383001-9-memxor@gmail.com --- src/gen_loader.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gen_loader.c b/src/gen_loader.c index 8df718a..80087b1 100644 --- a/src/gen_loader.c +++ b/src/gen_loader.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "btf.h" #include "bpf.h" #include "libbpf.h" @@ -135,13 +136,17 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level) static int add_data(struct bpf_gen *gen, const void *data, __u32 size) { + __u32 size8 = roundup(size, 8); + __u64 zero = 0; void *prev; - if (realloc_data_buf(gen, size)) + if (realloc_data_buf(gen, size8)) return 0; prev = gen->data_cur; memcpy(gen->data_cur, data, size); gen->data_cur += size; + memcpy(gen->data_cur, &zero, size8 - size); + gen->data_cur += size8 - size; return prev - gen->data_start; } From 531be4d879c81a215c8a24069ad20b20870db39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 29 Sep 2021 23:38:37 +0200 Subject: [PATCH 58/72] libbpf: Properly ignore STT_SECTION symbols in legacy map definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous patch to ignore STT_SECTION symbols only added the ignore condition in one of them. This fails if there's more than one map definition in the 'maps' section, because the subsequent modulus check will fail, resulting in error messages like: libbpf: elf: unable to determine legacy map definition size in ./xdpdump_xdp.o Fix this by also ignoring STT_SECTION in the first loop. Fixes: c3e8c44a9063 ("libbpf: Ignore STT_SECTION symbols in 'maps' section") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210929213837.832449-1-toke@redhat.com --- src/libbpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libbpf.c b/src/libbpf.c index 1c859b3..7544d7d 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -1868,6 +1868,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) continue; if (sym.st_shndx != obj->efile.maps_shndx) continue; + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) + continue; nr_maps++; } /* Assume equally sized map definitions */ From 8c2d905ff4e72112d0b472487c158f422292c602 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 27 Sep 2021 20:29:39 +0530 Subject: [PATCH 59/72] libbpf: Fix skel_internal.h to set errno on loader retval < 0 When the loader indicates an internal error (result of a checked bpf system call), it returns the result in attr.test.retval. However, tests that rely on ASSERT_OK_PTR on NULL (returned from light skeleton) may miss that NULL denotes an error if errno is set to 0. This would result in skel pointer being NULL, while ASSERT_OK_PTR returning 1, leading to a SEGV on dereference of skel, because libbpf_get_error relies on the assumption that errno is always set in case of error for ptr == NULL. In particular, this was observed for the ksyms_module test. When executed using `./test_progs -t ksyms`, prior tests manipulated errno and the test didn't crash when it failed at ksyms_module load, while using `./test_progs -t ksyms_module` crashed due to errno being untouched. Fixes: 67234743736a (libbpf: Generate loader program out of BPF ELF file.) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210927145941.1383001-11-memxor@gmail.com --- src/skel_internal.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/skel_internal.h b/src/skel_internal.h index b22b50c..9cf6670 100644 --- a/src/skel_internal.h +++ b/src/skel_internal.h @@ -105,10 +105,12 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); if (err < 0 || (int)attr.test.retval < 0) { opts->errstr = "failed to execute loader prog"; - if (err < 0) + if (err < 0) { err = -errno; - else + } else { err = (int)attr.test.retval; + errno = -err; + } goto out; } err = 0; From 962f24137978bf3321bed97785bae59b5a3f3b06 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Fri, 1 Oct 2021 00:14:55 +0800 Subject: [PATCH 60/72] libbpf: Support uniform BTF-defined key/value specification across all BPF maps A bunch of BPF maps do not support specifying BTF types for key and value. This is non-uniform and inconvenient[0]. Currently, libbpf uses a retry logic which removes BTF type IDs when BPF map creation failed. Instead of retrying, this commit recognizes those specialized maps and removes BTF type IDs when creating BPF map. [0] Closes: https://github.com/libbpf/libbpf/issues/355 Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210930161456.3444544-2-hengqi.chen@gmail.com --- src/libbpf.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/libbpf.c b/src/libbpf.c index 7544d7d..e23f1b6 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -4669,6 +4669,30 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.inner_map_fd = map->inner_map_fd; } + switch (def->type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_RINGBUF: + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; + default: + break; + } + if (obj->gen_loader) { bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. From 7dde8f8f8d72ef084fb140c239fca9cf1cf79245 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:54 +0530 Subject: [PATCH 61/72] libbpf: Support kernel module function calls This patch adds libbpf support for kernel module function call support. The fd_array parameter is used during BPF program load to pass module BTFs referenced by the program. insn->off is set to index into this array, but starts from 1, because insn->off as 0 is reserved for btf_vmlinux. We try to use existing insn->off for a module, since the kernel limits the maximum distinct module BTFs for kfuncs to 256, and also because index must never exceed the maximum allowed value that can fit in insn->off (INT16_MAX). In the future, if kernel interprets signed offset as unsigned for kfunc calls, this limit can be increased to UINT16_MAX. Also introduce a btf__find_by_name_kind_own helper to start searching from module BTF's start id when we know that the BTF ID is not present in vmlinux BTF (in find_ksym_btf_id). Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-7-memxor@gmail.com --- src/bpf.c | 1 + src/btf.c | 18 +++++++++-- src/libbpf.c | 74 +++++++++++++++++++++++++++++++------------ src/libbpf_internal.h | 3 ++ 4 files changed, 72 insertions(+), 24 deletions(-) diff --git a/src/bpf.c b/src/bpf.c index 2401fad..7d1741c 100644 --- a/src/bpf.c +++ b/src/bpf.c @@ -264,6 +264,7 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); + attr.fd_array = ptr_to_u64(load_attr->fd_array); if (load_attr->name) memcpy(attr.prog_name, load_attr->name, diff --git a/src/btf.c b/src/btf.c index 6ad63e4..7774f99 100644 --- a/src/btf.c +++ b/src/btf.c @@ -695,15 +695,15 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return libbpf_err(-ENOENT); } -__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, - __u32 kind) +static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, + const char *type_name, __u32 kind) { __u32 i, nr_types = btf__get_nr_types(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = start_id; i <= nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; @@ -717,6 +717,18 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return libbpf_err(-ENOENT); } +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, btf->start_id, type_name, kind); +} + +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, 1, type_name, kind); +} + static bool btf_is_modifiable(const struct btf *btf) { return (void *)btf->hdr != btf->raw_data; diff --git a/src/libbpf.c b/src/libbpf.c index e23f1b6..ea1c51d 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -443,6 +443,11 @@ struct extern_desc { /* local btf_id of the ksym extern's type. */ __u32 type_id; + /* BTF fd index to be patched in for insn->off, this is + * 0 for vmlinux BTF, index in obj->fd_array for module + * BTF + */ + __s16 btf_fd_idx; } ksym; }; }; @@ -454,6 +459,7 @@ struct module_btf { char *name; __u32 id; int fd; + int fd_array_idx; }; struct bpf_object { @@ -539,6 +545,10 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; + int *fd_array; + size_t fd_array_cap; + size_t fd_array_cnt; + char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -5407,6 +5417,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; insn[0].imm = ext->ksym.kernel_btf_id; + insn[0].off = ext->ksym.btf_fd_idx; break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -6236,6 +6247,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + load_attr.fd_array = prog->obj->fd_array; /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->preload_fn) { @@ -6752,13 +6764,14 @@ out: static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, __u16 kind, struct btf **res_btf, - int *res_btf_fd) + struct module_btf **res_mod_btf) { - int i, id, btf_fd, err; + struct module_btf *mod_btf; struct btf *btf; + int i, id, err; btf = obj->btf_vmlinux; - btf_fd = 0; + mod_btf = NULL; id = btf__find_by_name_kind(btf, ksym_name, kind); if (id == -ENOENT) { @@ -6767,10 +6780,10 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return err; for (i = 0; i < obj->btf_module_cnt; i++) { - btf = obj->btf_modules[i].btf; - /* we assume module BTF FD is always >0 */ - btf_fd = obj->btf_modules[i].fd; - id = btf__find_by_name_kind(btf, ksym_name, kind); + /* we assume module_btf's BTF FD is always >0 */ + mod_btf = &obj->btf_modules[i]; + btf = mod_btf->btf; + id = btf__find_by_name_kind_own(btf, ksym_name, kind); if (id != -ENOENT) break; } @@ -6779,7 +6792,7 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return -ESRCH; *res_btf = btf; - *res_btf_fd = btf_fd; + *res_mod_btf = mod_btf; return id; } @@ -6788,11 +6801,12 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, { const struct btf_type *targ_var, *targ_type; __u32 targ_type_id, local_type_id; + struct module_btf *mod_btf = NULL; const char *targ_var_name; - int id, btf_fd = 0, err; struct btf *btf = NULL; + int id, err; - id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); + id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); if (id == -ESRCH && ext->is_weak) { return 0; } else if (id < 0) { @@ -6827,7 +6841,7 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, } ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; ext->ksym.kernel_btf_id = id; pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); @@ -6839,26 +6853,20 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, struct extern_desc *ext) { int local_func_proto_id, kfunc_proto_id, kfunc_id; + struct module_btf *mod_btf = NULL; const struct btf_type *kern_func; struct btf *kern_btf = NULL; - int ret, kern_btf_fd = 0; + int ret; local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, - &kern_btf, &kern_btf_fd); + kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); if (kfunc_id < 0) { pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", ext->name); return kfunc_id; } - if (kern_btf != obj->btf_vmlinux) { - pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n", - ext->name); - return -ENOTSUP; - } - kern_func = btf__type_by_id(kern_btf, kfunc_id); kfunc_proto_id = kern_func->type; @@ -6870,9 +6878,30 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, return -EINVAL; } + /* set index for module BTF fd in fd_array, if unset */ + if (mod_btf && !mod_btf->fd_array_idx) { + /* insn->off is s16 */ + if (obj->fd_array_cnt == INT16_MAX) { + pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", + ext->name, mod_btf->fd_array_idx); + return -E2BIG; + } + /* Cannot use index 0 for module BTF fd */ + if (!obj->fd_array_cnt) + obj->fd_array_cnt = 1; + + ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), + obj->fd_array_cnt + 1); + if (ret) + return ret; + mod_btf->fd_array_idx = obj->fd_array_cnt; + /* we assume module BTF FD is always >0 */ + obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; + } + ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = kern_btf_fd; ext->ksym.kernel_btf_id = kfunc_id; + ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); @@ -7031,6 +7060,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = bpf_gen__finish(obj->gen_loader); } + /* clean up fd_array */ + zfree(&obj->fd_array); + /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); diff --git a/src/libbpf_internal.h b/src/libbpf_internal.h index ec79400..f7fd394 100644 --- a/src/libbpf_internal.h +++ b/src/libbpf_internal.h @@ -298,6 +298,7 @@ struct bpf_prog_load_params { __u32 log_level; char *log_buf; size_t log_buf_sz; + int *fd_array; }; int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); @@ -408,6 +409,8 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind); extern enum libbpf_strict_mode libbpf_mode; From 0141d9ddeda60283cc00f5efceaa930834acefb6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:55 +0530 Subject: [PATCH 62/72] libbpf: Resolve invalid weak kfunc calls with imm = 0, off = 0 Preserve these calls as it allows verifier to succeed in loading the program if they are determined to be unreachable after dead code elimination during program load. If not, the verifier will fail at runtime. This is done for ext->is_weak symbols similar to the case for variable ksyms. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211002011757.311265-8-memxor@gmail.com --- src/libbpf.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index ea1c51d..092cf4b 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -3439,11 +3439,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { - if (btf_is_func(t) && ext->is_weak) { - pr_warn("extern weak function %s is unsupported\n", - ext->name); - return -ENOTSUP; - } ksym_sec = sec; ext->type = EXT_KSYM; skip_mods_and_typedefs(obj->btf, t->type, @@ -5416,8 +5411,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_EXTERN_FUNC: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; - insn[0].imm = ext->ksym.kernel_btf_id; - insn[0].off = ext->ksym.btf_fd_idx; + if (ext->is_set) { + insn[0].imm = ext->ksym.kernel_btf_id; + insn[0].off = ext->ksym.btf_fd_idx; + } else { /* unresolved weak kfunc */ + insn[0].imm = 0; + insn[0].off = 0; + } break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -6807,9 +6807,9 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, int id, err; id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); - if (id == -ESRCH && ext->is_weak) { - return 0; - } else if (id < 0) { + if (id < 0) { + if (id == -ESRCH && ext->is_weak) + return 0; pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", ext->name); return id; @@ -6862,7 +6862,9 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); if (kfunc_id < 0) { - pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", + if (kfunc_id == -ESRCH && ext->is_weak) + return 0; + pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", ext->name); return kfunc_id; } From 329839374885ecbdfe29b18e3418c9bc7c296949 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 2 Oct 2021 06:47:56 +0530 Subject: [PATCH 63/72] libbpf: Update gen_loader to emit BTF_KIND_FUNC relocations This change updates the BPF syscall loader to relocate BTF_KIND_FUNC relocations, with support for weak kfunc relocations. The general idea is to move map_fds to loader map, and also use the data for storing kfunc BTF fds. Since both reuse the fd_array parameter, they need to be kept together. For map_fds, we reserve MAX_USED_MAPS slots in a region, and for kfunc, we reserve MAX_KFUNC_DESCS. This is done so that insn->off has more chances of being <= INT16_MAX than treating data map as a sparse array and adding fd as needed. When the MAX_KFUNC_DESCS limit is reached, we fall back to the sparse array model, so that as long as it does remain <= INT16_MAX, we pass an index relative to the start of fd_array. We store all ksyms in an array where we try to avoid calling the bpf_btf_find_by_name_kind helper, and also reuse the BTF fd that was already stored. This also speeds up the loading process compared to emitting calls in all cases, in later tests. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211002011757.311265-9-memxor@gmail.com --- src/bpf_gen_internal.h | 16 ++- src/gen_loader.c | 314 ++++++++++++++++++++++++++++++++++------- src/libbpf.c | 8 +- 3 files changed, 280 insertions(+), 58 deletions(-) diff --git a/src/bpf_gen_internal.h b/src/bpf_gen_internal.h index 6154003..70eccbf 100644 --- a/src/bpf_gen_internal.h +++ b/src/bpf_gen_internal.h @@ -7,6 +7,15 @@ struct ksym_relo_desc { const char *name; int kind; int insn_idx; + bool is_weak; +}; + +struct ksym_desc { + const char *name; + int ref; + int kind; + int off; + int insn; }; struct bpf_gen { @@ -24,6 +33,10 @@ struct bpf_gen { int relo_cnt; char attach_target[128]; int attach_kind; + struct ksym_desc *ksyms; + __u32 nr_ksyms; + int fd_array; + int nr_fd_array; }; void bpf_gen__init(struct bpf_gen *gen, int log_level); @@ -36,6 +49,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_a void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind, + int insn_idx); #endif diff --git a/src/gen_loader.c b/src/gen_loader.c index 80087b1..937bfc7 100644 --- a/src/gen_loader.c +++ b/src/gen_loader.c @@ -14,8 +14,10 @@ #include "bpf_gen_internal.h" #include "skel_internal.h" -#define MAX_USED_MAPS 64 -#define MAX_USED_PROGS 32 +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 +#define MAX_KFUNC_DESCS 256 +#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -30,7 +32,6 @@ */ struct loader_stack { __u32 btf_fd; - __u32 map_fd[MAX_USED_MAPS]; __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; }; @@ -143,13 +144,49 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size) if (realloc_data_buf(gen, size8)) return 0; prev = gen->data_cur; - memcpy(gen->data_cur, data, size); - gen->data_cur += size; - memcpy(gen->data_cur, &zero, size8 - size); - gen->data_cur += size8 - size; + if (data) { + memcpy(gen->data_cur, data, size); + memcpy(gen->data_cur + size, &zero, size8 - size); + } else { + memset(gen->data_cur, 0, size8); + } + gen->data_cur += size8; return prev - gen->data_start; } +/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative + * to start of fd_array. Caller can decide if it is usable or not. + */ +static int add_map_fd(struct bpf_gen *gen) +{ + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_maps == MAX_USED_MAPS) { + pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); + gen->error = -E2BIG; + return 0; + } + return gen->nr_maps++; +} + +static int add_kfunc_btf_fd(struct bpf_gen *gen) +{ + int cur; + + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_fd_array == MAX_KFUNC_DESCS) { + cur = add_data(gen, NULL, sizeof(int)); + return (cur - gen->fd_array) / sizeof(int); + } + return MAX_USED_MAPS + gen->nr_fd_array++; +} + +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -171,14 +208,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data) emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); } -/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ -static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) +static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off) { - emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); - emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off)); - emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); } static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, @@ -326,11 +371,11 @@ int bpf_gen__finish(struct bpf_gen *gen) offsetof(struct bpf_prog_desc, prog_fd), 4, stack_off(prog_fd[i])); for (i = 0; i < gen->nr_maps; i++) - move_stack2ctx(gen, - sizeof(struct bpf_loader_ctx) + - sizeof(struct bpf_map_desc) * i + - offsetof(struct bpf_map_desc, map_fd), 4, - stack_off(map_fd[i])); + move_blob2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); pr_debug("gen: finish %d\n", gen->error); @@ -390,7 +435,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, { int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; - int map_create_attr; + int map_create_attr, idx; union bpf_attr attr; memset(&attr, 0, attr_size); @@ -467,9 +512,11 @@ void bpf_gen__map_create(struct bpf_gen *gen, gen->error = -EDOM; /* internal bug */ return; } else { - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, - stack_off(map_fd[map_idx]))); - gen->nr_maps++; + /* add_map_fd does gen->nr_maps++ */ + idx = add_map_fd(gen); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, idx))); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0)); } if (close_inner_map_fd) emit_sys_close_stack(gen, stack_off(inner_map_fd)); @@ -511,8 +558,8 @@ static void emit_find_attach_target(struct bpf_gen *gen) */ } -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, - int insn_idx) +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -524,38 +571,192 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, gen->relos = relo; relo += gen->relo_cnt; relo->name = name; + relo->is_weak = is_weak; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; } -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +/* returns existing ksym_desc with ref incremented, or inserts a new one */ +static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) { - int name, insn, len = strlen(relo->name) + 1; + struct ksym_desc *kdesc; - pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); - name = add_data(gen, relo->name, len); + for (int i = 0; i < gen->nr_ksyms; i++) { + if (!strcmp(gen->ksyms[i].name, relo->name)) { + gen->ksyms[i].ref++; + return &gen->ksyms[i]; + } + } + kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); + if (!kdesc) { + gen->error = -ENOMEM; + return NULL; + } + gen->ksyms = kdesc; + kdesc = &gen->ksyms[gen->nr_ksyms++]; + kdesc->name = relo->name; + kdesc->kind = relo->kind; + kdesc->ref = 1; + kdesc->off = 0; + kdesc->insn = 0; + return kdesc; +} +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + */ +static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1; + + name_off = add_data(gen, relo->name, len); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, name)); + 0, 0, 0, name_off)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + * + * We need to reuse BTF fd for same symbol otherwise each relocation takes a new + * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, + * this would mean a new BTF fd index for each entry. By pairing symbol name + * with index, we get the insn->imm, insn->off pairing that kernel uses for + * kfunc_tab, which becomes the effective limit even though all of them may + * share same index in fd_array (such that kfunc_btf_tab has 1 element). + */ +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + int btf_fd_idx; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing bpf_insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2, + kdesc->insn + offsetof(struct bpf_insn, off)); + goto log; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + if (!relo->is_weak) + emit_check_err(gen); + /* get index in fd_array to store BTF FD at */ + btf_fd_idx = add_kfunc_btf_fd(gen); + if (btf_fd_idx > INT16_MAX) { + pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n", + btf_fd_idx, relo->name); + gen->error = -E2BIG; + return; + } + kdesc->off = btf_fd_idx; + /* set a default value for imm */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + /* skip success case store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); + /* store btf_id into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* load fd_array slot pointer */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); + /* skip store of BTF fd if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); + /* store BTF fd in slot */ + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* set a default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip insn->off store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); + /* skip if vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); + /* store index into insn[insn_idx].off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); +log: + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, + offsetof(struct bpf_insn, off))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d", + relo->name, kdesc->ref); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, kdesc->off))); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0)); + debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd", + relo->name, kdesc->ref); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + goto log; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); emit_check_err(gen); /* store btf_id into insn[insn_idx].imm */ - insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + - offsetof(struct bpf_insn, imm); - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, insn)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); - if (relo->kind == BTF_KIND_VAR) { - /* store btf_obj_fd into insn[insn_idx + 1].imm */ - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, - sizeof(struct bpf_insn))); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); +log: + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + + offsetof(struct bpf_insn, imm))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " var (%s:count=%d): imm: %%d, fd: %%d", + relo->name, kdesc->ref); +} + +static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +{ + int insn; + + pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); + switch (relo->kind) { + case BTF_KIND_VAR: + emit_relo_ksym_btf(gen, relo, insn); + break; + case BTF_KIND_FUNC: + emit_relo_kfunc_btf(gen, relo, insn); + break; + default: + pr_warn("Unknown relocation kind '%d'\n", relo->kind); + gen->error = -EDOM; + return; } } @@ -571,14 +772,22 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; - for (i = 0; i < gen->relo_cnt; i++) { - if (gen->relos[i].kind != BTF_KIND_VAR) - continue; - /* close fd recorded in insn[insn_idx + 1].imm */ - insn = insns + - sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + - offsetof(struct bpf_insn, imm); - emit_sys_close_blob(gen, insn); + for (i = 0; i < gen->nr_ksyms; i++) { + if (gen->ksyms[i].kind == BTF_KIND_VAR) { + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = gen->ksyms[i].insn; + insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); + } else { /* BTF_KIND_FUNC */ + emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); + if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) + gen->nr_fd_array--; + } + } + if (gen->nr_ksyms) { + free(gen->ksyms); + gen->nr_ksyms = 0; + gen->ksyms = NULL; } if (gen->relo_cnt) { free(gen->relos); @@ -637,9 +846,8 @@ void bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); - /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ - emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array), - stack_off(map_fd[0])); + /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ + emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, @@ -706,8 +914,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); map_update_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); emit_rel_store(gen, attr_field(map_update_attr, value), value); /* emit MAP_UPDATE_ELEM command */ @@ -725,8 +933,8 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) memset(&attr, 0, attr_size); pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); debug_ret(gen, "map_freeze"); diff --git a/src/libbpf.c b/src/libbpf.c index 092cf4b..f32fa51 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -6360,12 +6360,12 @@ static int bpf_program__record_externs(struct bpf_program *prog) ext->name); return -ENOTSUP; } - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, - relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, + BTF_KIND_VAR, relo->insn_idx); break; case RELO_EXTERN_FUNC: - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, - relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, + BTF_KIND_FUNC, relo->insn_idx); break; default: continue; From 13ebb60ab66799abf9efd24f628e4f58963eec9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Oct 2021 22:11:05 -0700 Subject: [PATCH 64/72] libbpf: Add API that copies all BTF types from one BTF object to another Add a bulk copying api, btf__add_btf(), that speeds up and simplifies appending entire contents of one BTF object to another one, taking care of copying BTF type data, adjusting resulting BTF type IDs according to their new locations in the destination BTF object, as well as copying and deduplicating all the referenced strings and updating all the string offsets in new BTF types as appropriate. This API is intended to be used from tools that are generating and otherwise manipulating BTFs generically, such as pahole. In pahole's case, this API is useful for speeding up parallelized BTF encoding, as it allows pahole to offload all the intricacies of BTF type copying to libbpf and handle the parallelization aspects of the process. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/bpf/20211006051107.17921-2-andrii@kernel.org --- src/btf.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++- src/btf.h | 22 ++++++++++ src/libbpf.map | 1 + 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/src/btf.c b/src/btf.c index 7774f99..60fbd1c 100644 --- a/src/btf.c +++ b/src/btf.c @@ -189,12 +189,17 @@ int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_ return 0; } +static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt) +{ + return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), + btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); +} + static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { __u32 *p; - p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types, BTF_MAX_NR_TYPES, 1); + p = btf_add_type_offs_mem(btf, 1); if (!p) return -ENOMEM; @@ -1703,6 +1708,111 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return btf_commit_type(btf, sz); } +static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +{ + struct btf *btf = ctx; + + if (!*type_id) /* nothing to do for VOID references */ + return 0; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + return 0; +} + +int btf__add_btf(struct btf *btf, const struct btf *src_btf) +{ + struct btf_pipe p = { .src = src_btf, .dst = btf }; + int data_sz, sz, cnt, i, err, old_strs_len; + __u32 *off; + void *t; + + /* appending split BTF isn't supported yet */ + if (src_btf->base_btf) + return libbpf_err(-ENOTSUP); + + /* deconstruct BTF, if necessary, and invalidate raw_data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + /* remember original strings section size if we have to roll back + * partial strings section changes + */ + old_strs_len = btf->hdr->str_len; + + data_sz = src_btf->hdr->type_len; + cnt = btf__get_nr_types(src_btf); + + /* pre-allocate enough memory for new types */ + t = btf_add_type_mem(btf, data_sz); + if (!t) + return libbpf_err(-ENOMEM); + + /* pre-allocate enough memory for type offset index for new types */ + off = btf_add_type_offs_mem(btf, cnt); + if (!off) + return libbpf_err(-ENOMEM); + + /* bulk copy types data for all types from src_btf */ + memcpy(t, src_btf->types_data, data_sz); + + for (i = 0; i < cnt; i++) { + sz = btf_type_size(t); + if (sz < 0) { + /* unlikely, has to be corrupted src_btf */ + err = sz; + goto err_out; + } + + /* fill out type ID to type offset mapping for lookups by type ID */ + *off = t - btf->types_data; + + /* add, dedup, and remap strings referenced by this BTF type */ + err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + if (err) + goto err_out; + + /* remap all type IDs referenced from this BTF type */ + err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + if (err) + goto err_out; + + /* go to next type data and type offset index entry */ + t += sz; + off++; + } + + /* Up until now any of the copied type data was effectively invisible, + * so if we exited early before this point due to error, BTF would be + * effectively unmodified. There would be extra internal memory + * pre-allocated, but it would not be available for querying. But now + * that we've copied and rewritten all the data successfully, we can + * update type count and various internal offsets and sizes to + * "commit" the changes and made them visible to the outside world. + */ + btf->hdr->type_len += data_sz; + btf->hdr->str_off += data_sz; + btf->nr_types += cnt; + + /* return type ID of the first added BTF type */ + return btf->start_id + btf->nr_types - cnt; +err_out: + /* zero out preallocated memory as if it was just allocated with + * libbpf_add_mem() + */ + memset(btf->types_data + btf->hdr->type_len, 0, data_sz); + memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); + + /* and now restore original strings section size; types data size + * wasn't modified, so doesn't need restoring, see big comment above */ + btf->hdr->str_len = old_strs_len; + + return libbpf_err(err); +} + /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; diff --git a/src/btf.h b/src/btf.h index 2cfe313..864eb51 100644 --- a/src/btf.h +++ b/src/btf.h @@ -173,6 +173,28 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type); +/** + * @brief **btf__add_btf()** appends all the BTF types from *src_btf* into *btf* + * @param btf BTF object which all the BTF types and strings are added to + * @param src_btf BTF object which all BTF types and referenced strings are copied from + * @return BTF type ID of the first appended BTF type, or negative error code + * + * **btf__add_btf()** can be used to simply and efficiently append the entire + * contents of one BTF object to another one. All the BTF type data is copied + * over, all referenced type IDs are adjusted by adding a necessary ID offset. + * Only strings referenced from BTF types are copied over and deduplicated, so + * if there were some unused strings in *src_btf*, those won't be copied over, + * which is consistent with the general string deduplication semantics of BTF + * writing APIs. + * + * If any error is encountered during this process, the contents of *btf* is + * left intact, which means that **btf__add_btf()** follows the transactional + * semantics and the operation as a whole is all-or-nothing. + * + * *src_btf* has to be non-split BTF, as of now copying types from split BTF + * is not supported and will result in -ENOTSUP error code returned. + */ +LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf); LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz); diff --git a/src/libbpf.map b/src/libbpf.map index 9e649cf..f6b0db1 100644 --- a/src/libbpf.map +++ b/src/libbpf.map @@ -389,5 +389,6 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: + btf__add_btf; btf__add_tag; } LIBBPF_0.5.0; From 744fd961c7a7d9d8fa1fe667ecf0cc54dd5fd525 Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Mon, 4 Oct 2021 17:56:44 -0400 Subject: [PATCH 65/72] libbpf: Add API documentation convention guidelines This adds a section to the documentation for libbpf naming convention which describes how to document API features in libbpf, specifically the format of which API doc comments need to conform to. Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211004215644.497327-1-grantseltzer@gmail.com --- docs/libbpf_naming_convention.rst | 40 +++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/docs/libbpf_naming_convention.rst b/docs/libbpf_naming_convention.rst index 9c68d50..f86360f 100644 --- a/docs/libbpf_naming_convention.rst +++ b/docs/libbpf_naming_convention.rst @@ -150,6 +150,46 @@ mirror of the mainline's version of libbpf for a stand-alone build. However, all changes to libbpf's code base must be upstreamed through the mainline kernel tree. + +API documentation convention +============================ + +The libbpf API is documented via comments above definitions in +header files. These comments can be rendered by doxygen and sphinx +for well organized html output. This section describes the +convention in which these comments should be formated. + +Here is an example from btf.h: + +.. code-block:: c + + /** + * @brief **btf__new()** creates a new instance of a BTF object from the raw + * bytes of an ELF's BTF section + * @param data raw bytes + * @param size number of bytes passed in `data` + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ + +The comment must start with a block comment of the form '/\*\*'. + +The documentation always starts with a @brief directive. This line is a short +description about this API. It starts with the name of the API, denoted in bold +like so: **api_name**. Please include an open and close parenthesis if this is a +function. Follow with the short description of the API. A longer form description +can be added below the last directive, at the bottom of the comment. + +Parameters are denoted with the @param directive, there should be one for each +parameter. If this is a function with a non-void return, use the @return directive +to document it. + License ------------------- From d665ca0bb0002d23ab98cc87e9d99c5673b8023f Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sun, 3 Oct 2021 00:10:00 +0800 Subject: [PATCH 66/72] libbpf: Deprecate bpf_object__unload() API since v0.6 BPF objects are not reloadable after unload. Users are expected to use bpf_object__close() to unload and free up resources in one operation. No need to expose bpf_object__unload() as a public API, deprecate it ([0]). Add bpf_object__unload() as an alias to internal bpf_object_unload() and replace all bpf_object__unload() uses to avoid compilation errors. [0] Closes: https://github.com/libbpf/libbpf/issues/290 Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211002161000.3854559-1-hengqi.chen@gmail.com --- src/libbpf.c | 8 +++++--- src/libbpf.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index f32fa51..4b90878 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -6672,7 +6672,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); } -int bpf_object__unload(struct bpf_object *obj) +static int bpf_object_unload(struct bpf_object *obj) { size_t i; @@ -6691,6 +6691,8 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); + static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -7089,7 +7091,7 @@ out: if (obj->maps[i].pinned && !obj->maps[i].reused) bpf_map__unpin(&obj->maps[i], NULL); - bpf_object__unload(obj); + bpf_object_unload(obj); pr_warn("failed to load object '%s'\n", obj->path); return libbpf_err(err); } @@ -7698,7 +7700,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); - bpf_object__unload(obj); + bpf_object_unload(obj); btf__free(obj->btf); btf_ext__free(obj->btf_ext); diff --git a/src/libbpf.h b/src/libbpf.h index e35490c..df10d14 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -150,6 +150,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); +LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); From d3169df7949d101830eaf63289d8c80a3e24731c Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Mon, 4 Oct 2021 00:58:43 +0800 Subject: [PATCH 67/72] libbpf: Deprecate bpf_{map,program}__{prev,next} APIs since v0.7 Deprecate bpf_{map,program}__{prev,next} APIs. Replace them with a new set of APIs named bpf_object__{prev,next}_{program,map} which follow the libbpf API naming convention ([0]). No functionality changes. [0] Closes: https://github.com/libbpf/libbpf/issues/296 Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211003165844.4054931-2-hengqi.chen@gmail.com --- src/libbpf.c | 24 ++++++++++++++++++++++++ src/libbpf.h | 35 +++++++++++++++++++++++------------ src/libbpf.map | 4 ++++ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 4b90878..ed313fd 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -7834,6 +7834,12 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, struct bpf_program * bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) +{ + return bpf_object__next_program(obj, prev); +} + +struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { struct bpf_program *prog = prev; @@ -7846,6 +7852,12 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) struct bpf_program * bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) +{ + return bpf_object__prev_program(obj, next); +} + +struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) { struct bpf_program *prog = next; @@ -8778,6 +8790,12 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) +{ + return bpf_object__next_map(obj, prev); +} + +struct bpf_map * +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { if (prev == NULL) return obj->maps; @@ -8787,6 +8805,12 @@ bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) struct bpf_map * bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) +{ + return bpf_object__prev_map(obj, next); +} + +struct bpf_map * +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { if (next == NULL) { if (!obj->nr_maps) diff --git a/src/libbpf.h b/src/libbpf.h index df10d14..89ca9c8 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -190,16 +190,22 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") +struct bpf_program *bpf_program__next(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); -#define bpf_object__for_each_program(pos, obj) \ - for ((pos) = bpf_program__next(NULL, (obj)); \ - (pos) != NULL; \ - (pos) = bpf_program__next((pos), (obj))) +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_object__next_program((obj), NULL); \ + (pos) != NULL; \ + (pos) = bpf_object__next_program((obj), (pos))) -LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") +struct bpf_program *bpf_program__prev(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -503,16 +509,21 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") +struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); + #define bpf_object__for_each_map(pos, obj) \ - for ((pos) = bpf_map__next(NULL, (obj)); \ + for ((pos) = bpf_object__next_map((obj), NULL); \ (pos) != NULL; \ - (pos) = bpf_map__next((pos), (obj))) + (pos) = bpf_object__next_map((obj), (pos))) #define bpf_map__for_each bpf_object__for_each_map +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") +struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); /** * @brief **bpf_map__fd()** gets the file descriptor of the passed diff --git a/src/libbpf.map b/src/libbpf.map index f6b0db1..f270d25 100644 --- a/src/libbpf.map +++ b/src/libbpf.map @@ -389,6 +389,10 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: + bpf_object__next_map; + bpf_object__next_program; + bpf_object__prev_map; + bpf_object__prev_program; btf__add_btf; btf__add_tag; } LIBBPF_0.5.0; From 7022527a7ba52ff31fe26f471d72552e0948d24f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 30 Sep 2021 11:46:34 +0530 Subject: [PATCH 68/72] libbpf: Fix segfault in light skeleton for objects without BTF When fed an empty BPF object, bpftool gen skeleton -L crashes at btf__set_fd() since it assumes presence of obj->btf, however for the sequence below clang adds no .BTF section (hence no BTF). Reproducer: $ touch a.bpf.c $ clang -O2 -g -target bpf -c a.bpf.c $ bpftool gen skeleton -L a.bpf.o /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* THIS FILE IS AUTOGENERATED! */ struct a_bpf { struct bpf_loader_ctx ctx; Segmentation fault (core dumped) The same occurs for files compiled without BTF info, i.e. without clang's -g flag. Fixes: 67234743736a (libbpf: Generate loader program out of BPF ELF file.) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210930061634.1840768-1-memxor@gmail.com --- src/libbpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libbpf.c b/src/libbpf.c index ed313fd..6eaff33 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -7057,7 +7057,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) if (obj->gen_loader) { /* reset FDs */ - btf__set_fd(obj->btf, -1); + if (obj->btf) + btf__set_fd(obj->btf, -1); for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) From b05bace770f0217e400fac7b3b61fd0fc2725f84 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 1 Oct 2021 11:59:10 -0700 Subject: [PATCH 69/72] libbpf: Fix memory leak in strset Free struct strset itself, not just its internal parts. Fixes: 90d76d3ececc ("libbpf: Extract internal set-of-strings datastructure APIs") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211001185910.86492-1-andrii@kernel.org --- src/strset.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/strset.c b/src/strset.c index 1fb8b49..ea65531 100644 --- a/src/strset.c +++ b/src/strset.c @@ -88,6 +88,7 @@ void strset__free(struct strset *set) hashmap__free(set->strs_hash); free(set->strs_data); + free(set); } size_t strset__data_size(const struct strset *set) From f9f6e92458899fee5d3d6c62c645755c25dd502d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Oct 2021 13:50:10 -0700 Subject: [PATCH 70/72] sync: latest libbpf changes from kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syncing latest libbpf commits from kernel repository. Baseline bpf-next commit: 38261f369fb905552ebdd3feb9699c0788fd3371 Checkpoint bpf-next commit: 0e545dbaa2797133f57bf8387e8f74cd245cedea Baseline bpf commit: 571fa247ab411f3233eeaaf837c6e646a513b9f8 Checkpoint bpf commit: d0c6416bd7091647f6041599f396bfa19ae30368 Alexei Starovoitov (1): libbpf: Make gen_loader data aligned. Andrii Nakryiko (2): libbpf: Add API that copies all BTF types from one BTF object to another libbpf: Fix memory leak in strset Grant Seltzer (1): libbpf: Add API documentation convention guidelines Hengqi Chen (3): libbpf: Support uniform BTF-defined key/value specification across all BPF maps libbpf: Deprecate bpf_object__unload() API since v0.6 libbpf: Deprecate bpf_{map,program}__{prev,next} APIs since v0.7 Kumar Kartikeya Dwivedi (5): libbpf: Fix skel_internal.h to set errno on loader retval < 0 libbpf: Support kernel module function calls libbpf: Resolve invalid weak kfunc calls with imm = 0, off = 0 libbpf: Update gen_loader to emit BTF_KIND_FUNC relocations libbpf: Fix segfault in light skeleton for objects without BTF Toke Høiland-Jørgensen (1): libbpf: Properly ignore STT_SECTION symbols in legacy map definitions docs/libbpf_naming_convention.rst | 40 ++++ src/bpf.c | 1 + src/bpf_gen_internal.h | 16 +- src/btf.c | 132 ++++++++++++- src/btf.h | 22 +++ src/gen_loader.c | 317 +++++++++++++++++++++++++----- src/libbpf.c | 165 ++++++++++++---- src/libbpf.h | 36 ++-- src/libbpf.map | 5 + src/libbpf_internal.h | 3 + src/skel_internal.h | 6 +- src/strset.c | 1 + 12 files changed, 633 insertions(+), 111 deletions(-) -- 2.30.2 --- BPF-CHECKPOINT-COMMIT | 2 +- CHECKPOINT-COMMIT | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BPF-CHECKPOINT-COMMIT b/BPF-CHECKPOINT-COMMIT index 88e7f95..06d8b5f 100644 --- a/BPF-CHECKPOINT-COMMIT +++ b/BPF-CHECKPOINT-COMMIT @@ -1 +1 @@ -571fa247ab411f3233eeaaf837c6e646a513b9f8 +d0c6416bd7091647f6041599f396bfa19ae30368 diff --git a/CHECKPOINT-COMMIT b/CHECKPOINT-COMMIT index 3e8a513..246f403 100644 --- a/CHECKPOINT-COMMIT +++ b/CHECKPOINT-COMMIT @@ -1 +1 @@ -38261f369fb905552ebdd3feb9699c0788fd3371 +0e545dbaa2797133f57bf8387e8f74cd245cedea From 133e3603ec68d1b447a9640b8f609942da152d1c Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Oct 2021 17:48:56 +0800 Subject: [PATCH 71/72] libbpf: Support detecting and attaching of writable tracepoint program Program on writable tracepoint is BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, but its attachment is the same as BPF_PROG_TYPE_RAW_TRACEPOINT. Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211004094857.30868-3-hotforest@gmail.com --- src/libbpf.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/libbpf.c b/src/libbpf.c index 6eaff33..8ba1550 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -8078,6 +8078,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), @@ -9847,12 +9849,26 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) { - const char *tp_name; + static const char *const prefixes[] = { + "raw_tp/", + "raw_tracepoint/", + "raw_tp.w/", + "raw_tracepoint.w/", + }; + size_t i; + const char *tp_name = NULL; - if (str_has_pfx(prog->sec_name, "raw_tp/")) - tp_name = prog->sec_name + sizeof("raw_tp/") - 1; - else - tp_name = prog->sec_name + sizeof("raw_tracepoint/") - 1; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { + if (str_has_pfx(prog->sec_name, prefixes[i])) { + tp_name = prog->sec_name + strlen(prefixes[i]); + break; + } + } + if (!tp_name) { + pr_warn("prog '%s': invalid section name '%s'\n", + prog->name, prog->sec_name); + return libbpf_err_ptr(-EINVAL); + } return bpf_program__attach_raw_tracepoint(prog, tp_name); } From 92c1e61a605410b16d6330fdd4a7a4e03add86d4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 10 Oct 2021 23:39:37 -0700 Subject: [PATCH 72/72] sync: latest libbpf changes from kernel Syncing latest libbpf commits from kernel repository. Baseline bpf-next commit: 0e545dbaa2797133f57bf8387e8f74cd245cedea Checkpoint bpf-next commit: 5319255b8df9271474bc9027cabf82253934f28d Baseline bpf commit: d0c6416bd7091647f6041599f396bfa19ae30368 Checkpoint bpf commit: 8d6c414cd2fb74aa6812e9bfec6178f8246c4f3a Hou Tao (1): libbpf: Support detecting and attaching of writable tracepoint program src/libbpf.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) -- 2.30.2 --- BPF-CHECKPOINT-COMMIT | 2 +- CHECKPOINT-COMMIT | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BPF-CHECKPOINT-COMMIT b/BPF-CHECKPOINT-COMMIT index 06d8b5f..6226e74 100644 --- a/BPF-CHECKPOINT-COMMIT +++ b/BPF-CHECKPOINT-COMMIT @@ -1 +1 @@ -d0c6416bd7091647f6041599f396bfa19ae30368 +8d6c414cd2fb74aa6812e9bfec6178f8246c4f3a diff --git a/CHECKPOINT-COMMIT b/CHECKPOINT-COMMIT index 246f403..4d3d8d8 100644 --- a/CHECKPOINT-COMMIT +++ b/CHECKPOINT-COMMIT @@ -1 +1 @@ -0e545dbaa2797133f57bf8387e8f74cd245cedea +5319255b8df9271474bc9027cabf82253934f28d