diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 23e2031..91c4388 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -232,6 +240,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 @@ -329,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ @@ -342,6 +364,9 @@ union bpf_attr { __u32 func_info_rec_size; /* userspace bpf_func_info size */ __aligned_u64 func_info; /* func info */ __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -360,8 +385,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; @@ -482,18 +510,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1917,9 +1933,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1928,9 +1944,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2075,8 +2091,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. @@ -2159,29 +2175,30 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2194,12 +2211,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2207,15 +2226,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2228,12 +2247,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2241,33 +2262,71 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. - * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of *msg* + * payload and/or *pop* value being to large. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2419,9 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2416,6 +2477,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -2433,6 +2497,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2467,8 +2537,9 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { @@ -2584,8 +2655,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2594,6 +2665,7 @@ struct sk_msg_md { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ }; struct sk_reuseport_md { @@ -2601,8 +2673,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) @@ -2646,7 +2719,15 @@ struct bpf_prog_info { __u32 btf_id; __u32 func_info_rec_size; __aligned_u64 func_info; - __u32 func_info_cnt; + __u32 nr_func_info; + __u32 nr_line_info; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 nr_jited_line_info; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2959,8 +3040,18 @@ struct bpf_flow_keys { }; struct bpf_func_info { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 14f6694..7b7475e 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -34,7 +34,9 @@ struct btf_type { * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-31: unused + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. @@ -52,6 +54,7 @@ struct btf_type { #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) #define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_INT 1 /* Integer */ @@ -110,9 +113,22 @@ struct btf_array { struct btf_member { __u32 name_off; __u32 type; - __u32 offset; /* offset in bits */ + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; }; +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + /* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". * The exact number of btf_param is stored in the vlen (of the * info in "struct btf_type"). diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 486ed1f..0a4d733 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -155,7 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 -#define NETLINK_DUMP_STRICT_CHK 12 +#define NETLINK_GET_STRICT_CHK 12 struct nl_pktinfo { __u32 group; diff --git a/src/README.rst b/src/README.rst new file mode 100644 index 0000000..056f383 --- /dev/null +++ b/src/README.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +libbpf API naming convention +============================ + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one-on-one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +Another example: ``bpf_program__load`` is named for corresponding +object, ``bpf_program``, that is separated from other part of the name +by double underscore. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +libbpf ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Links +===== + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). diff --git a/src/bpf.c b/src/bpf.c index ce18221..3caaa34 100644 --- a/src/bpf.c +++ b/src/bpf.c @@ -173,11 +173,36 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, -1); } +static void * +alloc_zero_tailing_info(const void *orecord, __u32 cnt, + __u32 actual_rec_size, __u32 expected_rec_size) +{ + __u64 info_len = actual_rec_size * cnt; + void *info, *nrecord; + int i; + + info = malloc(info_len); + if (!info) + return NULL; + + /* zero out bytes kernel does not understand */ + nrecord = info; + for (i = 0; i < cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + return info; +} + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { + void *finfo = NULL, *linfo = NULL; union bpf_attr attr; - void *finfo = NULL; __u32 name_len; int fd; @@ -201,53 +226,58 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.func_info_rec_size = load_attr->func_info_rec_size; attr.func_info_cnt = load_attr->func_info_cnt; attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + if (fd >= 0) return fd; /* After bpf_prog_load, the kernel may modify certain attributes * to give user space a hint how to deal with loading failure. * Check to see whether we can make some changes and load again. */ - if (errno == E2BIG && attr.func_info_cnt && - attr.func_info_rec_size < load_attr->func_info_rec_size) { - __u32 actual_rec_size = load_attr->func_info_rec_size; - __u32 expected_rec_size = attr.func_info_rec_size; - __u32 finfo_cnt = load_attr->func_info_cnt; - __u64 finfo_len = actual_rec_size * finfo_cnt; - const void *orecord; - void *nrecord; - int i; + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + /* try with corrected func info records */ + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) + goto done; - finfo = malloc(finfo_len); - if (!finfo) - /* further try with log buffer won't help */ - return fd; + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) + goto done; - /* zero out bytes kernel does not understand */ - orecord = load_attr->func_info; - nrecord = finfo; - for (i = 0; i < load_attr->func_info_cnt; i++) { - memcpy(nrecord, orecord, expected_rec_size); - memset(nrecord + expected_rec_size, 0, - actual_rec_size - expected_rec_size); - orecord += actual_rec_size; - nrecord += actual_rec_size; + attr.line_info = ptr_to_u64(linfo); + attr.line_info_rec_size = load_attr->line_info_rec_size; + } else { + break; } - /* try with corrected func info records */ - attr.func_info = ptr_to_u64(finfo); - attr.func_info_rec_size = load_attr->func_info_rec_size; - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + if (fd >= 0) goto done; } + if (!log_buf || !log_buf_sz) + goto done; + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; @@ -256,6 +286,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); done: free(finfo); + free(linfo); return fd; } @@ -279,9 +310,9 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level) + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) { union bpf_attr attr; @@ -295,7 +326,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; - attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + attr.prog_flags = prog_flags; return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } @@ -463,6 +494,29 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, return ret; } +int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +{ + union bpf_attr attr; + int ret; + + if (!test_attr->data_out && test_attr->data_size_out > 0) + return -EINVAL; + + bzero(&attr, sizeof(attr)); + attr.test.prog_fd = test_attr->prog_fd; + attr.test.data_in = ptr_to_u64(test_attr->data_in); + attr.test.data_out = ptr_to_u64(test_attr->data_out); + attr.test.data_size_in = test_attr->data_size_in; + attr.test.data_size_out = test_attr->data_size_out; + attr.test.repeat = test_attr->repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; + test_attr->retval = attr.test.retval; + test_attr->duration = attr.test.duration; + return ret; +} + int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) { union bpf_attr attr; diff --git a/src/bpf.h b/src/bpf.h index 09e8bbe..8f09de4 100644 --- a/src/bpf.h +++ b/src/bpf.h @@ -82,6 +82,9 @@ struct bpf_load_program_attr { __u32 func_info_rec_size; const void *func_info; __u32 func_info_cnt; + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; }; /* Flags to direct loading requirements */ @@ -98,7 +101,7 @@ LIBBPF_API int bpf_load_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz); LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, + size_t insns_cnt, __u32 prog_flags, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz, int log_level); @@ -118,6 +121,25 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); + +struct bpf_prog_test_run_attr { + int prog_fd; + int repeat; + const void *data_in; + __u32 data_size_in; + void *data_out; /* optional */ + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out */ + __u32 retval; /* out: return code of the BPF program */ + __u32 duration; /* out: average per repetition in ns */ +}; + +LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); + +/* + * bpf_prog_test_run does not check that data_out is large enough. Consider + * using bpf_prog_test_run_xattr instead. + */ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); diff --git a/src/bpf_prog_linfo.c b/src/bpf_prog_linfo.c new file mode 100644 index 0000000..6978314 --- /dev/null +++ b/src/bpf_prog_linfo.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include +#include +#include +#include +#include "libbpf.h" + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif + +struct bpf_prog_linfo { + void *raw_linfo; + void *raw_jited_linfo; + __u32 *nr_jited_linfo_per_func; + __u32 *jited_linfo_func_idx; + __u32 nr_linfo; + __u32 nr_jited_func; + __u32 rec_size; + __u32 jited_rec_size; +}; + +static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, + const __u64 *ksym_func, const __u32 *ksym_len) +{ + __u32 nr_jited_func, nr_linfo; + const void *raw_jited_linfo; + const __u64 *jited_linfo; + __u64 last_jited_linfo; + /* + * Index to raw_jited_linfo: + * i: Index for searching the next ksym_func + * prev_i: Index to the last found ksym_func + */ + __u32 i, prev_i; + __u32 f; /* Index to ksym_func */ + + raw_jited_linfo = prog_linfo->raw_jited_linfo; + jited_linfo = raw_jited_linfo; + if (ksym_func[0] != *jited_linfo) + goto errout; + + prog_linfo->jited_linfo_func_idx[0] = 0; + nr_jited_func = prog_linfo->nr_jited_func; + nr_linfo = prog_linfo->nr_linfo; + + for (prev_i = 0, i = 1, f = 1; + i < nr_linfo && f < nr_jited_func; + i++) { + raw_jited_linfo += prog_linfo->jited_rec_size; + last_jited_linfo = *jited_linfo; + jited_linfo = raw_jited_linfo; + + if (ksym_func[f] == *jited_linfo) { + prog_linfo->jited_linfo_func_idx[f] = i; + + /* Sanity check */ + if (last_jited_linfo - ksym_func[f - 1] + 1 > + ksym_len[f - 1]) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[f - 1] = + i - prev_i; + prev_i = i; + + /* + * The ksym_func[f] is found in jited_linfo. + * Look for the next one. + */ + f++; + } else if (*jited_linfo <= last_jited_linfo) { + /* Ensure the addr is increasing _within_ a func */ + goto errout; + } + } + + if (f != nr_jited_func) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = + nr_linfo - prev_i; + + return 0; + +errout: + return -EINVAL; +} + +void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) +{ + if (!prog_linfo) + return; + + free(prog_linfo->raw_linfo); + free(prog_linfo->raw_jited_linfo); + free(prog_linfo->nr_jited_linfo_per_func); + free(prog_linfo->jited_linfo_func_idx); + free(prog_linfo); +} + +struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) +{ + struct bpf_prog_linfo *prog_linfo; + __u32 nr_linfo, nr_jited_func; + + nr_linfo = info->nr_line_info; + + if (!nr_linfo) + return NULL; + + /* + * The min size that bpf_prog_linfo has to access for + * searching purpose. + */ + if (info->line_info_rec_size < + offsetof(struct bpf_line_info, file_name_off)) + return NULL; + + prog_linfo = calloc(1, sizeof(*prog_linfo)); + if (!prog_linfo) + return NULL; + + /* Copy xlated line_info */ + prog_linfo->nr_linfo = nr_linfo; + prog_linfo->rec_size = info->line_info_rec_size; + prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); + if (!prog_linfo->raw_linfo) + goto err_free; + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, + nr_linfo * prog_linfo->rec_size); + + nr_jited_func = info->nr_jited_ksyms; + if (!nr_jited_func || + !info->jited_line_info || + info->nr_jited_line_info != nr_linfo || + info->jited_line_info_rec_size < sizeof(__u64) || + info->nr_jited_func_lens != nr_jited_func || + !info->jited_ksyms || + !info->jited_func_lens) + /* Not enough info to provide jited_line_info */ + return prog_linfo; + + /* Copy jited_line_info */ + prog_linfo->nr_jited_func = nr_jited_func; + prog_linfo->jited_rec_size = info->jited_line_info_rec_size; + prog_linfo->raw_jited_linfo = malloc(nr_linfo * + prog_linfo->jited_rec_size); + if (!prog_linfo->raw_jited_linfo) + goto err_free; + memcpy(prog_linfo->raw_jited_linfo, + (void *)(long)info->jited_line_info, + nr_linfo * prog_linfo->jited_rec_size); + + /* Number of jited_line_info per jited func */ + prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->nr_jited_linfo_per_func) + goto err_free; + + /* + * For each jited func, + * the start idx to the "linfo" and "jited_linfo" array, + */ + prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->jited_linfo_func_idx) + goto err_free; + + if (dissect_jited_func(prog_linfo, + (__u64 *)(long)info->jited_ksyms, + (__u32 *)(long)info->jited_func_lens)) + goto err_free; + + return prog_linfo; + +err_free: + bpf_prog_linfo__free(prog_linfo); + return NULL; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip) +{ + __u32 jited_rec_size, rec_size, nr_linfo, start, i; + const void *raw_jited_linfo, *raw_linfo; + const __u64 *jited_linfo; + + if (func_idx >= prog_linfo->nr_jited_func) + return NULL; + + nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; + if (nr_skip >= nr_linfo) + return NULL; + + start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; + jited_rec_size = prog_linfo->jited_rec_size; + raw_jited_linfo = prog_linfo->raw_jited_linfo + + (start * jited_rec_size); + jited_linfo = raw_jited_linfo; + if (addr < *jited_linfo) + return NULL; + + nr_linfo -= nr_skip; + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (start * rec_size); + for (i = 0; i < nr_linfo; i++) { + if (addr < *jited_linfo) + break; + + raw_linfo += rec_size; + raw_jited_linfo += jited_rec_size; + jited_linfo = raw_jited_linfo; + } + + return raw_linfo - rec_size; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip) +{ + const struct bpf_line_info *linfo; + __u32 rec_size, nr_linfo, i; + const void *raw_linfo; + + nr_linfo = prog_linfo->nr_linfo; + if (nr_skip >= nr_linfo) + return NULL; + + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); + linfo = raw_linfo; + if (insn_off < linfo->insn_off) + return NULL; + + nr_linfo -= nr_skip; + for (i = 0; i < nr_linfo; i++) { + if (insn_off < linfo->insn_off) + break; + + raw_linfo += rec_size; + linfo = raw_linfo; + } + + return raw_linfo - rec_size; +} diff --git a/src/btf.c b/src/btf.c index 13ddc4b..d682d3b 100644 --- a/src/btf.c +++ b/src/btf.c @@ -37,18 +37,43 @@ struct btf { int fd; }; +struct btf_ext_info { + /* + * info points to a deep copy of the individual info section + * (e.g. func_info and line_info) from the .BTF.ext. + * It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + struct btf_ext { - void *func_info; - __u32 func_info_rec_size; - __u32 func_info_len; + struct btf_ext_info func_info; + struct btf_ext_info line_info; +}; + +struct btf_ext_info_sec { + __u32 sec_name_off; + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ + __u8 data[0]; }; /* The minimum bpf_func_info checked by the loader */ struct bpf_func_info_min { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -415,7 +440,7 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) return NULL; } -int btf_get_from_id(__u32 id, struct btf **btf) +int btf__get_from_id(__u32 id, struct btf **btf) { struct bpf_btf_info btf_info = { 0 }; __u32 len = sizeof(btf_info); @@ -466,7 +491,7 @@ int btf_get_from_id(__u32 id, struct btf **btf) goto exit_free; } - *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); if (IS_ERR(*btf)) { err = PTR_ERR(*btf); *btf = NULL; @@ -479,71 +504,147 @@ exit_free: return err; } -static int btf_ext_validate_func_info(const void *finfo, __u32 size, - btf_print_fn_t err_log) +struct btf_ext_sec_copy_param { + __u32 off; + __u32 len; + __u32 min_rec_size; + struct btf_ext_info *ext_info; + const char *desc; +}; + +static int btf_ext_copy_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + struct btf_ext_sec_copy_param *ext_sec, + btf_print_fn_t err_log) { - int sec_hdrlen = sizeof(struct btf_sec_func_info); - __u32 size_left, num_records, record_size; - const struct btf_sec_func_info *sinfo; - __u64 total_record_size; + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + const struct btf_ext_info_sec *sinfo; + struct btf_ext_info *ext_info; + __u32 info_left, record_size; + /* The start of the info sec (including the __u32 record_size). */ + const void *info; - /* At least a func_info record size */ - if (size < sizeof(__u32)) { - elog("BTF.ext func_info record size not found"); + /* data and data_size do not include btf_ext_header from now on */ + data = data + hdr->hdr_len; + data_size -= hdr->hdr_len; + + if (ext_sec->off & 0x03) { + elog(".BTF.ext %s section is not aligned to 4 bytes\n", + ext_sec->desc); return -EINVAL; } - /* The record size needs to meet below minimum standard */ - record_size = *(__u32 *)finfo; - if (record_size < sizeof(struct bpf_func_info_min) || - record_size % sizeof(__u32)) { - elog("BTF.ext func_info invalid record size"); + if (data_size < ext_sec->off || + ext_sec->len > data_size - ext_sec->off) { + elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); return -EINVAL; } - sinfo = finfo + sizeof(__u32); - size_left = size - sizeof(__u32); + info = data + ext_sec->off; + info_left = ext_sec->len; - /* If no func_info records, return failure now so .BTF.ext - * won't be used. - */ - if (!size_left) { - elog("BTF.ext no func info records"); + /* At least a record size */ + if (info_left < sizeof(__u32)) { + elog(".BTF.ext %s record size not found\n", ext_sec->desc); return -EINVAL; } - while (size_left) { - if (size_left < sec_hdrlen) { - elog("BTF.ext func_info header not found"); + /* The record size needs to meet the minimum standard */ + record_size = *(__u32 *)info; + if (record_size < ext_sec->min_rec_size || + record_size & 0x03) { + elog("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); + return -EINVAL; + } + + sinfo = info + sizeof(__u32); + info_left -= sizeof(__u32); + + /* If no records, return failure now so .BTF.ext won't be used. */ + if (!info_left) { + elog("%s section in .BTF.ext has no records", ext_sec->desc); + return -EINVAL; + } + + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u64 total_record_size; + __u32 num_records; + + if (info_left < sec_hdrlen) { + elog("%s section header is not found in .BTF.ext\n", + ext_sec->desc); return -EINVAL; } - num_records = sinfo->num_func_info; + num_records = sinfo->num_info; if (num_records == 0) { - elog("incorrect BTF.ext num_func_info"); + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); return -EINVAL; } total_record_size = sec_hdrlen + (__u64)num_records * record_size; - if (size_left < total_record_size) { - elog("incorrect BTF.ext num_func_info"); + if (info_left < total_record_size) { + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); return -EINVAL; } - size_left -= total_record_size; + info_left -= total_record_size; sinfo = (void *)sinfo + total_record_size; } + ext_info = ext_sec->ext_info; + ext_info->len = ext_sec->len - sizeof(__u32); + ext_info->rec_size = record_size; + ext_info->info = malloc(ext_info->len); + if (!ext_info->info) + return -ENOMEM; + memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); + return 0; } +static int btf_ext_copy_func_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->func_info_off, + .len = hdr->func_info_len, + .min_rec_size = sizeof(struct bpf_func_info_min), + .ext_info = &btf_ext->func_info, + .desc = "func_info" + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + +static int btf_ext_copy_line_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->line_info_off, + .len = hdr->line_info_len, + .min_rec_size = sizeof(struct bpf_line_info_min), + .ext_info = &btf_ext->line_info, + .desc = "line_info", + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, btf_print_fn_t err_log) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - __u32 meta_left, last_func_info_pos; - void *finfo; if (data_size < offsetof(struct btf_ext_header, func_info_off) || data_size < hdr->hdr_len) { @@ -566,34 +667,12 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, return -ENOTSUP; } - meta_left = data_size - hdr->hdr_len; - if (!meta_left) { + if (data_size == hdr->hdr_len) { elog("BTF.ext has no data\n"); return -EINVAL; } - if (meta_left < hdr->func_info_off) { - elog("Invalid BTF.ext func_info section offset:%u\n", - hdr->func_info_off); - return -EINVAL; - } - - if (hdr->func_info_off & 0x03) { - elog("BTF.ext func_info section is not aligned to 4 bytes\n"); - return -EINVAL; - } - - last_func_info_pos = hdr->hdr_len + hdr->func_info_off + - hdr->func_info_len; - if (last_func_info_pos > data_size) { - elog("Invalid BTF.ext func_info section size:%u\n", - hdr->func_info_len); - return -EINVAL; - } - - finfo = data + hdr->hdr_len + hdr->func_info_off; - return btf_ext_validate_func_info(finfo, hdr->func_info_len, - err_log); + return 0; } void btf_ext__free(struct btf_ext *btf_ext) @@ -601,16 +680,14 @@ void btf_ext__free(struct btf_ext *btf_ext) if (!btf_ext) return; - free(btf_ext->func_info); + free(btf_ext->func_info.info); + free(btf_ext->line_info.info); free(btf_ext); } struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) { - const struct btf_ext_header *hdr; struct btf_ext *btf_ext; - void *org_fdata, *fdata; - __u32 hdrlen, size_u32; int err; err = btf_ext_parse_hdr(data, size, err_log); @@ -621,94 +698,38 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) if (!btf_ext) return ERR_PTR(-ENOMEM); - hdr = (const struct btf_ext_header *)data; - hdrlen = hdr->hdr_len; - size_u32 = sizeof(__u32); - fdata = malloc(hdr->func_info_len - size_u32); - if (!fdata) { - free(btf_ext); - return ERR_PTR(-ENOMEM); + err = btf_ext_copy_func_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); } - /* remember record size and copy rest of func_info data */ - org_fdata = data + hdrlen + hdr->func_info_off; - btf_ext->func_info_rec_size = *(__u32 *)org_fdata; - memcpy(fdata, org_fdata + size_u32, hdr->func_info_len - size_u32); - btf_ext->func_info = fdata; - btf_ext->func_info_len = hdr->func_info_len - size_u32; + err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } return btf_ext; } -int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, void **func_info, - __u32 *func_info_rec_size, __u32 *func_info_len) +static int btf_ext_reloc_info(const struct btf *btf, + const struct btf_ext_info *ext_info, + const char *sec_name, __u32 insns_cnt, + void **info, __u32 *cnt) { - __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); - __u32 i, record_size, records_len; - struct btf_sec_func_info *sinfo; - const char *info_sec_name; - __s64 remain_len; - void *data; - - record_size = btf_ext->func_info_rec_size; - sinfo = btf_ext->func_info; - remain_len = btf_ext->func_info_len; - - while (remain_len > 0) { - records_len = sinfo->num_func_info * record_size; - info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); - if (strcmp(info_sec_name, sec_name)) { - remain_len -= sec_hdrlen + records_len; - sinfo = (void *)sinfo + sec_hdrlen + records_len; - continue; - } - - data = malloc(records_len); - if (!data) - return -ENOMEM; - - memcpy(data, sinfo->data, records_len); - - /* adjust the insn_offset, the data in .BTF.ext is - * the actual byte offset, and the kernel expects - * the offset in term of bpf_insn. - * - * adjust the insn offset only, the rest data will - * be passed to kernel. - */ - for (i = 0; i < sinfo->num_func_info; i++) { - struct bpf_func_info_min *record; - - record = data + i * record_size; - record->insn_offset /= sizeof(struct bpf_insn); - } - - *func_info = data; - *func_info_len = records_len; - *func_info_rec_size = record_size; - return 0; - } - - return -EINVAL; -} - -int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *func_info_len) -{ - __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); - __u32 i, record_size, existing_flen, records_len; - struct btf_sec_func_info *sinfo; + __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, record_size, existing_len, records_len; + struct btf_ext_info_sec *sinfo; const char *info_sec_name; __u64 remain_len; void *data; - record_size = btf_ext->func_info_rec_size; - sinfo = btf_ext->func_info; - remain_len = btf_ext->func_info_len; + record_size = ext_info->rec_size; + sinfo = ext_info->info; + remain_len = ext_info->len; while (remain_len > 0) { - records_len = sinfo->num_func_info * record_size; + records_len = sinfo->num_info * record_size; info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); if (strcmp(info_sec_name, sec_name)) { remain_len -= sec_hdrlen + records_len; @@ -716,27 +737,52 @@ int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, continue; } - existing_flen = *func_info_len; - data = realloc(*func_info, existing_flen + records_len); + existing_len = (*cnt) * record_size; + data = realloc(*info, existing_len + records_len); if (!data) return -ENOMEM; - memcpy(data + existing_flen, sinfo->data, records_len); - /* adjust insn_offset only, the rest data will be passed + memcpy(data + existing_len, sinfo->data, records_len); + /* adjust insn_off only, the rest data will be passed * to the kernel. */ - for (i = 0; i < sinfo->num_func_info; i++) { - struct bpf_func_info_min *record; + for (i = 0; i < sinfo->num_info; i++) { + __u32 *insn_off; - record = data + existing_flen + i * record_size; - record->insn_offset = - record->insn_offset / sizeof(struct bpf_insn) + + insn_off = data + existing_len + (i * record_size); + *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt; } - *func_info = data; - *func_info_len = existing_flen + records_len; + *info = data; + *cnt += sinfo->num_info; return 0; } - return -EINVAL; + return -ENOENT; +} + +int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, + insns_cnt, func_info, cnt); +} + +int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, + insns_cnt, line_info, cnt); +} + +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->func_info.rec_size; +} + +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->line_info.rec_size; } diff --git a/src/btf.h b/src/btf.h index 701ad2b..b0610dc 100644 --- a/src/btf.h +++ b/src/btf.h @@ -51,13 +51,8 @@ struct btf_ext_header { /* All offsets are in bytes relative to the end of this header */ __u32 func_info_off; __u32 func_info_len; -}; - -struct btf_sec_func_info { - __u32 sec_name_off; - __u32 num_func_info; - /* Followed by num_func_info number of bpf func_info records */ - __u8 data[0]; + __u32 line_info_off; + __u32 line_info_len; }; typedef int (*btf_print_fn_t)(const char *, ...) @@ -73,16 +68,20 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); void btf_ext__free(struct btf_ext *btf_ext); -int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, void **func_info, - __u32 *func_info_rec_size, __u32 *func_info_len); -int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, void **func_info, - __u32 *func_info_len); +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *func_info_len); +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/libbpf.c b/src/libbpf.c index ed4212a..169e347 100644 --- a/src/libbpf.c +++ b/src/libbpf.c @@ -9,7 +9,9 @@ * Copyright (C) 2017 Nicira, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include @@ -165,9 +167,13 @@ struct bpf_program { int btf_fd; void *func_info; __u32 func_info_rec_size; - __u32 func_info_len; + __u32 func_info_cnt; struct bpf_capabilities *caps; + + void *line_info; + __u32 line_info_rec_size; + __u32 line_info_cnt; }; struct bpf_map { @@ -260,6 +266,7 @@ void bpf_program__unload(struct bpf_program *prog) zclose(prog->btf_fd); zfree(&prog->func_info); + zfree(&prog->line_info); } static void bpf_program__exit(struct bpf_program *prog) @@ -777,6 +784,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Data *btf_ext_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -839,14 +847,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->btf = NULL; } } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { - obj->btf_ext = btf_ext__new(data->d_buf, data->d_size, - __pr_debug); - if (IS_ERR(obj->btf_ext)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_EXT_ELF_SEC, - PTR_ERR(obj->btf_ext)); - obj->btf_ext = NULL; - } + btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -908,6 +909,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_ext_data) { + if (!obj->btf) { + pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", + BTF_EXT_ELF_SEC, BTF_ELF_SEC); + } else { + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size, + __pr_debug); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } + } + } if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj, flags); if (err) @@ -1273,6 +1290,82 @@ bpf_object__create_maps(struct bpf_object *obj) return 0; } +static int +check_btf_ext_reloc_err(struct bpf_program *prog, int err, + void *btf_prog_info, const char *info_name) +{ + if (err != -ENOENT) { + pr_warning("Error in loading %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ + + if (btf_prog_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error + * out. + */ + pr_warning("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* + * Have problem loading the very first info. Ignore + * the rest. + */ + pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); + return 0; +} + +static int +bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, + const char *section_name, __u32 insn_offset) +{ + int err; + + if (!insn_offset || prog->func_info) { + /* + * !insn_offset => main program + * + * For sub prog, the main program's func_info has to + * be loaded first (i.e. prog->func_info != NULL) + */ + err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->func_info, + &prog->func_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->func_info, + "bpf_func_info"); + + prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); + } + + if (!insn_offset || prog->line_info) { + err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->line_info, + &prog->line_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->line_info, + "bpf_line_info"); + + prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); + } + + if (!insn_offset) + prog->btf_fd = btf__fd(obj->btf); + + return 0; +} + static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) @@ -1304,17 +1397,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -ENOMEM; } - if (obj->btf && obj->btf_ext) { - err = btf_ext__reloc(obj->btf, obj->btf_ext, - text->section_name, - prog->insns_cnt, - &prog->func_info, - &prog->func_info_len); - if (err) { - pr_warning("error in btf_ext__reloc for sec %s\n", - text->section_name); + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + text->section_name, + prog->insns_cnt); + if (err) return err; - } } memcpy(new_insn + prog->insns_cnt, text->insns, @@ -1339,18 +1427,11 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) if (!prog) return 0; - if (obj->btf && obj->btf_ext) { - err = btf_ext__reloc_init(obj->btf, obj->btf_ext, - prog->section_name, - &prog->func_info, - &prog->func_info_rec_size, - &prog->func_info_len); - if (err) { - pr_warning("err in btf_ext__reloc_init for sec %s\n", - prog->section_name); + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) return err; - } - prog->btf_fd = btf__fd(obj->btf); } if (!prog->reloc_desc) @@ -1442,8 +1523,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, - __u32 func_info_cnt) + char *license, __u32 kern_version, int *pfd) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1463,8 +1543,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; - load_attr.func_info_cnt = func_info_cnt; - + load_attr.func_info_cnt = prog->func_info_cnt; + load_attr.line_info = prog->line_info; + load_attr.line_info_rec_size = prog->line_info_rec_size; + load_attr.line_info_cnt = prog->line_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1521,14 +1603,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version) { - __u32 func_info_cnt; int err = 0, fd, i; - if (prog->func_info_len == 0) - func_info_cnt = 0; - else - func_info_cnt = prog->func_info_len / prog->func_info_rec_size; - if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warning("Internal error: can't load program '%s'\n", @@ -1551,8 +1627,7 @@ bpf_program__load(struct bpf_program *prog, prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd, - func_info_cnt); + license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1582,8 +1657,7 @@ bpf_program__load(struct bpf_program *prog, err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, - license, kern_version, &fd, - func_info_cnt); + license, kern_version, &fd); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", diff --git a/src/libbpf.h b/src/libbpf.h index f30c3d0..5f68d7b 100644 --- a/src/libbpf.h +++ b/src/libbpf.h @@ -342,6 +342,19 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); +struct bpf_prog_linfo; +struct bpf_prog_info; + +LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); +LIBBPF_API struct bpf_prog_linfo * +bpf_prog_linfo__new(const struct bpf_prog_info *info); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/libbpf_errno.c b/src/libbpf_errno.c index d83b17f..4343e40 100644 --- a/src/libbpf_errno.c +++ b/src/libbpf_errno.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 Nicira, Inc. */ +#undef _GNU_SOURCE #include #include