2019-03-04 20:56:19 -08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
|
|
|
|
/*
|
|
|
|
|
* if_xdp: XDP socket user-space interface
|
|
|
|
|
* Copyright(c) 2018 Intel Corporation.
|
|
|
|
|
*
|
|
|
|
|
* Author(s): Björn Töpel <bjorn.topel@intel.com>
|
|
|
|
|
* Magnus Karlsson <magnus.karlsson@intel.com>
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#ifndef _LINUX_IF_XDP_H
|
|
|
|
|
#define _LINUX_IF_XDP_H
|
|
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
|
|
|
|
|
|
/* Options for the sxdp_flags field */

/* Bind to a UMEM that was registered by another socket; the owning
 * socket is identified via sxdp_shared_umem_fd in struct sockaddr_xdp.
 */
#define XDP_SHARED_UMEM (1 << 0)
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */

/* If this option is set, the driver might go to sleep and in that case
 * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
 * set. If it is set, the application needs to explicitly wake up the
 * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
 * running the driver and the application on the same core, you should
 * use this option so that the kernel will yield to the user space
 * application.
 */
#define XDP_USE_NEED_WAKEUP (1 << 3)

/* By setting this option, userspace application indicates that it can
 * handle multiple descriptors per packet thus enabling AF_XDP to split
 * multi-buffer XDP frames into multiple Rx descriptors. Without this set
 * such frames will be dropped.
 */
#define XDP_USE_SG (1 << 4)
|
2019-03-04 20:56:19 -08:00
|
|
|
|
2019-08-27 02:25:27 +00:00
|
|
|
/* Flags for the 'flags' field of struct xdp_umem_reg */

/* Allow chunks to be placed at unaligned addresses within the UMEM;
 * descriptor addresses then encode a per-chunk offset in their upper
 * bits (see XSK_UNALIGNED_BUF_OFFSET_SHIFT below).
 */
#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)

/* Force checksum calculation in software. Can be used for testing or
 * working around potential HW issues. This option causes performance
 * degradation and only works in XDP_COPY mode.
 */
#define XDP_UMEM_TX_SW_CSUM (1 << 1)
|
2019-08-27 02:25:27 +00:00
|
|
|
|
2019-03-04 20:56:19 -08:00
|
|
|
/* Address structure passed to bind(2) for an AF_XDP socket. */
struct sockaddr_xdp {
	__u16 sxdp_family;         /* AF_XDP */
	__u16 sxdp_flags;          /* XDP_* binding flags defined above */
	__u32 sxdp_ifindex;        /* Interface index to bind to */
	__u32 sxdp_queue_id;       /* Queue id on that interface */
	__u32 sxdp_shared_umem_fd; /* Fd of the socket owning the UMEM;
				    * only used with XDP_SHARED_UMEM */
};
|
|
|
|
|
|
2019-08-14 09:27:20 +02:00
|
|
|
/* XDP_RING flags */

/* Set by the kernel in the ring's flags word (see xdp_ring_offset.flags)
 * when XDP_USE_NEED_WAKEUP is in use and the application must wake the
 * driver with poll() (Rx/Tx) or sendto() (Tx only).
 */
#define XDP_RING_NEED_WAKEUP (1 << 0)
|
|
|
|
|
|
2019-03-04 20:56:19 -08:00
|
|
|
/* Offsets, within one ring's mmap area, of the ring's components. */
struct xdp_ring_offset {
	__u64 producer; /* Producer index */
	__u64 consumer; /* Consumer index */
	__u64 desc;     /* Start of the descriptor array */
	__u64 flags;    /* Ring flags word (XDP_RING_NEED_WAKEUP) */
};
|
|
|
|
|
|
|
|
|
|
/* Per-ring layout information for all four rings, obtained with the
 * XDP_MMAP_OFFSETS socket option.
 */
struct xdp_mmap_offsets {
	struct xdp_ring_offset rx;
	struct xdp_ring_offset tx;
	struct xdp_ring_offset fr; /* Fill */
	struct xdp_ring_offset cr; /* Completion */
};
|
|
|
|
|
|
|
|
|
|
/* XDP socket options */
#define XDP_MMAP_OFFSETS 1 /* Get struct xdp_mmap_offsets */
#define XDP_RX_RING 2
#define XDP_TX_RING 3
#define XDP_UMEM_REG 4 /* Register a UMEM, struct xdp_umem_reg */
#define XDP_UMEM_FILL_RING 5
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7 /* Get struct xdp_statistics */
#define XDP_OPTIONS 8 /* Get struct xdp_options */
|
2019-03-04 20:56:19 -08:00
|
|
|
|
|
|
|
|
/* Argument to the XDP_UMEM_REG socket option: describes the user memory
 * area to register as a UMEM.
 */
struct xdp_umem_reg {
	__u64 addr; /* Start of packet data area */
	__u64 len; /* Length of packet data area */
	__u32 chunk_size; /* Size of each chunk (frame) in the area */
	__u32 headroom; /* Headroom reserved at the start of each chunk */
	__u32 flags; /* XDP_UMEM_* flags defined above */
	__u32 tx_metadata_len; /* NOTE(review): presumably the size of the
				* xsk_tx_metadata area preceding Tx packet
				* data — confirm against kernel docs */
};
|
|
|
|
|
|
|
|
|
|
/* Counters returned by the XDP_STATISTICS socket option. */
struct xdp_statistics {
	__u64 rx_dropped; /* Dropped for other reasons */
	__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
	__u64 rx_ring_full; /* Dropped due to rx ring being full */
	__u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */
	__u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */
};
|
|
|
|
|
|
2019-06-26 17:35:25 +03:00
|
|
|
/* Returned by the XDP_OPTIONS socket option. */
struct xdp_options {
	__u32 flags; /* XDP_OPTIONS_* bits defined below */
};
|
|
|
|
|
|
|
|
|
|
/* Flags for the flags field of struct xdp_options */
#define XDP_OPTIONS_ZEROCOPY (1 << 0) /* Socket is in zero-copy mode */
|
|
|
|
|
|
2019-03-04 20:56:19 -08:00
|
|
|
/* Pgoff values for mmap(2)ing the rings; each selects which ring the
 * mapping refers to.
 */
#define XDP_PGOFF_RX_RING 0
#define XDP_PGOFF_TX_RING 0x80000000
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
|
|
|
|
|
|
2019-08-27 02:25:27 +00:00
|
|
|
/* Masks for unaligned chunks mode (XDP_UMEM_UNALIGNED_CHUNK_FLAG): the
 * low 48 bits of a descriptor address are the buffer address, the
 * remaining upper bits carry the offset within the chunk.
 */
#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
#define XSK_UNALIGNED_BUF_ADDR_MASK \
	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
|
|
|
|
|
|
2023-11-27 11:03:08 -08:00
|
|
|
/* Request transmit timestamp. Upon completion, put it into tx_timestamp
 * field of struct xsk_tx_metadata.
 */
#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0)

/* Request transmit checksum offload. Checksum start position and offset
 * are communicated via csum_start and csum_offset fields of struct
 * xsk_tx_metadata.
 */
#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1)
|
|
|
|
|
|
|
|
|
|
/* AF_XDP offloads request. 'request' union member is consumed by the driver
 * when the packet is being transmitted. 'completion' union member is
 * filled by the driver when the transmit completion arrives.
 */
struct xsk_tx_metadata {
	__u64 flags; /* XDP_TXMD_FLAGS_* bits selecting the offloads */

	union {
		struct {
			/* XDP_TXMD_FLAGS_CHECKSUM */

			/* Offset from desc->addr where checksumming should start. */
			__u16 csum_start;
			/* Offset from csum_start where checksum should be stored. */
			__u16 csum_offset;
		} request;

		struct {
			/* XDP_TXMD_FLAGS_TIMESTAMP */
			__u64 tx_timestamp;
		} completion;
	};
};
|
|
|
|
|
|
2019-03-04 20:56:19 -08:00
|
|
|
/* Rx/Tx descriptor */
struct xdp_desc {
	__u64 addr; /* Buffer address within the UMEM (offset encoding when
		     * unaligned chunks mode is on — see masks above) */
	__u32 len; /* Length of the packet data in this buffer */
	__u32 options; /* XDP_PKT_CONTD / XDP_TX_METADATA bits */
};
|
|
|
|
|
|
2023-11-27 11:03:08 -08:00
|
|
|
/* UMEM descriptor is __u64 */

/* Flag indicating that the packet continues with the buffer pointed out by the
 * next frame in the ring. The end of the packet is signalled by setting this
 * bit to zero. For single buffer packets, every descriptor has 'options' set
 * to 0 and this maintains backward compatibility.
 */
#define XDP_PKT_CONTD (1 << 0)

/* TX packet carries valid metadata. */
#define XDP_TX_METADATA (1 << 1)
|
2019-03-04 20:56:19 -08:00
|
|
|
|
|
|
|
|
#endif /* _LINUX_IF_XDP_H */
|