Merge branch 'libbpf:master' into master

sync: latest libbpf changes from kernel
Syncing latest libbpf commits from kernel repository. Baseline bpf-next commit: b2531d4bdce19f28364b45aac9132e153b1f23a4 Checkpoint bpf-next commit: ac6a65868a5a45db49d5ee8524df3b701110d844 Baseline bpf commit: f3f19f939c11925dadd3f4776f99f8c278a7017b Checkpoint bpf commit: f3f19f939c11925dadd3f4776f99f8c278a7017b Andrii Nakryiko (1): libbpf: fix memory leak in attach_tp for target-less tracepoint program src/libbpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) -- 2.30.2
2026-03-14 13:29:06 +08:00 · 2022-05-17 02:16:02 +00:00 · 2022-05-16 13:46:05 -07:00 · 2022-05-16 13:46:05 -07:00 · 2022-05-13 16:13:31 -07:00 · 2022-05-13 16:13:31 -07:00
84 changed files with 94821 additions and 81977 deletions
--- a/.github/actions/build-selftests/action.yml
+++ b/.github/actions/build-selftests/action.yml
@@ -0,0 +1,30 @@
+name: 'build-selftests'
+description: 'Build BPF selftests'
+inputs:
+  repo-path:
+    description: 'where is the source code'
+    required: true
+  kernel:
+    description: 'kernel version or LATEST'
+    required: true
+    default: 'LATEST'
+  vmlinux:
+    description: 'where is vmlinux file'
+    required: true
+    default: '${{ github.workspace }}/vmlinux'
+
+runs:
+  using: "composite"
+  steps:
+    - shell: bash
+      run: |
+        echo "::group::Setup Env"
+        sudo apt-get install -y qemu-kvm zstd binutils-dev elfutils libcap-dev libelf-dev libdw-dev python3-docutils
+        echo "::endgroup::"
+    - shell: bash
+      run: |
+        export KERNEL=${{ inputs.kernel }}
+        export REPO_ROOT="${{ github.workspace }}"
+        export REPO_PATH="${{ inputs.repo-path }}"
+        export VMLINUX_BTF="${{ inputs.vmlinux }}"
+        ${{ github.action_path }}/build_selftests.sh
--- a/.github/actions/build-selftests/build_selftests.sh
+++ b/.github/actions/build-selftests/build_selftests.sh
@@ -2,16 +2,16 @@

 set -euo pipefail

-source $(cd $(dirname $0) && pwd)/helpers.sh
+THISDIR="$(cd $(dirname $0) && pwd)"
+
+source ${THISDIR}/helpers.sh

 travis_fold start prepare_selftests "Building selftests"

-sudo apt-get -y install python-docutils # for rst2man
-
-LLVM_VER=14
+LLVM_VER=15
 LIBBPF_PATH="${REPO_ROOT}"

-PREPARE_SELFTESTS_SCRIPT=${VMTEST_ROOT}/prepare_selftests-${KERNEL}.sh
+PREPARE_SELFTESTS_SCRIPT=${THISDIR}/prepare_selftests-${KERNEL}.sh
 if [ -f "${PREPARE_SELFTESTS_SCRIPT}" ]; then
 	(cd "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf" && ${PREPARE_SELFTESTS_SCRIPT})
 fi
@@ -19,9 +19,10 @@ fi
 if [[ "${KERNEL}" = 'LATEST' ]]; then
 	VMLINUX_H=
 else
-	VMLINUX_H=${VMTEST_ROOT}/vmlinux.h
+	VMLINUX_H=${THISDIR}/vmlinux.h
 fi

+cd ${REPO_ROOT}/${REPO_PATH}
 make \
 	CLANG=clang-${LLVM_VER} \
 	LLC=llc-${LLVM_VER} \
@@ -29,7 +30,8 @@ make \
 	VMLINUX_BTF="${VMLINUX_BTF}" \
 	VMLINUX_H=${VMLINUX_H} \
 	-C "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf" \
-	-j $((4*$(nproc))) >/dev/null
+	-j $((4*$(nproc))) > /dev/null
+cd -
 mkdir ${LIBBPF_PATH}/selftests
 cp -R "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf" \
 	${LIBBPF_PATH}/selftests
@@ -37,6 +39,4 @@ cd ${LIBBPF_PATH}
 rm selftests/bpf/.gitignore
 git add selftests

-git add "${VMTEST_ROOT}/configs/blacklist"
-
 travis_fold end prepare_selftests
--- a/.github/actions/build-selftests/helpers.sh
+++ b/.github/actions/build-selftests/helpers.sh
@@ -0,0 +1,44 @@
+# $1 - start or end
+# $2 - fold identifier, no spaces
+# $3 - fold section description
+travis_fold() {
+  local YELLOW='\033[1;33m'
+  local NOCOLOR='\033[0m'
+  if [ -z ${GITHUB_WORKFLOW+x} ]; then
+    echo travis_fold:$1:$2
+    if [ ! -z "${3:-}" ]; then
+      echo -e "${YELLOW}$3${NOCOLOR}"
+    fi
+    echo
+  else
+    if [ $1 = "start" ]; then
+      line="::group::$2"
+      if [ ! -z "${3:-}" ]; then
+        line="$line - ${YELLOW}$3${NOCOLOR}"
+      fi
+    else
+      line="::endgroup::"
+    fi
+    echo -e "$line"
+  fi
+}
+
+__print() {
+  local TITLE=""
+  if [[ -n $2 ]]; then
+      TITLE=" title=$2"
+  fi
+  echo "::$1${TITLE}::$3"
+}
+
+# $1 - title
+# $2 - message
+print_error() {
+  __print error $1 $2
+}
+
+# $1 - title
+# $2 - message
+print_notice() {
+  __print notice $1 $2
+}
--- a/.github/actions/build-selftests/prepare_selftests-4.9.0.sh
+++ b/.github/actions/build-selftests/prepare_selftests-4.9.0.sh
--- a/.github/actions/build-selftests/prepare_selftests-5.5.0.sh
+++ b/.github/actions/build-selftests/prepare_selftests-5.5.0.sh
--- a/.github/actions/build-selftests/vmlinux.h
+++ b/.github/actions/build-selftests/vmlinux.h
--- a/.github/actions/setup/action.yml
+++ b/.github/actions/setup/action.yml
@@ -12,7 +12,7 @@ runs:
        echo 'echo ::group::Env setup' > /tmp/ci_setup
        echo export DEBIAN_FRONTEND=noninteractive >> /tmp/ci_setup
        echo sudo apt-get update >> /tmp/ci_setup
-        echo sudo apt-get install -y aptitude qemu-kvm zstd binutils-dev elfutils libcap-dev libelf-dev libdw-dev >> /tmp/ci_setup
+        echo sudo apt-get install -y aptitude qemu-kvm zstd binutils-dev elfutils libcap-dev libelf-dev libdw-dev libguestfs-tools >> /tmp/ci_setup
        echo export PROJECT_NAME='libbpf' >> /tmp/ci_setup
        echo export AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")" >> /tmp/ci_setup
        echo export REPO_ROOT=$GITHUB_WORKSPACE >> /tmp/ci_setup
--- a/.github/actions/vmtest/action.yml
+++ b/.github/actions/vmtest/action.yml
@@ -5,31 +5,83 @@ inputs:
    description: 'kernel version or LATEST'
    required: true
    default: 'LATEST'
-  kernel-rev:
-    description: 'CHECKPOINT or rev/tag/branch'
+  arch:
+    description: 'what arch to test'
    required: true
-    default: 'CHECKPOINT'
-  kernel-origin:
-    description: 'kernel repo'
-    required: true
-    default: 'https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git'
+    default: 'x86_64'
  pahole:
-    description: 'pahole rev/tag/branch'
+    description: 'pahole rev or master'
    required: true
    default: 'master'
-  pahole-origin:
-    description: 'pahole repo'
-    required: true
-    default: 'https://git.kernel.org/pub/scm/devel/pahole/pahole.git'
 runs:
  using: "composite"
  steps:
-    - run: |
-        source /tmp/ci_setup
-        export KERNEL=${{ inputs.kernel }}
-        export KERNEL_BRANCH=${{ inputs.kernel-rev }}
-        export KERNEL_ORIGIN=${{ inputs.kernel-origin }}
-        export PAHOLE_BRANCH=${{ inputs.pahole }}
-        export PAHOLE_ORIGIN=${{ inputs.pahole-origin }}
-        $CI_ROOT/vmtest/run_vmtest.sh
+    # setup envinronment
+    - name: Setup environment
+      uses: libbpf/ci/setup-build-env@master
+      with:
+        pahole: ${{ inputs.pahole }}
+    # 1. download CHECKPOINT kernel source
+    - name: Get checkpoint commit
      shell: bash
+      run: |
+        cat CHECKPOINT-COMMIT
+        echo "CHECKPOINT=$(cat CHECKPOINT-COMMIT)" >> $GITHUB_ENV
+    - name: Get kernel source at checkpoint
+      uses: libbpf/ci/get-linux-source@master
+      with:
+        repo: 'https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git'
+        rev: ${{ env.CHECKPOINT }}
+        dest: '${{ github.workspace }}/.kernel'
+    - name: Patch kernel source
+      uses: libbpf/ci/patch-kernel@master
+      with:
+        patches-root: '${{ github.workspace }}/travis-ci/diffs'
+        repo-root: '.kernel'
+    - name: Prepare to build BPF selftests
+      shell: bash
+      run: |
+        echo "::group::Prepare buidling selftest"
+        cd .kernel
+        cp ${{ github.workspace }}/travis-ci/vmtest/configs/config-latest.${{ inputs.arch }} .config
+        make olddefconfig && make prepare
+        cd -
+        echo "::endgroup::"
+    # 2. if kernel == LATEST, build kernel image from tree
+    - name: Build kernel image
+      if: ${{ inputs.kernel == 'LATEST' }}
+      shell: bash
+      run: |
+        echo "::group::Build Kernel Image"
+        cd .kernel
+        make -j $((4*$(nproc))) all > /dev/null
+        cp vmlinux ${{ github.workspace }}
+        cd -
+        echo "::endgroup::"
+    # else, just download prebuilt kernel image
+    - name: Download prebuilt kernel
+      if: ${{ inputs.kernel != 'LATEST' }}
+      uses: libbpf/ci/download-vmlinux@master
+      with:
+        kernel: ${{ inputs.kernel }}
+        arch: ${{ inputs.arch }}
+    # 3. build selftests
+    - name: Build BPF selftests
+      uses: ./.github/actions/build-selftests
+      with:
+        repo-path: '.kernel'
+        kernel: ${{ inputs.kernel }}
+    # 4. prepare rootfs
+    - name: prepare rootfs
+      uses: libbpf/ci/prepare-rootfs@master
+      with:
+        kernel: ${{ inputs.kernel }}
+        project-name: 'libbpf'
+        arch: ${{ inputs.arch }}
+    # 5. run selftest in QEMU
+    - name: Run selftests
+      uses: libbpf/ci/run-qemu@master
+      with:
+        img: '/tmp/root.img'
+        vmlinuz: 'vmlinuz'
+        arch: ${{ inputs.arch }}
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -0,0 +1,81 @@
+name: libbpf-build
+
+on:
+  pull_request:
+  push:
+  schedule:
+    - cron:  '0 18 * * *'
+
+concurrency:
+  group: ci-build-${{ github.head_ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  debian:
+    runs-on: ubuntu-latest
+    name: Debian Build (${{ matrix.name }})
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: default
+            target: RUN
+          - name: ASan+UBSan
+            target: RUN_ASAN
+          - name: clang
+            target: RUN_CLANG
+          - name: clang ASan+UBSan
+            target: RUN_CLANG_ASAN
+          - name: gcc-10
+            target: RUN_GCC10
+          - name: gcc-10 ASan+UBSan
+            target: RUN_GCC10_ASAN
+    steps:
+      - uses: actions/checkout@v2
+        name: Checkout
+      - uses: ./.github/actions/setup
+        name: Setup
+      - uses: ./.github/actions/debian
+        name: Build
+        with:
+          target: ${{ matrix.target }}
+
+  ubuntu:
+    runs-on: ubuntu-latest
+    name: Ubuntu Focal Build (${{ matrix.arch }})
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - arch: aarch64
+          - arch: ppc64le
+          - arch: s390x
+          - arch: x86
+    steps:
+      - uses: actions/checkout@v2
+        name: Checkout
+      - uses: ./.github/actions/setup
+        name: Pre-Setup
+      - run: source /tmp/ci_setup && sudo -E $CI_ROOT/managers/ubuntu.sh
+        if: matrix.arch == 'x86'
+        name: Setup
+      - uses: uraimo/run-on-arch-action@v2.0.5
+        name: Build in docker
+        if: matrix.arch != 'x86'
+        with:
+          distro:
+            ubuntu20.04
+          arch:
+            ${{ matrix.arch }}
+          setup:
+            cp /tmp/ci_setup $GITHUB_WORKSPACE
+          dockerRunArgs: |
+            --volume "${GITHUB_WORKSPACE}:${GITHUB_WORKSPACE}"
+          shell: /bin/bash
+          install: |
+            export DEBIAN_FRONTEND=noninteractive
+            export TZ="America/Los_Angeles"
+            apt-get update -y
+            apt-get install -y tzdata build-essential sudo
+          run: source ${GITHUB_WORKSPACE}/ci_setup && $CI_ROOT/managers/ubuntu.sh
--- a/.github/workflows/cifuzz.yml
+++ b/.github/workflows/cifuzz.yml
@@ -0,0 +1,40 @@
+---
+# https://google.github.io/oss-fuzz/getting-started/continuous-integration/
+name: CIFuzz
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    if: github.repository == 'libbpf/libbpf'
+    strategy:
+      fail-fast: false
+      matrix:
+        sanitizer: [address, undefined, memory]
+    steps:
+      - name: Build Fuzzers (${{ matrix.sanitizer }})
+        id: build
+        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+        with:
+          oss-fuzz-project-name: 'libbpf'
+          dry-run: false
+          allowed-broken-targets-percentage: 0
+          sanitizer: ${{ matrix.sanitizer }}
+      - name: Run Fuzzers (${{ matrix.sanitizer }})
+        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+        with:
+          oss-fuzz-project-name: 'libbpf'
+          fuzz-seconds: 300
+          dry-run: false
+          sanitizer: ${{ matrix.sanitizer }}
+      - name: Upload Crash
+        uses: actions/upload-artifact@v1
+        if: failure() && steps.build.outcome == 'success'
+        with:
+          name: ${{ matrix.sanitizer }}-artifacts
+          path: ./out/artifacts
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -8,6 +8,7 @@ on:
 jobs:
  coverity:
    runs-on: ubuntu-latest
+    if: github.repository == 'libbpf/libbpf'
    name: Coverity
    steps:
      - uses: actions/checkout@v2
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -12,15 +12,24 @@ concurrency:

 jobs:
  vmtest:
-    runs-on: ubuntu-latest
-    name: Kernel ${{ matrix.kernel }} + selftests
+    runs-on: ${{ matrix.runs_on }}
+    name: Kernel ${{ matrix.kernel }} on ${{ matrix.runs_on }} + selftests
    strategy:
      fail-fast: false
      matrix:
        include:
          - kernel: 'LATEST'
+            runs_on: ubuntu-latest
+            arch: 'x86_64'
          - kernel: '5.5.0'
+            runs_on: ubuntu-latest
+            arch: 'x86_64'
          - kernel: '4.9.0'
+            runs_on: ubuntu-latest
+            arch: 'x86_64'
+          - kernel: 'LATEST'
+            runs_on: z15
+            arch: 's390x'
    steps:
      - uses: actions/checkout@v2
        name: Checkout
@@ -30,71 +39,4 @@ jobs:
        name: vmtest
        with:
          kernel: ${{ matrix.kernel }}
-
-  debian:
-    runs-on: ubuntu-latest
-    name: Debian Build (${{ matrix.name }})
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - name: default
-            target: RUN
-          - name: ASan+UBSan
-            target: RUN_ASAN
-          - name: clang
-            target: RUN_CLANG
-          - name: clang ASan+UBSan
-            target: RUN_CLANG_ASAN
-          - name: gcc-10
-            target: RUN_GCC10
-          - name: gcc-10 ASan+UBSan
-            target: RUN_GCC10_ASAN
-    steps:
-      - uses: actions/checkout@v2
-        name: Checkout
-      - uses: ./.github/actions/setup
-        name: Setup
-      - uses: ./.github/actions/debian
-        name: Build
-        with:
-          target: ${{ matrix.target }}
-
-  ubuntu:
-    runs-on: ubuntu-latest
-    name: Ubuntu Focal Build (${{ matrix.arch }})
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - arch: aarch64
-          - arch: ppc64le
-          - arch: s390x
-          - arch: x86
-    steps:
-      - uses: actions/checkout@v2
-        name: Checkout
-      - uses: ./.github/actions/setup
-        name: Pre-Setup
-      - run: source /tmp/ci_setup && sudo -E $CI_ROOT/managers/ubuntu.sh
-        if: matrix.arch == 'x86'
-        name: Setup
-      - uses: uraimo/run-on-arch-action@v2.0.5
-        name: Build in docker
-        if: matrix.arch != 'x86'
-        with:
-          distro:
-            ubuntu20.04
-          arch:
-            ${{ matrix.arch }}
-          setup:
-            cp /tmp/ci_setup $GITHUB_WORKSPACE
-          dockerRunArgs: |
-              --volume "${GITHUB_WORKSPACE}:${GITHUB_WORKSPACE}"
-          shell: /bin/bash
-          install: |
-              export DEBIAN_FRONTEND=noninteractive
-              export TZ="America/Los_Angeles"
-              apt-get update -y
-              apt-get install -y tzdata build-essential sudo
-          run: source ${GITHUB_WORKSPACE}/ci_setup && $CI_ROOT/managers/ubuntu.sh
+          arch: ${{ matrix.arch }}
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -10,6 +10,11 @@ sphinx:
  builder: html
  configuration: docs/conf.py

+formats:
+  - htmlzip
+  - pdf
+  - epub
+
 # Optionally set the version of Python and requirements required to build your docs
 python:
   version: 3.7
--- a/2
+++ b/2
@@ -1 +1 @@
-8d6c414cd2fb74aa6812e9bfec6178f8246c4f3a
+f3f19f939c11925dadd3f4776f99f8c278a7017b
--- a/2
+++ b/2
@@ -1 +1 @@
-5319255b8df9271474bc9027cabf82253934f28d
+ac6a65868a5a45db49d5ee8524df3b701110d844
--- a/LICENSE.BSD-2-Clause
+++ b/LICENSE.BSD-2-Clause
@@ -7,7 +7,7 @@ Usage-Guide:
    SPDX-License-Identifier: BSD-2-Clause
 License-Text:

-Copyright (c) <year> <owner> . All rights reserved.
+Copyright (c) 2015 The Libbpf Authors. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
--- a/README.md
+++ b/README.md
@@ -19,6 +19,11 @@ the examples of building BPF applications with libbpf.
 [libbpf-tools](https://github.com/iovisor/bcc/tree/master/libbpf-tools) are also
 a good source of the real-world libbpf-based tracing tools.

+See also ["BPF CO-RE reference guide"](https://nakryiko.com/posts/bpf-core-reference-guide/)
+for the coverage of practical aspects of building BPF CO-RE applications and
+["BPF CO-RE"](https://nakryiko.com/posts/bpf-portability-and-co-re/) for
+general introduction into BPF portability issues and BPF CO-RE origins.
+
 All general BPF questions, including kernel functionality, libbpf APIs and
 their application, should be sent to bpf@vger.kernel.org mailing list. You can
 subscribe to it [here](http://vger.kernel.org/vger-lists.html#bpf) and search
@@ -35,6 +40,7 @@ Build
 [![Github Actions Builds & Tests](https://github.com/libbpf/libbpf/actions/workflows/test.yml/badge.svg)](https://github.com/libbpf/libbpf/actions/workflows/test.yml)
 [![Total alerts](https://img.shields.io/lgtm/alerts/g/libbpf/libbpf.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/libbpf/libbpf/alerts/)
 [![Coverity](https://img.shields.io/coverity/scan/18195.svg)](https://scan.coverity.com/projects/libbpf)
+[![OSS-Fuzz Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/libbpf.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#libbpf)
 =====
 libelf is an internal dependency of libbpf and thus it is required to link
 against and must be installed on the system for applications to work.
@@ -67,34 +73,6 @@ $ cd src
 $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make install
 ```

-Distributions
-=============
-
-Distributions packaging libbpf from this mirror:
-  - [Fedora](https://src.fedoraproject.org/rpms/libbpf)
-  - [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf)
-  - [Debian](https://packages.debian.org/source/sid/libbpf)
-  - [Arch](https://www.archlinux.org/packages/extra/x86_64/libbpf/)
-  - [Ubuntu](https://packages.ubuntu.com/source/groovy/libbpf)
-  - [Alpine](https://pkgs.alpinelinux.org/packages?name=libbpf)
-
-Benefits of packaging from the mirror over packaging from kernel sources:
-  - Consistent versioning across distributions.
-  - No ties to any specific kernel, transparent handling of older kernels.
-    Libbpf is designed to be kernel-agnostic and work across multitude of
-    kernel versions. It has built-in mechanisms to gracefully handle older
-    kernels, that are missing some of the features, by working around or
-    gracefully degrading functionality. Thus libbpf is not tied to a specific
-    kernel version and can/should be packaged and versioned independently.
-  - Continuous integration testing via
-    [TravisCI](https://travis-ci.org/libbpf/libbpf).
-  - Static code analysis via [LGTM](https://lgtm.com/projects/g/libbpf/libbpf)
-    and [Coverity](https://scan.coverity.com/projects/libbpf).
-
-Package dependencies of libbpf, package names may vary across distros:
-  - zlib
-  - libelf
-
 BPF CO-RE (Compile Once – Run Everywhere)
 =========================================

@@ -140,6 +118,7 @@ Otherwise, please make sure to update it on your system.

 The following resources are useful to understand what BPF CO-RE is and how to
 use it:
+- [BPF CO-RE reference guide](https://nakryiko.com/posts/bpf-core-reference-guide/)
 - [BPF Portability and CO-RE](https://nakryiko.com/posts/bpf-portability-and-co-re/)
 - [HOWTO: BCC to libbpf conversion](https://nakryiko.com/posts/bcc-to-libbpf-howto-guide/)
 - [libbpf-tools in BCC repo](https://github.com/iovisor/bcc/tree/master/libbpf-tools)
@@ -147,6 +126,36 @@ use it:
  converting some more to both contribute to the BPF community and gain some
  more experience with it.

+Distributions
+=============
+
+Distributions packaging libbpf from this mirror:
+  - [Fedora](https://src.fedoraproject.org/rpms/libbpf)
+  - [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf)
+  - [Debian](https://packages.debian.org/source/sid/libbpf)
+  - [Arch](https://www.archlinux.org/packages/extra/x86_64/libbpf/)
+  - [Ubuntu](https://packages.ubuntu.com/source/impish/libbpf)
+  - [Alpine](https://pkgs.alpinelinux.org/packages?name=libbpf)
+
+Benefits of packaging from the mirror over packaging from kernel sources:
+  - Consistent versioning across distributions.
+  - No ties to any specific kernel, transparent handling of older kernels.
+    Libbpf is designed to be kernel-agnostic and work across multitude of
+    kernel versions. It has built-in mechanisms to gracefully handle older
+    kernels, that are missing some of the features, by working around or
+    gracefully degrading functionality. Thus libbpf is not tied to a specific
+    kernel version and can/should be packaged and versioned independently.
+  - Continuous integration testing via
+    [TravisCI](https://travis-ci.org/libbpf/libbpf).
+  - Static code analysis via [LGTM](https://lgtm.com/projects/g/libbpf/libbpf)
+    and [Coverity](https://scan.coverity.com/projects/libbpf).
+
+Package dependencies of libbpf, package names may vary across distros:
+  - zlib
+  - libelf
+
+[![libbpf distro packaging status](https://repology.org/badge/vertical-allrepos/libbpf.svg)](https://repology.org/project/libbpf/versions)
+
 License
 =======

--- a/docs/api.rst
+++ b/docs/api.rst
@@ -6,7 +6,49 @@


 LIBBPF API
-==================
+==========
+
+Error Handling
+--------------
+
+When libbpf is used in "libbpf 1.0 mode", API functions can return errors in one of two ways.
+
+You can set "libbpf 1.0" mode with the following line:
+
+.. code-block::
+
+    libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS | LIBBPF_STRICT_CLEAN_PTRS);
+
+If the function returns an error code directly, it uses 0 to indicate success
+and a negative error code to indicate what caused the error. In this case the
+error code should be checked directly from the return, you do not need to check
+errno.
+
+For example:
+
+.. code-block::
+
+    err = some_libbpf_api_with_error_return(...);
+    if (err < 0) {
+        /* Handle error accordingly */
+    }
+
+If the function returns a pointer, it will return NULL to indicate there was
+an error. In this case errno should be checked for the error code.
+
+For example:
+
+.. code-block::
+
+    ptr = some_libbpf_api_returning_ptr();
+    if (!ptr) {
+        /* note no minus sign for EINVAL and E2BIG below */
+        if (errno == EINVAL) {
+           /* handle EINVAL error */
+        } else if (errno == E2BIG) {
+           /* handle E2BIG error */
+        }
+    }

 libbpf.h
 --------
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,11 +3,10 @@
 libbpf
 ======

-For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_.
-
 .. toctree::
   :maxdepth: 1

+   API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
   libbpf_naming_convention
   libbpf_build

--- a/fuzz/bpf-object-fuzzer.c
+++ b/fuzz/bpf-object-fuzzer.c
@@ -0,0 +1,23 @@
+#include "libbpf.h"
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+	struct bpf_object *obj = NULL;
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+	int err;
+
+	libbpf_set_print(libbpf_print_fn);
+
+	opts.object_name = "fuzz-object";
+	obj = bpf_object__open_mem(data, size, &opts);
+	err = libbpf_get_error(obj);
+	if (err)
+		return 0;
+
+	bpf_object__close(obj);
+	return 0;
+}
--- a/fuzz/bpf-object-fuzzer_seed_corpus.zip
+++ b/fuzz/bpf-object-fuzzer_seed_corpus.zip
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -13,6 +13,14 @@
 		.off = OFF,					\
 		.imm = IMM })

+#define BPF_ALU32_IMM(OP, DST, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
 #define BPF_ALU64_IMM(OP, DST, IMM)				\
 	((struct bpf_insn) {					\
 		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -7,6 +7,8 @@
 #include <stddef.h>
 #include <stdint.h>

+#include <linux/stddef.h>
+
 #include <asm/types.h>
 #include <asm/posix_types.h>

--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -330,6 +330,8 @@ union bpf_iter_link_info {
 *			*ctx_out*, *data_in* and *data_out* must be NULL.
 *			*repeat* must be zero.
 *
+ *		BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
 *	Return
 *		Returns zero on success. On error, -1 is returned and *errno*
 *		is set appropriately.
@@ -906,6 +908,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_BLOOM_FILTER,
 };

 /* Note that tracing related programs such as
@@ -994,6 +997,7 @@ enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT,
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
+	BPF_TRACE_KPROBE_MULTI,
 	__MAX_BPF_ATTACH_TYPE
 };

@@ -1008,6 +1012,8 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_NETNS = 5,
 	BPF_LINK_TYPE_XDP = 6,
 	BPF_LINK_TYPE_PERF_EVENT = 7,
+	BPF_LINK_TYPE_KPROBE_MULTI = 8,
+	BPF_LINK_TYPE_STRUCT_OPS = 9,

 	MAX_BPF_LINK_TYPE,
 };
@@ -1110,6 +1116,16 @@ enum bpf_link_type {
 */
 #define BPF_F_SLEEPABLE		(1U << 4)

+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully support xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS	(1U << 5)
+
+/* link_create.kprobe_multi.flags used in LINK_CREATE command for
+ * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
+ */
+#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 * the following extensions:
 *
@@ -1224,6 +1240,8 @@ enum {

 /* If set, run the test on the cpu specified by bpf_attr.test.cpu */
 #define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
+/* If set, XDP frames will be transmitted after processing */
+#define BPF_F_TEST_XDP_LIVE_FRAMES	(1U << 1)

 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
@@ -1274,6 +1292,13 @@ union bpf_attr {
 						   * struct stored as the
 						   * map value
 						   */
+		/* Any per-map-type extra fields
+		 *
+		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
+		 * number of hash functions (if 0, the bloom filter will default
+		 * to using 5 hash functions).
+		 */
+		__u64	map_extra;
 	};

 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1334,8 +1359,10 @@ union bpf_attr {
 			/* or valid module BTF object fd or 0 to attach to vmlinux */
 			__u32		attach_btf_obj_fd;
 		};
-		__u32		:32;		/* pad */
+		__u32		core_relo_cnt;	/* number of bpf_core_relo */
 		__aligned_u64	fd_array;	/* array of FDs */
+		__aligned_u64	core_relos;
+		__u32		core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
 	};

 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1376,6 +1403,7 @@ union bpf_attr {
 		__aligned_u64	ctx_out;
 		__u32		flags;
 		__u32		cpu;
+		__u32		batch_size;
 	} test;

 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -1455,6 +1483,22 @@ union bpf_attr {
 				 */
 				__u64		bpf_cookie;
 			} perf_event;
+			struct {
+				__u32		flags;
+				__u32		cnt;
+				__aligned_u64	syms;
+				__aligned_u64	addrs;
+				__aligned_u64	cookies;
+			} kprobe_multi;
+			struct {
+				/* this is overlaid with the target_btf_id above. */
+				__u32		target_btf_id;
+				/* black box user-provided value passed through
+				 * to BPF program at the execution time and
+				 * accessible through bpf_get_attach_cookie() BPF helper
+				 */
+				__u64		cookie;
+			} tracing;
 		};
 	} link_create;

@@ -1736,7 +1780,7 @@ union bpf_attr {
 * 		if the maximum number of tail calls has been reached for this
 * 		chain of programs. This limit is defined in the kernel by the
 * 		macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
- * 		which is currently set to 32.
+ *		which is currently set to 33.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
@@ -1765,6 +1809,8 @@ union bpf_attr {
 * 		0 on success, or a negative error in case of failure.
 *
 * u64 bpf_get_current_pid_tgid(void)
+ * 	Description
+ * 		Get the current pid and tgid.
 * 	Return
 * 		A 64-bit integer containing the current tgid and pid, and
 * 		created as such:
@@ -1772,6 +1818,8 @@ union bpf_attr {
 * 		*current_task*\ **->pid**.
 *
 * u64 bpf_get_current_uid_gid(void)
+ * 	Description
+ * 		Get the current uid and gid.
 * 	Return
 * 		A 64-bit integer containing the current GID and UID, and
 * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2246,6 +2294,8 @@ union bpf_attr {
 * 		The 32-bit hash.
 *
 * u64 bpf_get_current_task(void)
+ * 	Description
+ * 		Get the current task.
 * 	Return
 * 		A pointer to the current task struct.
 *
@@ -2276,8 +2326,8 @@ union bpf_attr {
 * 	Return
 * 		The return value depends on the result of the test, and can be:
 *
- *		* 0, if current task belongs to the cgroup2.
- *		* 1, if current task does not belong to the cgroup2.
+ *		* 1, if current task belongs to the cgroup2.
+ *		* 0, if current task does not belong to the cgroup2.
 * 		* A negative error code, if an error occurred.
 *
 * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
@@ -2359,6 +2409,8 @@ union bpf_attr {
 * 		indicate that the hash is outdated and to trigger a
 * 		recalculation the next time the kernel tries to access this
 * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * 	Return
+ * 		void.
 *
 * long bpf_get_numa_node_id(void)
 * 	Description
@@ -2456,6 +2508,8 @@ union bpf_attr {
 * 		A 8-byte long unique number or 0 if *sk* is NULL.
 *
 * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * 	Description
+ * 		Get the owner UID of the socked associated to *skb*.
 * 	Return
 * 		The owner UID of the socket associated to *skb*. If the socket
 * 		is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -2965,8 +3019,8 @@ union bpf_attr {
 *
 * 			# sysctl kernel.perf_event_max_stack=<new value>
 * 	Return
- * 		A non-negative value equal to or less than *size* on success,
- * 		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
 *
 * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
 * 	Description
@@ -3230,6 +3284,9 @@ union bpf_attr {
 * 		The id is returned or 0 in case the id could not be retrieved.
 *
 * u64 bpf_get_current_cgroup_id(void)
+ * 	Description
+ * 		Get the current cgroup id based on the cgroup within which
+ * 		the current task is running.
 * 	Return
 * 		A 64-bit integer containing the current cgroup id based
 * 		on the cgroup within which the current task is running.
@@ -4269,8 +4326,8 @@ union bpf_attr {
 *
 *			# sysctl kernel.perf_event_max_stack=<new value>
 *	Return
- *		A non-negative value equal to or less than *size* on success,
- *		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
 *
 * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
 *	Description
@@ -4909,6 +4966,212 @@ union bpf_attr {
 *	Return
 *		The number of bytes written to the buffer, or a negative error
 *		in case of failure.
+ *
+ * struct unix_sock *bpf_skc_to_unix_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res)
+ *	Description
+ *		Get the address of a kernel symbol, returned in *res*. *res* is
+ *		set to 0 if the symbol is not found.
+ *	Return
+ *		On success, zero. On error, a negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-EINVAL** if string *name* is not the same size as *name_sz*.
+ *
+ *		**-ENOENT** if symbol is not found.
+ *
+ *		**-EPERM** if caller does not have permission to obtain kernel address.
+ *
+ * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		Find vma of *task* that contains *addr*, call *callback_fn*
+ *		function with *task*, *vma*, and *callback_ctx*.
+ *		The *callback_fn* should be a static function and
+ *		the *callback_ctx* should be a pointer to the stack.
+ *		The *flags* is used to control certain aspects of the helper.
+ *		Currently, the *flags* must be 0.
+ *
+ *		The expected callback signature is
+ *
+ *		long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx);
+ *
+ *	Return
+ *		0 on success.
+ *		**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
+ *		**-EBUSY** if failed to try lock mmap_lock.
+ *		**-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For **nr_loops**, call **callback_fn** function
+ *		with **callback_ctx** as the context parameter.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0. Currently, nr_loops is
+ *		limited to 1 << 23 (~8 million) loops.
+ *
+ *		long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ *		where **index** is the current index in the loop. The index
+ *		is zero-indexed.
+ *
+ *		If **callback_fn** returns 0, the helper will continue to the next
+ *		loop. If return value is 1, the helper will skip the rest of
+ *		the loops and return. Other return values are not used now,
+ *		and will be rejected by the verifier.
+ *
+ *	Return
+ *		The number of loops performed, **-EINVAL** for invalid **flags**,
+ *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
+ *
+ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2)
+ *	Description
+ *		Do strncmp() between **s1** and **s2**. **s1** doesn't need
+ *		to be null-terminated and **s1_sz** is the maximum storage
+ *		size of **s1**. **s2** must be a read-only string.
+ *	Return
+ *		An integer less than, equal to, or greater than zero
+ *		if the first **s1_sz** bytes of **s1** is found to be
+ *		less than, to match, or be greater than **s2**.
+ *
+ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
+ *	Description
+ *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		returned in **value**.
+ *
+ *	Return
+ *		0 on success.
+ *		**-EINVAL** if n >= arguments count of traced function.
+ *
+ * long bpf_get_func_ret(void *ctx, u64 *value)
+ *	Description
+ *		Get return value of the traced function (for tracing programs)
+ *		in **value**.
+ *
+ *	Return
+ *		0 on success.
+ *		**-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.
+ *
+ * long bpf_get_func_arg_cnt(void *ctx)
+ *	Description
+ *		Get number of arguments of the traced function (for tracing programs).
+ *
+ *	Return
+ *		The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ *	Description
+ *		Get the syscall's return value that will be returned to userspace.
+ *
+ *		This helper is currently supported by cgroup programs only.
+ *	Return
+ *		The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ *	Description
+ *		Set the syscall's return value that will be returned to userspace.
+ *
+ *		This helper is currently supported by cgroup programs only.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ *	Description
+ *		Get the total size of a given xdp buff (linear and paged area)
+ *	Return
+ *		The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		This helper is provided as an easy way to load data from a
+ *		xdp buffer. It can be used to load *len* bytes from *offset* from
+ *		the frame associated to *xdp_md*, into the buffer pointed by
+ *		*buf*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ *	Description
+ *		Store *len* bytes from buffer *buf* into the frame
+ *		associated to *xdp_md*, at *offset*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and stores the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This helper
+ *		can only be used by sleepable programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure. On error
+ *		*dst* buffer is zeroed out.
+ *
+ * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type)
+ *	Description
+ *		Change the __sk_buff->tstamp_type to *tstamp_type*
+ *		and set *tstamp* to the __sk_buff->tstamp together.
+ *
+ *		If there is no need to change the __sk_buff->tstamp_type,
+ *		the tstamp value can be directly written to __sk_buff->tstamp
+ *		instead.
+ *
+ *		BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ *		will be kept during bpf_redirect_*().  A non zero
+ *		*tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ *		*tstamp_type*.
+ *
+ *		A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ *		with a zero *tstamp*.
+ *
+ *		Only IPv4 and IPv6 skb->protocol are supported.
+ *
+ *		This function is most useful when it needs to set a
+ *		mono delivery time to __sk_buff->tstamp and then
+ *		bpf_redirect_*() to the egress of an iface.  For example,
+ *		changing the (rcv) timestamp in __sk_buff->tstamp at
+ *		ingress to a mono delivery time and then bpf_redirect_*()
+ *		to sch_fq@phy-dev.
+ *	Return
+ *		0 on success.
+ *		**-EINVAL** for invalid input
+ *		**-EOPNOTSUPP** for unsupported protocol
+ *
+ * long bpf_ima_file_hash(struct file *file, void *dst, u32 size)
+ *	Description
+ *		Returns a calculated IMA hash of the *file*.
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ *		invalid arguments are passed.
+ *
+ * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ *	Description
+ *		Exchange kptr at pointer *map_value* with *ptr*, and return the
+ *		old value. *ptr* can be NULL, otherwise it must be a referenced
+ *		pointer which will be released when this helper is called.
+ *	Return
+ *		The old value of kptr (which can be NULL). The returned pointer
+ *		if not NULL, is a reference which must be released using its
+ *		corresponding release function, or moved into a BPF map before
+ *		program exit.
+ *
+ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+ * 	Description
+ * 		Perform a lookup in *percpu map* for an entry associated to
+ * 		*key* on *cpu*.
+ * 	Return
+ * 		Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * 		was found or *cpu* is invalid.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5089,6 +5352,24 @@ union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(skc_to_unix_sock),		\
+	FN(kallsyms_lookup_name),	\
+	FN(find_vma),			\
+	FN(loop),			\
+	FN(strncmp),			\
+	FN(get_func_arg),		\
+	FN(get_func_ret),		\
+	FN(get_func_arg_cnt),		\
+	FN(get_retval),			\
+	FN(set_retval),			\
+	FN(xdp_get_buff_len),		\
+	FN(xdp_load_bytes),		\
+	FN(xdp_store_bytes),		\
+	FN(copy_from_user_task),	\
+	FN(skb_set_tstamp),		\
+	FN(ima_file_hash),		\
+	FN(kptr_xchg),			\
+	FN(map_lookup_percpu_elem),     \
 	/* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5278,6 +5559,15 @@ union {					\
 	__u64 :64;			\
 } __attribute__((aligned(8)))

+enum {
+	BPF_SKB_TSTAMP_UNSPEC,
+	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* tstamp has mono delivery time */
+	/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
+	 * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
+	 * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+	 */
+};
+
 /* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 */
@@ -5318,7 +5608,8 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
-	__u32 :32;		/* Padding, future use. */
+	__u8  tstamp_type;
+	__u32 :24;		/* Padding, future use. */
 	__u64 hwtstamp;
 };

@@ -5332,6 +5623,10 @@ struct bpf_tunnel_key {
 	__u8 tunnel_ttl;
 	__u16 tunnel_ext;	/* Padding, future use. */
 	__u32 tunnel_label;
+	union {
+		__u32 local_ipv4;
+		__u32 local_ipv6[4];
+	};
 };

 /* user accessible mirror of in-kernel xfrm_state.
@@ -5383,7 +5678,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__be16 dst_port;	/* network byte order */
+	__u16 :16;		/* zero padding */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
@@ -5613,6 +5909,7 @@ struct bpf_prog_info {
 	__u64 run_time_ns;
 	__u64 run_cnt;
 	__u64 recursion_misses;
+	__u32 verified_insns;
 } __attribute__((aligned(8)));

 struct bpf_map_info {
@@ -5630,6 +5927,8 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u32 :32;	/* alignment pad */
+	__u64 map_extra;
 } __attribute__((aligned(8)));

 struct bpf_btf_info {
@@ -6258,10 +6557,12 @@ struct bpf_sk_lookup {
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
 	__u32 remote_ip4;	/* Network byte order */
 	__u32 remote_ip6[4];	/* Network byte order */
-	__u32 remote_port;	/* Network byte order */
+	__be16 remote_port;	/* Network byte order */
+	__u16 :16;		/* Zero padding */
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
+	__u32 ingress_ifindex;		/* The arriving interface. Determined by inet_iif. */
 };

 /*
@@ -6294,4 +6595,78 @@ enum {
 	BTF_F_ZERO	=	(1ULL << 3),
 };

+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations. It is emitted by llvm and passed to
+ * libbpf and later to the kernel.
+ */
+enum bpf_core_relo_kind {
+	BPF_CORE_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+	BPF_CORE_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
+	BPF_CORE_FIELD_EXISTS = 2,           /* field existence in target kernel */
+	BPF_CORE_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
+	BPF_CORE_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
+	BPF_CORE_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
+	BPF_CORE_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
+	BPF_CORE_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
+	BPF_CORE_TYPE_EXISTS = 8,            /* type existence in target kernel */
+	BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
+	BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
+	BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
+};
+
+/*
+ * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf
+ * and from libbpf to the kernel.
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ *   its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ *   type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ *   interpretation depends on specific relocation kind:
+ *     - for field-based relocations, string encodes an accessed field using
+ *       a sequence of field and array indices, separated by colon (:). It's
+ *       conceptually very close to LLVM's getelementptr ([0]) instruction's
+ *       arguments for identifying offset to a field.
+ *     - for type-based relocations, strings is expected to be just "0";
+ *     - for enum value-based relocations, string contains an index of enum
+ *       value within its enum type;
+ * - kind - one of enum bpf_core_relo_kind;
+ *
+ * Example:
+ *   struct sample {
+ *       int a;
+ *       struct {
+ *           int b[10];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *   int *x = &s->a;     // encoded as "0:0" (a is field #0)
+ *   int *y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
+ *                       // b is field #0 inside anon struct, accessing elem #5)
+ *   int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ *		  __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+	__u32 insn_off;
+	__u32 type_id;
+	__u32 access_str_off;
+	enum bpf_core_relo_kind kind;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -33,8 +33,8 @@ struct btf_type {
 	/* "info" bits arrangement
 	 * bits  0-15: vlen (e.g. # of struct's members)
 	 * bits 16-23: unused
-	 * bits 24-27: kind (e.g. int, ptr, array...etc)
-	 * bits 28-30: unused
+	 * bits 24-28: kind (e.g. int, ptr, array...etc)
+	 * bits 29-30: unused
 	 * bit     31: kind_flag, currently used by
 	 *             struct, union and fwd
 	 */
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO, VAR and TAG.
+	 * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -74,7 +74,8 @@ enum {
 	BTF_KIND_VAR		= 14,	/* Variable	*/
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
-	BTF_KIND_TAG		= 17,	/* Tag */
+	BTF_KIND_DECL_TAG	= 17,	/* Decl Tag */
+	BTF_KIND_TYPE_TAG	= 18,	/* Type Tag */

 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -174,14 +175,14 @@ struct btf_var_secinfo {
 	__u32	size;
 };

-/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe
+/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe
 * additional information related to the tag applied location.
 * If component_idx == -1, the tag is applied to a struct, union,
 * variable or function. Otherwise, it is applied to a struct/union
 * member or a func argument, and component_idx indicates which member
 * or argument (0 ... vlen-1).
 */
-struct btf_tag {
+struct btf_decl_tag {
       __s32   component_idx;
 };

--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -7,24 +7,23 @@

 /* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
+	__u32	rx_packets;
+	__u32	tx_packets;
+	__u32	rx_bytes;
+	__u32	tx_bytes;
+	__u32	rx_errors;
+	__u32	tx_errors;
+	__u32	rx_dropped;
+	__u32	tx_dropped;
+	__u32	multicast;
 	__u32	collisions;
-
 	/* detailed rx_errors: */
 	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
+	__u32	rx_over_errors;
+	__u32	rx_crc_errors;
+	__u32	rx_frame_errors;
+	__u32	rx_fifo_errors;
+	__u32	rx_missed_errors;

 	/* detailed tx_errors */
 	__u32	tx_aborted_errors;
@@ -37,29 +36,201 @@ struct rtnl_link_stats {
 	__u32	rx_compressed;
 	__u32	tx_compressed;

-	__u32	rx_nohandler;		/* dropped, no handler found	*/
+	__u32	rx_nohandler;
 };

-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ *   For hardware interfaces counts all good packets received from the device
+ *   by the host, including packets which host had to drop at various stages
+ *   of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ *   For hardware interfaces counts packets which host was able to successfully
+ *   hand over to the device, which does not necessarily mean that packets
+ *   had been successfully transmitted out of the device, only that device
+ *   acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ *   This counter must include events counted by @rx_length_errors,
+ *   @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ *   counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ *   This counter must include events counter by @tx_aborted_errors,
+ *   @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ *   @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ *   e.g. due to lack of resources or unsupported protocol.
+ *   For hardware interfaces this counter may include packets discarded
+ *   due to L2 address filtering but should not include packets dropped
+ *   by the device due to buffer exhaustion which are counted separately in
+ *   @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ *   e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ *   For hardware interfaces this statistic is commonly calculated
+ *   at the device level (unlike @rx_packets) and therefore may include
+ *   packets which did not reach the host.
+ *
+ *   For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ *    - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to a sum
+ *   of the following attributes:
+ *
+ *    - 30.3.1.1.23 aInRangeLengthErrors
+ *    - 30.3.1.1.24 aOutOfRangeLengthField
+ *    - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ *   Historically the count of overflow events. Such events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   The recommended interpretation for high speed interfaces is -
+ *   number of packets dropped because they did not fit into buffers
+ *   provided by the host, e.g. packets larger than MTU or next buffer
+ *   in the ring was not available for a scatter transfer.
+ *
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   This statistics was historically used interchangeably with
+ *   @rx_fifo_errors.
+ *
+ *   This statistic corresponds to hardware events and is not commonly used
+ *   on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ *    - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ *   Historically the count of overflow events. Those events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   This statistics was used interchangeably with @rx_over_errors.
+ *   Not recommended for use in drivers for high speed interfaces.
+ *
+ *   This statistic is used on software devices, e.g. to count software
+ *   packet queue overflow (can) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ *   Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ *   Counts number of packets dropped by the device due to lack
+ *   of buffer space. This usually indicates that the host interface
+ *   is slower than the network interface, or host is not keeping up
+ *   with the receive packet rate.
+ *
+ *   This statistic corresponds to hardware events and is not used
+ *   on software devices.
+ *
+ * @tx_aborted_errors:
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *   For IEEE 802.3 devices capable of half-duplex operation this counter
+ *   must be equivalent to:
+ *
+ *    - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ *   High speed interfaces may use this counter as a general device
+ *   discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ *   of carrier during transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ *   FIFO underrun / underflow. This condition occurs when the device
+ *   begins transmission of a frame but is unable to deliver the
+ *   entire frame to the transmitter in time for transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ *   old half-duplex Ethernet.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices possibly equivalent to:
+ *
+ *    - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ *   to late collisions (for Ethernet - after the first 64B of transmission).
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ *   This counters is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ *   This counters is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ *   but dropped by the networking stack because the device is
+ *   not designated to receive packets (e.g. backup link in a bond).
+ */
 struct rtnl_link_stats64 {
-	__u64	rx_packets;		/* total packets received	*/
-	__u64	tx_packets;		/* total packets transmitted	*/
-	__u64	rx_bytes;		/* total bytes received 	*/
-	__u64	tx_bytes;		/* total bytes transmitted	*/
-	__u64	rx_errors;		/* bad packets received		*/
-	__u64	tx_errors;		/* packet transmit problems	*/
-	__u64	rx_dropped;		/* no space in linux buffers	*/
-	__u64	tx_dropped;		/* no space available in linux	*/
-	__u64	multicast;		/* multicast packets received	*/
+	__u64	rx_packets;
+	__u64	tx_packets;
+	__u64	rx_bytes;
+	__u64	tx_bytes;
+	__u64	rx_errors;
+	__u64	tx_errors;
+	__u64	rx_dropped;
+	__u64	tx_dropped;
+	__u64	multicast;
 	__u64	collisions;

 	/* detailed rx_errors: */
 	__u64	rx_length_errors;
-	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u64	rx_frame_errors;	/* recv'd frame alignment error */
-	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u64	rx_missed_errors;	/* receiver missed packet	*/
+	__u64	rx_over_errors;
+	__u64	rx_crc_errors;
+	__u64	rx_frame_errors;
+	__u64	rx_fifo_errors;
+	__u64	rx_missed_errors;

 	/* detailed tx_errors */
 	__u64	tx_aborted_errors;
@@ -71,8 +242,7 @@ struct rtnl_link_stats64 {
 	/* for cslip etc */
 	__u64	rx_compressed;
 	__u64	tx_compressed;
-
-	__u64	rx_nohandler;		/* dropped, no handler found	*/
+	__u64	rx_nohandler;
 };

 /* The struct should be in sync with struct ifmap */
@@ -170,12 +340,30 @@ enum {
 	IFLA_PROP_LIST,
 	IFLA_ALT_IFNAME, /* Alternative ifname */
 	IFLA_PERM_ADDRESS,
+	IFLA_PROTO_DOWN_REASON,
+
+	/* device (sysfs) name as parent, used instead
+	 * of IFLA_LINK where there's no parent netdev
+	 */
+	IFLA_PARENT_DEV_NAME,
+	IFLA_PARENT_DEV_BUS_NAME,
+	IFLA_GRO_MAX_SIZE,
+
 	__IFLA_MAX
 };


 #define IFLA_MAX (__IFLA_MAX - 1)

+enum {
+	IFLA_PROTO_DOWN_REASON_UNSPEC,
+	IFLA_PROTO_DOWN_REASON_MASK,	/* u32, mask for reason bits */
+	IFLA_PROTO_DOWN_REASON_VALUE,   /* u32, reason bit value */
+
+	__IFLA_PROTO_DOWN_REASON_CNT,
+	IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1
+};
+
 /* backwards compatibility for userspace */
 #ifndef __KERNEL__
 #define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
@@ -293,6 +481,7 @@ enum {
 	IFLA_BR_MCAST_MLD_VERSION,
 	IFLA_BR_VLAN_STATS_PER_PORT,
 	IFLA_BR_MULTI_BOOLOPT,
+	IFLA_BR_MCAST_QUERIER_STATE,
 	__IFLA_BR_MAX,
 };

@@ -346,6 +535,8 @@ enum {
 	IFLA_BRPORT_BACKUP_PORT,
 	IFLA_BRPORT_MRP_RING_OPEN,
 	IFLA_BRPORT_MRP_IN_OPEN,
+	IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+	IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -433,6 +624,7 @@ enum macvlan_macaddr_mode {
 };

 #define MACVLAN_FLAG_NOPROMISC	1
+#define MACVLAN_FLAG_NODST	2 /* skip dst macvlan if matching src macvlan */

 /* VRF section */
 enum {
@@ -597,6 +789,18 @@ enum ifla_geneve_df {
 	GENEVE_DF_MAX = __GENEVE_DF_END - 1,
 };

+/* Bareudp section  */
+enum {
+	IFLA_BAREUDP_UNSPEC,
+	IFLA_BAREUDP_PORT,
+	IFLA_BAREUDP_ETHERTYPE,
+	IFLA_BAREUDP_SRCPORT_MIN,
+	IFLA_BAREUDP_MULTIPROTO_MODE,
+	__IFLA_BAREUDP_MAX
+};
+
+#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
+
 /* PPP section */
 enum {
 	IFLA_PPP_UNSPEC,
@@ -655,6 +859,8 @@ enum {
 	IFLA_BOND_TLB_DYNAMIC_LB,
 	IFLA_BOND_PEER_NOTIF_DELAY,
 	IFLA_BOND_AD_LACP_ACTIVE,
+	IFLA_BOND_MISSED_MAX,
+	IFLA_BOND_NS_IP6_TARGET,
 	__IFLA_BOND_MAX,
 };

@@ -899,7 +1105,14 @@ enum {
 #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)


-/* HSR section */
+/* HSR/PRP section, both uses same interface */
+
+/* Different redundancy protocols for hsr device */
+enum {
+	HSR_PROTOCOL_HSR,
+	HSR_PROTOCOL_PRP,
+	HSR_PROTOCOL_MAX,
+};

 enum {
 	IFLA_HSR_UNSPEC,
@@ -909,6 +1122,9 @@ enum {
 	IFLA_HSR_SUPERVISION_ADDR,	/* Supervision frame multicast addr */
 	IFLA_HSR_SEQ_NR,
 	IFLA_HSR_VERSION,		/* HSR version */
+	IFLA_HSR_PROTOCOL,		/* Indicate different protocol than
+					 * HSR. For example PRP.
+					 */
 	__IFLA_HSR_MAX,
 };

@@ -1033,6 +1249,8 @@ enum {
 #define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
 #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5           (1U << 4)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5            (1U << 5)

 enum {
 	IFLA_RMNET_UNSPEC,
@@ -1048,4 +1266,14 @@ struct ifla_rmnet_flags {
 	__u32	mask;
 };

+/* MCTP section */
+
+enum {
+	IFLA_MCTP_UNSPEC,
+	IFLA_MCTP_NET,
+	__IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
--- a/scripts/build-fuzzers.sh
+++ b/scripts/build-fuzzers.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+set -eux
+
+SANITIZER=${SANITIZER:-address}
+flags="-O1 -fno-omit-frame-pointer -g -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=$SANITIZER -fsanitize=fuzzer-no-link"
+
+export CC=${CC:-clang}
+export CFLAGS=${CFLAGS:-$flags}
+
+export CXX=${CXX:-clang++}
+export CXXFLAGS=${CXXFLAGS:-$flags}
+
+cd "$(dirname -- "$0")/.."
+
+export OUT=${OUT:-"$(pwd)/out"}
+mkdir -p "$OUT"
+
+export LIB_FUZZING_ENGINE=${LIB_FUZZING_ENGINE:--fsanitize=fuzzer}
+
+# libelf is compiled with _FORTIFY_SOURCE by default and it
+# isn't compatible with MSan. It was borrowed
+# from https://github.com/google/oss-fuzz/pull/7422
+if [[ "$SANITIZER" == memory ]]; then
+    CFLAGS+=" -U_FORTIFY_SOURCE"
+    CXXFLAGS+=" -U_FORTIFY_SOURCE"
+fi
+
+# The alignment check is turned off by default on OSS-Fuzz/CFLite so it should be
+# turned on explicitly there. It was borrowed from
+# https://github.com/google/oss-fuzz/pull/7092
+if [[ "$SANITIZER" == undefined ]]; then
+    additional_ubsan_checks=alignment
+    UBSAN_FLAGS="-fsanitize=$additional_ubsan_checks -fno-sanitize-recover=$additional_ubsan_checks"
+    CFLAGS+=" $UBSAN_FLAGS"
+    CXXFLAGS+=" $UBSAN_FLAGS"
+fi
+
+# Ideally libbelf should be built using release tarballs available
+# at https://sourceware.org/elfutils/ftp/. Unfortunately sometimes they
+# fail to compile (for example, elfutils-0.185 fails to compile with LDFLAGS enabled
+# due to https://bugs.gentoo.org/794601) so let's just point the script to
+# commits referring to versions of libelf that actually can be built
+rm -rf elfutils
+git clone git://sourceware.org/git/elfutils.git
+(
+cd elfutils
+git checkout 83251d4091241acddbdcf16f814e3bc6ef3df49a
+git log --oneline -1
+
+# ASan isn't compatible with -Wl,--no-undefined: https://github.com/google/sanitizers/issues/380
+find -name Makefile.am | xargs sed -i 's/,--no-undefined//'
+
+# ASan isn't compatible with -Wl,-z,defs either:
+# https://clang.llvm.org/docs/AddressSanitizer.html#usage
+sed -i 's/^\(ZDEFS_LDFLAGS=\).*/\1/' configure.ac
+
+if [[ "$SANITIZER" == undefined ]]; then
+    # That's basicaly what --enable-sanitize-undefined does to turn off unaligned access
+    # elfutils heavily relies on on i386/x86_64 but without changing compiler flags along the way
+    sed -i 's/\(check_undefined_val\)=[0-9]/\1=1/' configure.ac
+fi
+
+autoreconf -i -f
+if ! ./configure --enable-maintainer-mode --disable-debuginfod --disable-libdebuginfod \
+	    CC="$CC" CFLAGS="-Wno-error $CFLAGS" CXX="$CXX" CXXFLAGS="-Wno-error $CXXFLAGS" LDFLAGS="$CFLAGS"; then
+    cat config.log
+    exit 1
+fi
+
+make -C config -j$(nproc) V=1
+make -C lib -j$(nproc) V=1
+make -C libelf -j$(nproc) V=1
+)
+
+make -C src BUILD_STATIC_ONLY=y V=1 clean
+make -C src -j$(nproc) CFLAGS="-I$(pwd)/elfutils/libelf $CFLAGS" BUILD_STATIC_ONLY=y V=1
+
+$CC $CFLAGS -Isrc -Iinclude -Iinclude/uapi -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64  -c fuzz/bpf-object-fuzzer.c -o bpf-object-fuzzer.o
+$CXX $CXXFLAGS $LIB_FUZZING_ENGINE bpf-object-fuzzer.o src/libbpf.a "$(pwd)/elfutils/libelf/libelf.a" -l:libz.a -o "$OUT/bpf-object-fuzzer"
+
+cp fuzz/bpf-object-fuzzer_seed_corpus.zip "$OUT"
--- a/scripts/sync-kernel.sh
+++ b/scripts/sync-kernel.sh
@@ -17,7 +17,7 @@ BPF_BRANCH=${3-""}
 BASELINE_COMMIT=${BPF_NEXT_BASELINE:-$(cat ${LIBBPF_REPO}/CHECKPOINT-COMMIT)}
 BPF_BASELINE_COMMIT=${BPF_BASELINE:-$(cat ${LIBBPF_REPO}/BPF-CHECKPOINT-COMMIT)}

-if [ -z "${LIBBPF_REPO}" ] || [ -z "${LINUX_REPO}" ] || [ -z "${BPF_BRANCH}" ]; then
+if [ -z "${LIBBPF_REPO}" ] || [ -z "${LINUX_REPO}" ]; then
 	echo "Error: libbpf or linux repos are not specified"
 	usage
 fi
@@ -47,6 +47,7 @@ PATH_MAP=(									\
 	[tools/include/uapi/linux/netlink.h]=include/uapi/linux/netlink.h	\
 	[tools/include/uapi/linux/pkt_cls.h]=include/uapi/linux/pkt_cls.h	\
 	[tools/include/uapi/linux/pkt_sched.h]=include/uapi/linux/pkt_sched.h	\
+	[include/uapi/linux/perf_event.h]=include/uapi/linux/perf_event.h	\
 	[Documentation/bpf/libbpf]=docs						\
 )

@@ -74,7 +75,7 @@ commit_desc()
 }

 # Create commit single-line signature, which consists of:
-# - full commit hash
+# - full commit subject
 # - author date in ISO8601 format
 # - full commit body with newlines replaced with vertical bars (|)
 # - shortstat appended at the end
@@ -263,8 +264,11 @@ cd_to ${LIBBPF_REPO}
 git checkout -b ${LIBBPF_SYNC_TAG}

 for patch in $(ls -1 ${TMP_DIR}/patches | tail -n +2); do
-	if ! git am --3way --committer-date-is-author-date "${TMP_DIR}/patches/${patch}"; then
-		read -p "Applying ${TMP_DIR}/patches/${patch} failed, please resolve manually and press <return> to proceed..."
+	if ! git am -3 --committer-date-is-author-date "${TMP_DIR}/patches/${patch}"; then
+		if ! patch -p1 --merge < "${TMP_DIR}/patches/${patch}"; then
+			read -p "Applying ${TMP_DIR}/patches/${patch} failed, please resolve manually and press <return> to proceed..."
+		fi
+		git am --continue
 	fi
 done

--- a/src/Makefile
+++ b/src/Makefile
@@ -5,13 +5,18 @@ ifeq ($(V),1)
 	msg =
 else
 	Q = @
-	msg = @printf '  %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+	msg = @printf '  %-8s %s%s\n' "$(1)" "$(2)" "$(if $(3), $(3))";
 endif

-LIBBPF_VERSION := $(shell \
-	grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
-	sort -rV | head -n1 | cut -d'_' -f2)
-LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
+LIBBPF_MAJOR_VERSION := 0
+LIBBPF_MINOR_VERSION := 8
+LIBBPF_PATCH_VERSION := 0
+LIBBPF_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).$(LIBBPF_PATCH_VERSION)
+LIBBPF_MAJMIN_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).0
+LIBBPF_MAP_VERSION := $(shell grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | sort -rV | head -n1 | cut -d'_' -f2)
+ifneq ($(LIBBPF_MAJMIN_VERSION), $(LIBBPF_MAP_VERSION))
+$(error Libbpf release ($(LIBBPF_VERSION)) and map ($(LIBBPF_MAP_VERSION)) versions are out of sync!)
+endif

 TOPDIR = ..

@@ -20,7 +25,7 @@ ALL_CFLAGS := $(INCLUDES)

 SHARED_CFLAGS += -fPIC -fvisibility=hidden -DSHARED

-CFLAGS ?= -g -O2 -Werror -Wall
+CFLAGS ?= -g -O2 -Werror -Wall -std=gnu89
 ALL_CFLAGS += $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 ALL_LDFLAGS += $(LDFLAGS)
 ifdef NO_PKG_CONFIG
@@ -37,7 +42,7 @@ STATIC_OBJDIR := $(OBJDIR)/staticobjs
 OBJS := bpf.o btf.o libbpf.o libbpf_errno.o netlink.o \
 	nlattr.o str_error.o libbpf_probes.o bpf_prog_linfo.o xsk.o \
 	btf_dump.o hashmap.o ringbuf.o strset.o linker.o gen_loader.o \
-	relo_core.o
+	relo_core.o usdt.o
 SHARED_OBJS := $(addprefix $(SHARED_OBJDIR)/,$(OBJS))
 STATIC_OBJS := $(addprefix $(STATIC_OBJDIR)/,$(OBJS))

@@ -51,7 +56,8 @@ endif

 HEADERS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
 	   bpf_helpers.h bpf_helper_defs.h bpf_tracing.h \
-	   bpf_endian.h bpf_core_read.h skel_internal.h libbpf_version.h
+	   bpf_endian.h bpf_core_read.h skel_internal.h libbpf_version.h \
+	   usdt.bpf.h
 UAPI_HEADERS := $(addprefix $(TOPDIR)/include/uapi/linux/,\
 			    bpf.h bpf_common.h btf.h)

@@ -99,7 +105,7 @@ $(OBJDIR)/libbpf.so.$(LIBBPF_VERSION): $(SHARED_OBJS)
 		  -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
 		  $^ $(ALL_LDFLAGS) -o $@

-$(OBJDIR)/libbpf.pc:
+$(OBJDIR)/libbpf.pc: force
 	$(Q)sed -e "s|@PREFIX@|$(PREFIX)|" \
 		-e "s|@LIBDIR@|$(LIBDIR_PC)|" \
 		-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
@@ -152,7 +158,7 @@ clean:
 	$(call msg,CLEAN)
 	$(Q)rm -rf *.o *.a *.so *.so.* *.pc $(SHARED_OBJDIR) $(STATIC_OBJDIR)

-.PHONY: cscope tags
+.PHONY: cscope tags force
 cscope:
 	$(call msg,CSCOPE)
 	$(Q)ls *.c *.h > cscope.files
@@ -162,3 +168,5 @@ tags:
 	$(call msg,CTAGS)
 	$(Q)rm -f TAGS tags
 	$(Q)ls *.c *.h | xargs $(TAGS_PROG) -a
+
+force:
--- a/src/bpf.c
+++ b/src/bpf.c
@@ -28,6 +28,10 @@
 #include <asm/unistd.h>
 #include <errno.h>
 #include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <limits.h>
+#include <sys/resource.h>
 #include "bpf.h"
 #include "libbpf.h"
 #include "libbpf_internal.h"
@@ -49,6 +53,12 @@
 #  define __NR_bpf 351
 # elif defined(__arc__)
 #  define __NR_bpf 280
+# elif defined(__mips__) && defined(_ABIO32)
+#  define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+#  define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+#  define __NR_bpf 5315
 # else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
@@ -65,133 +75,217 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 	return syscall(__NR_bpf, cmd, attr, size);
 }

-static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
+			     unsigned int size)
+{
+	int fd;
+
+	fd = sys_bpf(cmd, attr, size);
+	return ensure_good_fd(fd);
+}
+
+#define PROG_LOAD_ATTEMPTS 5
+
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
 {
-	int retries = 5;
 	int fd;

 	do {
-		fd = sys_bpf(BPF_PROG_LOAD, attr, size);
-	} while (fd < 0 && errno == EAGAIN && retries-- > 0);
+		fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size);
+	} while (fd < 0 && errno == EAGAIN && --attempts > 0);

 	return fd;
 }

-int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and progs. This was done in [0].
+ * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in
+ * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF.
+ *
+ *   [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
+ *   [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
+ */
+int probe_memcg_account(void)
 {
+	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
+	struct bpf_insn insns[] = {
+		BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
+		BPF_EXIT_INSN(),
+	};
+	size_t insn_cnt = ARRAY_SIZE(insns);
+	union bpf_attr attr;
+	int prog_fd;
+
+	/* attempt loading freplace trying to use custom BTF */
+	memset(&attr, 0, prog_load_attr_sz);
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = ptr_to_u64(insns);
+	attr.insn_cnt = insn_cnt;
+	attr.license = ptr_to_u64("GPL");
+
+	prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
+	if (prog_fd >= 0) {
+		close(prog_fd);
+		return 1;
+	}
+	return 0;
+}
+
+static bool memlock_bumped;
+static rlim_t memlock_rlim = RLIM_INFINITY;
+
+int libbpf_set_memlock_rlim(size_t memlock_bytes)
+{
+	if (memlock_bumped)
+		return libbpf_err(-EBUSY);
+
+	memlock_rlim = memlock_bytes;
+	return 0;
+}
+
+int bump_rlimit_memlock(void)
+{
+	struct rlimit rlim;
+
+	/* this the default in libbpf 1.0, but for now user has to opt-in explicitly */
+	if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
+		return 0;
+
+	/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
+	if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
+		return 0;
+
+	memlock_bumped = true;
+
+	/* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
+	if (memlock_rlim == 0)
+		return 0;
+
+	rlim.rlim_cur = rlim.rlim_max = memlock_rlim;
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim))
+		return -errno;
+
+	return 0;
+}
+
+int bpf_map_create(enum bpf_map_type map_type,
+		   const char *map_name,
+		   __u32 key_size,
+		   __u32 value_size,
+		   __u32 max_entries,
+		   const struct bpf_map_create_opts *opts)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
 	union bpf_attr attr;
 	int fd;

-	memset(&attr, '\0', sizeof(attr));
+	bump_rlimit_memlock();

-	attr.map_type = create_attr->map_type;
-	attr.key_size = create_attr->key_size;
-	attr.value_size = create_attr->value_size;
-	attr.max_entries = create_attr->max_entries;
-	attr.map_flags = create_attr->map_flags;
-	if (create_attr->name)
-		memcpy(attr.map_name, create_attr->name,
-		       min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
-	attr.numa_node = create_attr->numa_node;
-	attr.btf_fd = create_attr->btf_fd;
-	attr.btf_key_type_id = create_attr->btf_key_type_id;
-	attr.btf_value_type_id = create_attr->btf_value_type_id;
-	attr.map_ifindex = create_attr->map_ifindex;
-	if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS)
-		attr.btf_vmlinux_value_type_id =
-			create_attr->btf_vmlinux_value_type_id;
-	else
-		attr.inner_map_fd = create_attr->inner_map_fd;
+	memset(&attr, 0, attr_sz);

-	fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	if (!OPTS_VALID(opts, bpf_map_create_opts))
+		return libbpf_err(-EINVAL);
+
+	attr.map_type = map_type;
+	if (map_name)
+		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
+	attr.key_size = key_size;
+	attr.value_size = value_size;
+	attr.max_entries = max_entries;
+
+	attr.btf_fd = OPTS_GET(opts, btf_fd, 0);
+	attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
+	attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
+	attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
+
+	attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
+	attr.map_flags = OPTS_GET(opts, map_flags, 0);
+	attr.map_extra = OPTS_GET(opts, map_extra, 0);
+	attr.numa_node = OPTS_GET(opts, numa_node, 0);
+	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+
+	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }

+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+{
+	LIBBPF_OPTS(bpf_map_create_opts, p);
+
+	p.map_flags = create_attr->map_flags;
+	p.numa_node = create_attr->numa_node;
+	p.btf_fd = create_attr->btf_fd;
+	p.btf_key_type_id = create_attr->btf_key_type_id;
+	p.btf_value_type_id = create_attr->btf_value_type_id;
+	p.map_ifindex = create_attr->map_ifindex;
+	if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS)
+		p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;
+	else
+		p.inner_map_fd = create_attr->inner_map_fd;
+
+	return bpf_map_create(create_attr->map_type, create_attr->name,
+			      create_attr->key_size, create_attr->value_size,
+			      create_attr->max_entries, &p);
+}
+
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
 			int key_size, int value_size, int max_entries,
 			__u32 map_flags, int node)
 {
-	struct bpf_create_map_attr map_attr = {};
+	LIBBPF_OPTS(bpf_map_create_opts, opts);

-	map_attr.name = name;
-	map_attr.map_type = map_type;
-	map_attr.map_flags = map_flags;
-	map_attr.key_size = key_size;
-	map_attr.value_size = value_size;
-	map_attr.max_entries = max_entries;
+	opts.map_flags = map_flags;
 	if (node >= 0) {
-		map_attr.numa_node = node;
-		map_attr.map_flags |= BPF_F_NUMA_NODE;
+		opts.numa_node = node;
+		opts.map_flags |= BPF_F_NUMA_NODE;
 	}

-	return bpf_create_map_xattr(&map_attr);
+	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
 }

 int bpf_create_map(enum bpf_map_type map_type, int key_size,
 		   int value_size, int max_entries, __u32 map_flags)
 {
-	struct bpf_create_map_attr map_attr = {};
+	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);

-	map_attr.map_type = map_type;
-	map_attr.map_flags = map_flags;
-	map_attr.key_size = key_size;
-	map_attr.value_size = value_size;
-	map_attr.max_entries = max_entries;
-
-	return bpf_create_map_xattr(&map_attr);
+	return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
 }

 int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
 			int key_size, int value_size, int max_entries,
 			__u32 map_flags)
 {
-	struct bpf_create_map_attr map_attr = {};
+	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);

-	map_attr.name = name;
-	map_attr.map_type = map_type;
-	map_attr.map_flags = map_flags;
-	map_attr.key_size = key_size;
-	map_attr.value_size = value_size;
-	map_attr.max_entries = max_entries;
-
-	return bpf_create_map_xattr(&map_attr);
+	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
 }

 int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
 			       int key_size, int inner_map_fd, int max_entries,
 			       __u32 map_flags, int node)
 {
-	union bpf_attr attr;
-	int fd;
-
-	memset(&attr, '\0', sizeof(attr));
-
-	attr.map_type = map_type;
-	attr.key_size = key_size;
-	attr.value_size = 4;
-	attr.inner_map_fd = inner_map_fd;
-	attr.max_entries = max_entries;
-	attr.map_flags = map_flags;
-	if (name)
-		memcpy(attr.map_name, name,
-		       min(strlen(name), BPF_OBJ_NAME_LEN - 1));
+	LIBBPF_OPTS(bpf_map_create_opts, opts);

+	opts.inner_map_fd = inner_map_fd;
+	opts.map_flags = map_flags;
 	if (node >= 0) {
-		attr.map_flags |= BPF_F_NUMA_NODE;
-		attr.numa_node = node;
+		opts.map_flags |= BPF_F_NUMA_NODE;
+		opts.numa_node = node;
 	}

-	fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-	return libbpf_err_errno(fd);
+	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
 }

 int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
 			  int key_size, int inner_map_fd, int max_entries,
 			  __u32 map_flags)
 {
-	return bpf_create_map_in_map_node(map_type, name, key_size,
-					  inner_map_fd, max_entries, map_flags,
-					  -1);
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		.inner_map_fd = inner_map_fd,
+		.map_flags = map_flags,
+	);
+
+	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
 }

 static void *
@@ -219,58 +313,95 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
 	return info;
 }

-int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
+DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
+int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
+		         const char *prog_name, const char *license,
+		         const struct bpf_insn *insns, size_t insn_cnt,
+		         const struct bpf_prog_load_opts *opts)
 {
 	void *finfo = NULL, *linfo = NULL;
+	const char *func_info, *line_info;
+	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
+	__u32 func_info_rec_size, line_info_rec_size;
+	int fd, attempts;
 	union bpf_attr attr;
-	int fd;
+	char *log_buf;

-	if (!load_attr->log_buf != !load_attr->log_buf_sz)
+	bump_rlimit_memlock();
+
+	if (!OPTS_VALID(opts, bpf_prog_load_opts))
 		return libbpf_err(-EINVAL);

-	if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
+	attempts = OPTS_GET(opts, attempts, 0);
+	if (attempts < 0)
 		return libbpf_err(-EINVAL);
+	if (attempts == 0)
+		attempts = PROG_LOAD_ATTEMPTS;

 	memset(&attr, 0, sizeof(attr));
-	attr.prog_type = load_attr->prog_type;
-	attr.expected_attach_type = load_attr->expected_attach_type;

-	if (load_attr->attach_prog_fd)
-		attr.attach_prog_fd = load_attr->attach_prog_fd;
+	attr.prog_type = prog_type;
+	attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
+
+	attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
+	attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
+	attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
+	attr.kern_version = OPTS_GET(opts, kern_version, 0);
+
+	if (prog_name)
+		libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
+	attr.license = ptr_to_u64(license);
+
+	if (insn_cnt > UINT_MAX)
+		return libbpf_err(-E2BIG);
+
+	attr.insns = ptr_to_u64(insns);
+	attr.insn_cnt = (__u32)insn_cnt;
+
+	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+	attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);
+
+	if (attach_prog_fd && attach_btf_obj_fd)
+		return libbpf_err(-EINVAL);
+
+	attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
+	if (attach_prog_fd)
+		attr.attach_prog_fd = attach_prog_fd;
 	else
-		attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd;
-	attr.attach_btf_id = load_attr->attach_btf_id;
+		attr.attach_btf_obj_fd = attach_btf_obj_fd;

-	attr.prog_ifindex = load_attr->prog_ifindex;
-	attr.kern_version = load_attr->kern_version;
+	log_buf = OPTS_GET(opts, log_buf, NULL);
+	log_size = OPTS_GET(opts, log_size, 0);
+	log_level = OPTS_GET(opts, log_level, 0);

-	attr.insn_cnt = (__u32)load_attr->insn_cnt;
-	attr.insns = ptr_to_u64(load_attr->insns);
-	attr.license = ptr_to_u64(load_attr->license);
+	if (!!log_buf != !!log_size)
+		return libbpf_err(-EINVAL);
+	if (log_level > (4 | 2 | 1))
+		return libbpf_err(-EINVAL);
+	if (log_level && !log_buf)
+		return libbpf_err(-EINVAL);

-	attr.log_level = load_attr->log_level;
-	if (attr.log_level) {
-		attr.log_buf = ptr_to_u64(load_attr->log_buf);
-		attr.log_size = load_attr->log_buf_sz;
+	func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
+	func_info = OPTS_GET(opts, func_info, NULL);
+	attr.func_info_rec_size = func_info_rec_size;
+	attr.func_info = ptr_to_u64(func_info);
+	attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0);
+
+	line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
+	line_info = OPTS_GET(opts, line_info, NULL);
+	attr.line_info_rec_size = line_info_rec_size;
+	attr.line_info = ptr_to_u64(line_info);
+	attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);
+
+	attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));
+
+	if (log_level) {
+		attr.log_buf = ptr_to_u64(log_buf);
+		attr.log_size = log_size;
+		attr.log_level = log_level;
 	}

-	attr.prog_btf_fd = load_attr->prog_btf_fd;
-	attr.prog_flags = load_attr->prog_flags;
-
-	attr.func_info_rec_size = load_attr->func_info_rec_size;
-	attr.func_info_cnt = load_attr->func_info_cnt;
-	attr.func_info = ptr_to_u64(load_attr->func_info);
-
-	attr.line_info_rec_size = load_attr->line_info_rec_size;
-	attr.line_info_cnt = load_attr->line_info_cnt;
-	attr.line_info = ptr_to_u64(load_attr->line_info);
-	attr.fd_array = ptr_to_u64(load_attr->fd_array);
-
-	if (load_attr->name)
-		memcpy(attr.prog_name, load_attr->name,
-		       min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1));
-
-	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+	fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
 	if (fd >= 0)
 		return fd;

@@ -280,11 +411,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 	 */
 	while (errno == E2BIG && (!finfo || !linfo)) {
 		if (!finfo && attr.func_info_cnt &&
-		    attr.func_info_rec_size < load_attr->func_info_rec_size) {
+		    attr.func_info_rec_size < func_info_rec_size) {
 			/* try with corrected func info records */
-			finfo = alloc_zero_tailing_info(load_attr->func_info,
-							load_attr->func_info_cnt,
-							load_attr->func_info_rec_size,
+			finfo = alloc_zero_tailing_info(func_info,
+							attr.func_info_cnt,
+							func_info_rec_size,
 							attr.func_info_rec_size);
 			if (!finfo) {
 				errno = E2BIG;
@@ -292,13 +423,12 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 			}

 			attr.func_info = ptr_to_u64(finfo);
-			attr.func_info_rec_size = load_attr->func_info_rec_size;
+			attr.func_info_rec_size = func_info_rec_size;
 		} else if (!linfo && attr.line_info_cnt &&
-			   attr.line_info_rec_size <
-			   load_attr->line_info_rec_size) {
-			linfo = alloc_zero_tailing_info(load_attr->line_info,
-							load_attr->line_info_cnt,
-							load_attr->line_info_rec_size,
+			   attr.line_info_rec_size < line_info_rec_size) {
+			linfo = alloc_zero_tailing_info(line_info,
+							attr.line_info_cnt,
+							line_info_rec_size,
 							attr.line_info_rec_size);
 			if (!linfo) {
 				errno = E2BIG;
@@ -306,26 +436,27 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 			}

 			attr.line_info = ptr_to_u64(linfo);
-			attr.line_info_rec_size = load_attr->line_info_rec_size;
+			attr.line_info_rec_size = line_info_rec_size;
 		} else {
 			break;
 		}

-		fd = sys_bpf_prog_load(&attr, sizeof(attr));
+		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
 		if (fd >= 0)
 			goto done;
 	}

-	if (load_attr->log_level || !load_attr->log_buf)
-		goto done;
+	if (log_level == 0 && log_buf) {
+		/* log_level == 0 with non-NULL log_buf requires retrying on error
+		 * with log_level == 1 and log_buf/log_buf_size set, to get details of
+		 * failure
+		 */
+		attr.log_buf = ptr_to_u64(log_buf);
+		attr.log_size = log_size;
+		attr.log_level = 1;

-	/* Try again with log */
-	attr.log_buf = ptr_to_u64(load_attr->log_buf);
-	attr.log_size = load_attr->log_buf_sz;
-	attr.log_level = 1;
-	load_attr->log_buf[0] = 0;
-
-	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+	}
 done:
 	/* free() doesn't affect errno, so we don't need to restore it */
 	free(finfo);
@@ -333,17 +464,20 @@ done:
 	return libbpf_err_errno(fd);
 }

+__attribute__((alias("bpf_load_program_xattr2")))
 int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-			   char *log_buf, size_t log_buf_sz)
+			   char *log_buf, size_t log_buf_sz);
+
+static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
+				   char *log_buf, size_t log_buf_sz)
 {
-	struct bpf_prog_load_params p = {};
+	LIBBPF_OPTS(bpf_prog_load_opts, p);

 	if (!load_attr || !log_buf != !log_buf_sz)
 		return libbpf_err(-EINVAL);

-	p.prog_type = load_attr->prog_type;
 	p.expected_attach_type = load_attr->expected_attach_type;
-	switch (p.prog_type) {
+	switch (load_attr->prog_type) {
 	case BPF_PROG_TYPE_STRUCT_OPS:
 	case BPF_PROG_TYPE_LSM:
 		p.attach_btf_id = load_attr->attach_btf_id;
@@ -357,12 +491,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		p.prog_ifindex = load_attr->prog_ifindex;
 		p.kern_version = load_attr->kern_version;
 	}
-	p.insn_cnt = load_attr->insns_cnt;
-	p.insns = load_attr->insns;
-	p.license = load_attr->license;
 	p.log_level = load_attr->log_level;
 	p.log_buf = log_buf;
-	p.log_buf_sz = log_buf_sz;
+	p.log_size = log_buf_sz;
 	p.prog_btf_fd = load_attr->prog_btf_fd;
 	p.func_info_rec_size = load_attr->func_info_rec_size;
 	p.func_info_cnt = load_attr->func_info_cnt;
@@ -370,10 +501,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	p.line_info_rec_size = load_attr->line_info_rec_size;
 	p.line_info_cnt = load_attr->line_info_cnt;
 	p.line_info = load_attr->line_info;
-	p.name = load_attr->name;
 	p.prog_flags = load_attr->prog_flags;

-	return libbpf__bpf_prog_load(&p);
+	return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
+			     load_attr->insns, load_attr->insns_cnt, &p);
 }

 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
@@ -392,7 +523,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	load_attr.license = license;
 	load_attr.kern_version = kern_version;

-	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+	return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
 }

 int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
@@ -403,6 +534,8 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	union bpf_attr attr;
 	int fd;

+	bump_rlimit_memlock();
+
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = type;
 	attr.insn_cnt = (__u32)insns_cnt;
@@ -415,7 +548,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	attr.kern_version = kern_version;
 	attr.prog_flags = prog_flags;

-	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+	fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
 	return libbpf_err_errno(fd);
 }

@@ -481,6 +614,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
 	union bpf_attr attr;
+	int ret;

 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
@@ -488,7 +622,8 @@ int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, _
 	attr.value = ptr_to_u64(value);
 	attr.flags = flags;

-	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }

 int bpf_map_delete_elem(int fd, const void *key)
@@ -504,6 +639,20 @@ int bpf_map_delete_elem(int fd, const void *key)
 	return libbpf_err_errno(ret);
 }

+int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
+{
+	union bpf_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.flags = flags;
+
+	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
+}
+
 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
 	union bpf_attr attr;
@@ -557,11 +706,11 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
 	return libbpf_err_errno(ret);
 }

-int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
 			 const struct bpf_map_batch_opts *opts)
 {
 	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
-				    NULL, keys, NULL, count, opts);
+				    NULL, (void *)keys, NULL, count, opts);
 }

 int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
@@ -581,11 +730,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
 				    count, opts);
 }

-int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
 			 const struct bpf_map_batch_opts *opts)
 {
 	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
-				    keys, values, count, opts);
+				    (void *)keys, (void *)values, count, opts);
 }

 int bpf_obj_pin(int fd, const char *pathname)
@@ -609,7 +758,7 @@ int bpf_obj_get(const char *pathname)
 	memset(&attr, 0, sizeof(attr));
 	attr.pathname = ptr_to_u64((void *)pathname);

-	fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

@@ -620,10 +769,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
 		.flags = flags,
 	);

-	return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+	return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
 }

-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
 			  enum bpf_attach_type type,
 			  const struct bpf_prog_attach_opts *opts)
 {
@@ -644,6 +793,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
 	return libbpf_err_errno(ret);
 }

+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+			  enum bpf_attach_type type,
+			  const struct bpf_prog_attach_opts *opts);
+
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
 	union bpf_attr attr;
@@ -677,7 +831,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 {
 	__u32 target_btf_id, iter_info_len;
 	union bpf_attr attr;
-	int fd;
+	int fd, err;

 	if (!OPTS_VALID(opts, bpf_link_create_opts))
 		return libbpf_err(-EINVAL);
@@ -714,14 +868,61 @@ int bpf_link_create(int prog_fd, int target_fd,
 		if (!OPTS_ZEROED(opts, perf_event))
 			return libbpf_err(-EINVAL);
 		break;
+	case BPF_TRACE_KPROBE_MULTI:
+		attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0);
+		attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0);
+		attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0));
+		attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0));
+		attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 0));
+		if (!OPTS_ZEROED(opts, kprobe_multi))
+			return libbpf_err(-EINVAL);
+		break;
+	case BPF_TRACE_FENTRY:
+	case BPF_TRACE_FEXIT:
+	case BPF_MODIFY_RETURN:
+	case BPF_LSM_MAC:
+		attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
+		if (!OPTS_ZEROED(opts, tracing))
+			return libbpf_err(-EINVAL);
+		break;
 	default:
 		if (!OPTS_ZEROED(opts, flags))
 			return libbpf_err(-EINVAL);
 		break;
 	}
 proceed:
-	fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
-	return libbpf_err_errno(fd);
+	fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
+	if (fd >= 0)
+		return fd;
+	/* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
+	 * and other similar programs
+	 */
+	err = -errno;
+	if (err != -EINVAL)
+		return libbpf_err(err);
+
+	/* if user used features not supported by
+	 * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately
+	 */
+	if (attr.link_create.target_fd || attr.link_create.target_btf_id)
+		return libbpf_err(err);
+	if (!OPTS_ZEROED(opts, sz))
+		return libbpf_err(err);
+
+	/* otherwise, for few select kinds of programs that can be
+	 * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as
+	 * a fallback for older kernels
+	 */
+	switch (attach_type) {
+	case BPF_TRACE_RAW_TP:
+	case BPF_LSM_MAC:
+	case BPF_TRACE_FENTRY:
+	case BPF_TRACE_FEXIT:
+	case BPF_MODIFY_RETURN:
+		return bpf_raw_tracepoint_open(NULL, prog_fd);
+	default:
+		return libbpf_err(err);
+	}
 }

 int bpf_link_detach(int link_fd)
@@ -763,7 +964,7 @@ int bpf_iter_create(int link_fd)
 	memset(&attr, 0, sizeof(attr));
 	attr.iter_create.link_fd = link_fd;

-	fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

@@ -855,6 +1056,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)

 	memset(&attr, 0, sizeof(attr));
 	attr.test.prog_fd = prog_fd;
+	attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
 	attr.test.cpu = OPTS_GET(opts, cpu, 0);
 	attr.test.flags = OPTS_GET(opts, flags, 0);
 	attr.test.repeat = OPTS_GET(opts, repeat, 0);
@@ -921,7 +1123,7 @@ int bpf_prog_get_fd_by_id(__u32 id)
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_id = id;

-	fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

@@ -933,7 +1135,7 @@ int bpf_map_get_fd_by_id(__u32 id)
 	memset(&attr, 0, sizeof(attr));
 	attr.map_id = id;

-	fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

@@ -945,7 +1147,7 @@ int bpf_btf_get_fd_by_id(__u32 id)
 	memset(&attr, 0, sizeof(attr));
 	attr.btf_id = id;

-	fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

@@ -957,7 +1159,7 @@ int bpf_link_get_fd_by_id(__u32 id)
 	memset(&attr, 0, sizeof(attr));
 	attr.link_id = id;

-	fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

@@ -988,28 +1190,71 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 	attr.raw_tracepoint.name = ptr_to_u64(name);
 	attr.raw_tracepoint.prog_fd = prog_fd;

-	fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
-		 bool do_log)
+int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts)
 {
-	union bpf_attr attr = {};
+	const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level);
+	union bpf_attr attr;
+	char *log_buf;
+	size_t log_size;
+	__u32 log_level;
 	int fd;

-	attr.btf = ptr_to_u64(btf);
+	bump_rlimit_memlock();
+
+	memset(&attr, 0, attr_sz);
+
+	if (!OPTS_VALID(opts, bpf_btf_load_opts))
+		return libbpf_err(-EINVAL);
+
+	log_buf = OPTS_GET(opts, log_buf, NULL);
+	log_size = OPTS_GET(opts, log_size, 0);
+	log_level = OPTS_GET(opts, log_level, 0);
+
+	if (log_size > UINT_MAX)
+		return libbpf_err(-EINVAL);
+	if (log_size && !log_buf)
+		return libbpf_err(-EINVAL);
+
+	attr.btf = ptr_to_u64(btf_data);
 	attr.btf_size = btf_size;
+	/* log_level == 0 and log_buf != NULL means "try loading without
+	 * log_buf, but retry with log_buf and log_level=1 on error", which is
+	 * consistent across low-level and high-level BTF and program loading
+	 * APIs within libbpf and provides a sensible behavior in practice
+	 */
+	if (log_level) {
+		attr.btf_log_buf = ptr_to_u64(log_buf);
+		attr.btf_log_size = (__u32)log_size;
+		attr.btf_log_level = log_level;
+	}
+
+	fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
+	if (fd < 0 && log_buf && log_level == 0) {
+		attr.btf_log_buf = ptr_to_u64(log_buf);
+		attr.btf_log_size = (__u32)log_size;
+		attr.btf_log_level = 1;
+		fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
+	}
+	return libbpf_err_errno(fd);
+}
+
+int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
+{
+	LIBBPF_OPTS(bpf_btf_load_opts, opts);
+	int fd;

 retry:
 	if (do_log && log_buf && log_buf_size) {
-		attr.btf_log_level = 1;
-		attr.btf_log_size = log_buf_size;
-		attr.btf_log_buf = ptr_to_u64(log_buf);
+		opts.log_buf = log_buf;
+		opts.log_size = log_buf_size;
+		opts.log_level = 1;
 	}

-	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
-
+	fd = bpf_btf_load(btf, btf_size, &opts);
 	if (fd < 0 && !do_log && log_buf && log_buf_size) {
 		do_log = true;
 		goto retry;
@@ -1050,7 +1295,7 @@ int bpf_enable_stats(enum bpf_stats_type type)
 	memset(&attr, 0, sizeof(attr));
 	attr.enable_stats.type = type;

-	fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr));
 	return libbpf_err_errno(fd);
 }

--- a/src/bpf.h
+++ b/src/bpf.h
@@ -29,11 +29,38 @@
 #include <stdint.h>

 #include "libbpf_common.h"
+#include "libbpf_legacy.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

+int libbpf_set_memlock_rlim(size_t memlock_bytes);
+
+struct bpf_map_create_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	__u32 btf_fd;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+	__u32 btf_vmlinux_value_type_id;
+
+	__u32 inner_map_fd;
+	__u32 map_flags;
+	__u64 map_extra;
+
+	__u32 numa_node;
+	__u32 map_ifindex;
+};
+#define bpf_map_create_opts__last_field map_ifindex
+
+LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
+			      const char *map_name,
+			      __u32 key_size,
+			      __u32 value_size,
+			      __u32 max_entries,
+			      const struct bpf_map_create_opts *opts);
+
 struct bpf_create_map_attr {
 	const char *name;
 	enum bpf_map_type map_type;
@@ -52,25 +79,95 @@ struct bpf_create_map_attr {
 	};
 };

-LIBBPF_API int
-bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
+LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
 LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
 				   int key_size, int value_size,
 				   int max_entries, __u32 map_flags, int node);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
 LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
 				   int key_size, int value_size,
 				   int max_entries, __u32 map_flags);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
 LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
 			      int value_size, int max_entries, __u32 map_flags);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
 LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
 					  const char *name, int key_size,
 					  int inner_map_fd, int max_entries,
 					  __u32 map_flags, int node);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
 LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
 				     const char *name, int key_size,
 				     int inner_map_fd, int max_entries,
 				     __u32 map_flags);

+struct bpf_prog_load_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	/* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns
+	 * -EAGAIN. This field determines how many attempts libbpf has to
+	 *  make. If not specified, libbpf will use default value of 5.
+	 */
+	int attempts;
+
+	enum bpf_attach_type expected_attach_type;
+	__u32 prog_btf_fd;
+	__u32 prog_flags;
+	__u32 prog_ifindex;
+	__u32 kern_version;
+
+	__u32 attach_btf_id;
+	__u32 attach_prog_fd;
+	__u32 attach_btf_obj_fd;
+
+	const int *fd_array;
+
+	/* .BTF.ext func info data */
+	const void *func_info;
+	__u32 func_info_cnt;
+	__u32 func_info_rec_size;
+
+	/* .BTF.ext line info data */
+	const void *line_info;
+	__u32 line_info_cnt;
+	__u32 line_info_rec_size;
+
+	/* verifier log options */
+	__u32 log_level;
+	__u32 log_size;
+	char *log_buf;
+};
+#define bpf_prog_load_opts__last_field log_buf
+
+LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
+			     const char *prog_name, const char *license,
+			     const struct bpf_insn *insns, size_t insn_cnt,
+			     const struct bpf_prog_load_opts *opts);
+/* this "specialization" should go away in libbpf 1.0 */
+LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
+				    const char *prog_name, const char *license,
+				    const struct bpf_insn *insns, size_t insn_cnt,
+				    const struct bpf_prog_load_opts *opts);
+
+/* This is an elaborate way to not conflict with deprecated bpf_prog_load()
+ * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone.
+ * With this approach, if someone is calling bpf_prog_load() with
+ * 4 arguments, they will use the deprecated API, which keeps backwards
+ * compatibility (both source code and binary). If bpf_prog_load() is called
+ * with 6 arguments, though, it gets redirected to __bpf_prog_load.
+ * So looking forward to libbpf 1.0 when this hack will be gone and
+ * __bpf_prog_load() will be called just bpf_prog_load().
+ */
+#ifndef bpf_prog_load
+#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__)
+#define ___bpf_prog_load4(file, type, pobj, prog_fd) \
+	bpf_prog_load_deprecated(file, type, pobj, prog_fd)
+#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \
+	bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts)
+#endif /* bpf_prog_load */
+
 struct bpf_load_program_attr {
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
@@ -100,15 +197,18 @@ struct bpf_load_program_attr {
 /* Flags to direct loading requirements */
 #define MAPS_RELAX_COMPAT	0x01

-/* Recommend log buffer size */
+/* Recommended log buffer size */
 #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
-LIBBPF_API int
-bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-		       char *log_buf, size_t log_buf_sz);
+
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
+LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+				      char *log_buf, size_t log_buf_sz);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
 LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
 				const struct bpf_insn *insns, size_t insns_cnt,
 				const char *license, __u32 kern_version,
 				char *log_buf, size_t log_buf_sz);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
 LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
 				  const struct bpf_insn *insns,
 				  size_t insns_cnt, __u32 prog_flags,
@@ -116,6 +216,23 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
 				  char *log_buf, size_t log_buf_sz,
 				  int log_level);

+struct bpf_btf_load_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	/* kernel log options */
+	char *log_buf;
+	__u32 log_level;
+	__u32 log_size;
+};
+#define bpf_btf_load_opts__last_field log_size
+
+LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
+			    const struct bpf_btf_load_opts *opts);
+
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead")
+LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
+			    __u32 log_buf_size, bool do_log);
+
 LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
 				   __u64 flags);

@@ -127,6 +244,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
 						    void *value, __u64 flags);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);

@@ -137,17 +255,128 @@ struct bpf_map_batch_opts {
 };
 #define bpf_map_batch_opts__last_field flags

-LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+
+/**
+ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple
+ * elements in a BPF map.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param count input and output parameter; on input **count** represents the
+ * number of  elements in the map to delete in batch;
+ * on output if a non-EFAULT error is returned, **count** represents the number of deleted
+ * elements if the output **count** value is not equal to the input **count** value
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch deletion works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
 				    __u32 *count,
 				    const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
+ *
+ * The parameter *in_batch* is the address of the first element in the batch to read.
+ * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
+ * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
+ * that the batched lookup starts from the beginning of the map.
+ *
+ * The *keys* and *values* are output parameters which must point to memory large enough to
+ * hold *count* items based on the key and value size of the map *map_fd*. The *keys*
+ * buffer must be of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * indicate that the batched lookup starts from the beginning of the map.
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array large enough for *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read in batch; on output it's the number of elements that were
+ * successfully read.
+ * If a non-EFAULT error is returned, count will be set as the number of elements
+ * that were read before the error occurred.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch lookup works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
 LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
 				    void *keys, void *values, __u32 *count,
 				    const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion
+ * of BPF map elements where each element is deleted after being retrieved.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * get address of the first element in *out_batch*
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read and delete in batch; on output it represents the number of
+ * elements that were successfully read and deleted
+ * If a non-**EFAULT** error code is returned and if the output **count** value
+ * is not equal to the input **count** value, up to **count** elements may
+ * have been deleted.
+ * if **EFAULT** is returned up to *count* elements may have been deleted without
+ * being returned via the *keys* and *values* output parameters.
+ * @param opts options for configuring the way the batch lookup and delete works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
 LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
 					void *out_batch, void *keys,
 					void *values, __u32 *count,
 					const struct bpf_map_batch_opts *opts);
-LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+
+/**
+ * @brief **bpf_map_update_batch()** updates multiple elements in a map
+ * by specifying keys and their corresponding values.
+ *
+ * The *keys* and *values* parameters must point to memory large enough
+ * to hold *count* items based on the key and value size of the map.
+ *
+ * The *opts* parameter can be used to control how *bpf_map_update_batch()*
+ * should handle keys that either do or do not already exist in the map.
+ * In particular the *flags* parameter of *bpf_map_batch_opts* can be
+ * one of the following:
+ *
+ * Note that *count* is an input and output parameter, where on output it
+ * represents how many elements were successfully updated. Also note that if
+ * **EFAULT** then *count* should not be trusted to be correct.
+ *
+ * **BPF_ANY**
+ *    Create new elements or update existing.
+ *
+ * **BPF_NOEXIST**
+ *    Create new elements only if they do not exist.
+ *
+ * **BPF_EXIST**
+ *    Update existing elements.
+ *
+ * **BPF_F_LOCK**
+ *    Update spin_lock-ed map elements. This must be
+ *    specified if the map value contains a spinlock.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array of *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to update in batch; on output if a non-EFAULT error is returned,
+ * **count** represents the number of updated elements if the output **count**
+ * value is not equal to the input **count** value.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch update works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
 				    __u32 *count,
 				    const struct bpf_map_batch_opts *opts);

@@ -163,6 +392,10 @@ struct bpf_prog_attach_opts {

 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 			       enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+				     enum bpf_attach_type type,
+				     const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
 				     enum bpf_attach_type type,
 				     const struct bpf_prog_attach_opts *opts);
@@ -181,10 +414,20 @@ struct bpf_link_create_opts {
 		struct {
 			__u64 bpf_cookie;
 		} perf_event;
+		struct {
+			__u32 flags;
+			__u32 cnt;
+			const char **syms;
+			const unsigned long *addrs;
+			const __u64 *cookies;
+		} kprobe_multi;
+		struct {
+			__u64 cookie;
+		} tracing;
 	};
 	size_t :0;
 };
-#define bpf_link_create_opts__last_field perf_event
+#define bpf_link_create_opts__last_field kprobe_multi.cookies

 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
@@ -221,12 +464,14 @@ struct bpf_prog_test_run_attr {
 			     * out: length of cxt_out */
 };

+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
 LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);

 /*
 * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_xattr instead.
+ * using bpf_prog_test_run_opts instead.
 */
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
 LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
 				 __u32 size, void *data_out, __u32 *size_out,
 				 __u32 *retval, __u32 *duration);
@@ -243,8 +488,6 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
 			      __u32 query_flags, __u32 *attach_flags,
 			      __u32 *prog_ids, __u32 *prog_cnt);
 LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
-			    __u32 log_buf_size, bool do_log);
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);
@@ -280,8 +523,9 @@ struct bpf_test_run_opts {
 	__u32 duration;      /* out: average per repetition in ns */
 	__u32 flags;
 	__u32 cpu;
+	__u32 batch_size;
 };
-#define bpf_test_run_opts__last_field cpu
+#define bpf_test_run_opts__last_field batch_size

 LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
 				      struct bpf_test_run_opts *opts);
--- a/src/bpf_core_read.h
+++ b/src/bpf_core_read.h
@@ -40,7 +40,7 @@ enum bpf_enum_value_kind {
 #define __CORE_RELO(src, field, info)					      \
 	__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)

-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \
 	bpf_probe_read_kernel(						      \
 			(void *)dst,				      \
@@ -110,21 +110,50 @@ enum bpf_enum_value_kind {
 	val;								      \
 })

+#define ___bpf_field_ref1(field)	(field)
+#define ___bpf_field_ref2(type, field)	(((typeof(type) *)0)->field)
+#define ___bpf_field_ref(args...)					    \
+	___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args)
+
 /*
 * Convenience macro to check that field actually exists in target kernel's.
 * Returns:
 *    1, if matching field is present in target kernel;
 *    0, if no matching field found.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *     bpf_core_field_exists(p->my_field);
+ *   - field reference through type and field names:
+ *     bpf_core_field_exists(struct my_type, my_field).
 */
-#define bpf_core_field_exists(field)					    \
-	__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+#define bpf_core_field_exists(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS)

 /*
 * Convenience macro to get the byte size of a field. Works for integers,
 * struct/unions, pointers, arrays, and enums.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *     bpf_core_field_size(p->my_field);
+ *   - field reference through type and field names:
+ *     bpf_core_field_size(struct my_type, my_field).
 */
-#define bpf_core_field_size(field)					    \
-	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+#define bpf_core_field_size(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE)
+
+/*
+ * Convenience macro to get field's byte offset.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *     bpf_core_field_offset(p->my_field);
+ *   - field reference through type and field names:
+ *     bpf_core_field_offset(struct my_type, my_field).
+ */
+#define bpf_core_field_offset(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET)

 /*
 * Convenience macro to get BTF type ID of a specified type, using a local BTF
--- a/src/bpf_gen_internal.h
+++ b/src/bpf_gen_internal.h
@@ -3,18 +3,26 @@
 #ifndef __BPF_GEN_INTERNAL_H
 #define __BPF_GEN_INTERNAL_H

+#include "bpf.h"
+
 struct ksym_relo_desc {
 	const char *name;
 	int kind;
 	int insn_idx;
 	bool is_weak;
+	bool is_typeless;
 };

 struct ksym_desc {
 	const char *name;
 	int ref;
 	int kind;
-	int off;
+	union {
+		/* used for kfunc */
+		int off;
+		/* used for typeless ksym */
+		bool typeless;
+	};
 	int insn;
 };

@@ -31,6 +39,8 @@ struct bpf_gen {
 	int error;
 	struct ksym_relo_desc *relos;
 	int relo_cnt;
+	struct bpf_core_relo *core_relos;
+	int core_relo_cnt;
 	char attach_target[128];
 	int attach_kind;
 	struct ksym_desc *ksyms;
@@ -39,17 +49,24 @@ struct bpf_gen {
 	int nr_fd_array;
 };

-void bpf_gen__init(struct bpf_gen *gen, int log_level);
-int bpf_gen__finish(struct bpf_gen *gen);
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps);
 void bpf_gen__free(struct bpf_gen *gen);
 void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
-void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx);
-struct bpf_prog_load_params;
-void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx);
+void bpf_gen__map_create(struct bpf_gen *gen,
+			 enum bpf_map_type map_type, const char *map_name,
+			 __u32 key_size, __u32 value_size, __u32 max_entries,
+			 struct bpf_map_create_opts *map_attr, int map_idx);
+void bpf_gen__prog_load(struct bpf_gen *gen,
+			enum bpf_prog_type prog_type, const char *prog_name,
+			const char *license, struct bpf_insn *insns, size_t insn_cnt,
+			struct bpf_prog_load_opts *load_attr, int prog_idx);
 void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
 void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
 void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
-void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind,
-			    int insn_idx);
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
+			    bool is_typeless, int kind, int insn_idx);
+void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo);
+void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx);

 #endif
--- a/src/bpf_helper_defs.h
+++ b/src/bpf_helper_defs.h
@@ -27,6 +27,7 @@ struct tcp_sock;
 struct tcp_timewait_sock;
 struct tcp_request_sock;
 struct udp6_sock;
+struct unix_sock;
 struct task_struct;
 struct __sk_buff;
 struct sk_msg_md;
@@ -313,7 +314,7 @@ static long (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 fr
 * 	if the maximum number of tail calls has been reached for this
 * 	chain of programs. This limit is defined in the kernel by the
 * 	macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
- * 	which is currently set to 32.
+ * 	which is currently set to 33.
 *
 * Returns
 * 	0 on success, or a negative error in case of failure.
@@ -351,6 +352,7 @@ static long (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 fl
 /*
 * bpf_get_current_pid_tgid
 *
+ * 	Get the current pid and tgid.
 *
 * Returns
 * 	A 64-bit integer containing the current tgid and pid, and
@@ -363,6 +365,7 @@ static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14;
 /*
 * bpf_get_current_uid_gid
 *
+ * 	Get the current uid and gid.
 *
 * Returns
 * 	A 64-bit integer containing the current GID and UID, and
@@ -918,6 +921,7 @@ static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34;
 /*
 * bpf_get_current_task
 *
+ * 	Get the current task.
 *
 * Returns
 * 	A pointer to the current task struct.
@@ -957,8 +961,8 @@ static long (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (vo
 * Returns
 * 	The return value depends on the result of the test, and can be:
 *
- * 	* 0, if current task belongs to the cgroup2.
- * 	* 1, if current task does not belong to the cgroup2.
+ * 	* 1, if current task belongs to the cgroup2.
+ * 	* 0, if current task does not belong to the cgroup2.
 * 	* A negative error code, if an error occurred.
 */
 static long (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37;
@@ -1056,6 +1060,8 @@ static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 4
 * 	recalculation the next time the kernel tries to access this
 * 	hash or when the **bpf_get_hash_recalc**\ () helper is called.
 *
+ * Returns
+ * 	void.
 */
 static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41;

@@ -1155,6 +1161,7 @@ static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46;
 /*
 * bpf_get_socket_uid
 *
+ * 	Get the owner UID of the socked associated to *skb*.
 *
 * Returns
 * 	The owner UID of the socket associated to *skb*. If the socket
@@ -1745,8 +1752,8 @@ static long (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct
 * 		# sysctl kernel.perf_event_max_stack=<new value>
 *
 * Returns
- * 	A non-negative value equal to or less than *size* on success,
- * 	or a negative error in case of failure.
+ * 	The non-negative copied *buf* length equal to or less than
+ * 	*size* on success, or a negative error in case of failure.
 */
 static long (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67;

@@ -2062,6 +2069,8 @@ static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79;
 /*
 * bpf_get_current_cgroup_id
 *
+ * 	Get the current cgroup id based on the cgroup within which
+ * 	the current task is running.
 *
 * Returns
 * 	A 64-bit integer containing the current cgroup id based
@@ -3296,8 +3305,8 @@ static struct udp6_sock *(*bpf_skc_to_udp6_sock)(void *sk) = (void *) 140;
 * 		# sysctl kernel.perf_event_max_stack=<new value>
 *
 * Returns
- * 	A non-negative value equal to or less than *size* on success,
- * 	or a negative error in case of failure.
+ * 	The non-negative copied *buf* length equal to or less than
+ * 	*size* on success, or a negative error in case of failure.
 */
 static long (*bpf_get_task_stack)(struct task_struct *task, void *buf, __u32 size, __u64 flags) = (void *) 141;

@@ -4083,4 +4092,282 @@ static long (*bpf_get_branch_snapshot)(void *entries, __u32 size, __u64 flags) =
 */
 static long (*bpf_trace_vprintk)(const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 177;

+/*
+ * bpf_skc_to_unix_sock
+ *
+ * 	Dynamically cast a *sk* pointer to a *unix_sock* pointer.
+ *
+ * Returns
+ * 	*sk* if casting is valid, or **NULL** otherwise.
+ */
+static struct unix_sock *(*bpf_skc_to_unix_sock)(void *sk) = (void *) 178;
+
+/*
+ * bpf_kallsyms_lookup_name
+ *
+ * 	Get the address of a kernel symbol, returned in *res*. *res* is
+ * 	set to 0 if the symbol is not found.
+ *
+ * Returns
+ * 	On success, zero. On error, a negative value.
+ *
+ * 	**-EINVAL** if *flags* is not zero.
+ *
+ * 	**-EINVAL** if string *name* is not the same size as *name_sz*.
+ *
+ * 	**-ENOENT** if symbol is not found.
+ *
+ * 	**-EPERM** if caller does not have permission to obtain kernel address.
+ */
+static long (*bpf_kallsyms_lookup_name)(const char *name, int name_sz, int flags, __u64 *res) = (void *) 179;
+
+/*
+ * bpf_find_vma
+ *
+ * 	Find vma of *task* that contains *addr*, call *callback_fn*
+ * 	function with *task*, *vma*, and *callback_ctx*.
+ * 	The *callback_fn* should be a static function and
+ * 	the *callback_ctx* should be a pointer to the stack.
+ * 	The *flags* is used to control certain aspects of the helper.
+ * 	Currently, the *flags* must be 0.
+ *
+ * 	The expected callback signature is
+ *
+ * 	long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx);
+ *
+ *
+ * Returns
+ * 	0 on success.
+ * 	**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
+ * 	**-EBUSY** if failed to try lock mmap_lock.
+ * 	**-EINVAL** for invalid **flags**.
+ */
+static long (*bpf_find_vma)(struct task_struct *task, __u64 addr, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 180;
+
+/*
+ * bpf_loop
+ *
+ * 	For **nr_loops**, call **callback_fn** function
+ * 	with **callback_ctx** as the context parameter.
+ * 	The **callback_fn** should be a static function and
+ * 	the **callback_ctx** should be a pointer to the stack.
+ * 	The **flags** is used to control certain aspects of the helper.
+ * 	Currently, the **flags** must be 0. Currently, nr_loops is
+ * 	limited to 1 << 23 (~8 million) loops.
+ *
+ * 	long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ * 	where **index** is the current index in the loop. The index
+ * 	is zero-indexed.
+ *
+ * 	If **callback_fn** returns 0, the helper will continue to the next
+ * 	loop. If return value is 1, the helper will skip the rest of
+ * 	the loops and return. Other return values are not used now,
+ * 	and will be rejected by the verifier.
+ *
+ *
+ * Returns
+ * 	The number of loops performed, **-EINVAL** for invalid **flags**,
+ * 	**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
+ */
+static long (*bpf_loop)(__u32 nr_loops, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 181;
+
+/*
+ * bpf_strncmp
+ *
+ * 	Do strncmp() between **s1** and **s2**. **s1** doesn't need
+ * 	to be null-terminated and **s1_sz** is the maximum storage
+ * 	size of **s1**. **s2** must be a read-only string.
+ *
+ * Returns
+ * 	An integer less than, equal to, or greater than zero
+ * 	if the first **s1_sz** bytes of **s1** is found to be
+ * 	less than, to match, or be greater than **s2**.
+ */
+static long (*bpf_strncmp)(const char *s1, __u32 s1_sz, const char *s2) = (void *) 182;
+
+/*
+ * bpf_get_func_arg
+ *
+ * 	Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ * 	returned in **value**.
+ *
+ *
+ * Returns
+ * 	0 on success.
+ * 	**-EINVAL** if n >= arguments count of traced function.
+ */
+static long (*bpf_get_func_arg)(void *ctx, __u32 n, __u64 *value) = (void *) 183;
+
+/*
+ * bpf_get_func_ret
+ *
+ * 	Get return value of the traced function (for tracing programs)
+ * 	in **value**.
+ *
+ *
+ * Returns
+ * 	0 on success.
+ * 	**-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.
+ */
+static long (*bpf_get_func_ret)(void *ctx, __u64 *value) = (void *) 184;
+
+/*
+ * bpf_get_func_arg_cnt
+ *
+ * 	Get number of arguments of the traced function (for tracing programs).
+ *
+ *
+ * Returns
+ * 	The number of arguments of the traced function.
+ */
+static long (*bpf_get_func_arg_cnt)(void *ctx) = (void *) 185;
+
+/*
+ * bpf_get_retval
+ *
+ * 	Get the syscall's return value that will be returned to userspace.
+ *
+ * 	This helper is currently supported by cgroup programs only.
+ *
+ * Returns
+ * 	The syscall's return value.
+ */
+static int (*bpf_get_retval)(void) = (void *) 186;
+
+/*
+ * bpf_set_retval
+ *
+ * 	Set the syscall's return value that will be returned to userspace.
+ *
+ * 	This helper is currently supported by cgroup programs only.
+ *
+ * Returns
+ * 	0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_set_retval)(int retval) = (void *) 187;
+
+/*
+ * bpf_xdp_get_buff_len
+ *
+ * 	Get the total size of a given xdp buff (linear and paged area)
+ *
+ * Returns
+ * 	The total size of a given xdp buffer.
+ */
+static __u64 (*bpf_xdp_get_buff_len)(struct xdp_md *xdp_md) = (void *) 188;
+
+/*
+ * bpf_xdp_load_bytes
+ *
+ * 	This helper is provided as an easy way to load data from a
+ * 	xdp buffer. It can be used to load *len* bytes from *offset* from
+ * 	the frame associated to *xdp_md*, into the buffer pointed by
+ * 	*buf*.
+ *
+ * Returns
+ * 	0 on success, or a negative error in case of failure.
+ */
+static long (*bpf_xdp_load_bytes)(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len) = (void *) 189;
+
+/*
+ * bpf_xdp_store_bytes
+ *
+ * 	Store *len* bytes from buffer *buf* into the frame
+ * 	associated to *xdp_md*, at *offset*.
+ *
+ * Returns
+ * 	0 on success, or a negative error in case of failure.
+ */
+static long (*bpf_xdp_store_bytes)(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len) = (void *) 190;
+
+/*
+ * bpf_copy_from_user_task
+ *
+ * 	Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ * 	address space, and stores the data in *dst*. *flags* is not
+ * 	used yet and is provided for future extensibility. This helper
+ * 	can only be used by sleepable programs.
+ *
+ * Returns
+ * 	0 on success, or a negative error in case of failure. On error
+ * 	*dst* buffer is zeroed out.
+ */
+static long (*bpf_copy_from_user_task)(void *dst, __u32 size, const void *user_ptr, struct task_struct *tsk, __u64 flags) = (void *) 191;
+
+/*
+ * bpf_skb_set_tstamp
+ *
+ * 	Change the __sk_buff->tstamp_type to *tstamp_type*
+ * 	and set *tstamp* to the __sk_buff->tstamp together.
+ *
+ * 	If there is no need to change the __sk_buff->tstamp_type,
+ * 	the tstamp value can be directly written to __sk_buff->tstamp
+ * 	instead.
+ *
+ * 	BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ * 	will be kept during bpf_redirect_*().  A non zero
+ * 	*tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ * 	*tstamp_type*.
+ *
+ * 	A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ * 	with a zero *tstamp*.
+ *
+ * 	Only IPv4 and IPv6 skb->protocol are supported.
+ *
+ * 	This function is most useful when it needs to set a
+ * 	mono delivery time to __sk_buff->tstamp and then
+ * 	bpf_redirect_*() to the egress of an iface.  For example,
+ * 	changing the (rcv) timestamp in __sk_buff->tstamp at
+ * 	ingress to a mono delivery time and then bpf_redirect_*()
+ * 	to sch_fq@phy-dev.
+ *
+ * Returns
+ * 	0 on success.
+ * 	**-EINVAL** for invalid input
+ * 	**-EOPNOTSUPP** for unsupported protocol
+ */
+static long (*bpf_skb_set_tstamp)(struct __sk_buff *skb, __u64 tstamp, __u32 tstamp_type) = (void *) 192;
+
+/*
+ * bpf_ima_file_hash
+ *
+ * 	Returns a calculated IMA hash of the *file*.
+ * 	If the hash is larger than *size*, then only *size*
+ * 	bytes will be copied to *dst*
+ *
+ * Returns
+ * 	The **hash_algo** is returned on success,
+ * 	**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ * 	invalid arguments are passed.
+ */
+static long (*bpf_ima_file_hash)(struct file *file, void *dst, __u32 size) = (void *) 193;
+
+/*
+ * bpf_kptr_xchg
+ *
+ * 	Exchange kptr at pointer *map_value* with *ptr*, and return the
+ * 	old value. *ptr* can be NULL, otherwise it must be a referenced
+ * 	pointer which will be released when this helper is called.
+ *
+ * Returns
+ * 	The old value of kptr (which can be NULL). The returned pointer
+ * 	if not NULL, is a reference which must be released using its
+ * 	corresponding release function, or moved into a BPF map before
+ * 	program exit.
+ */
+static void *(*bpf_kptr_xchg)(void *map_value, void *ptr) = (void *) 194;
+
+/*
+ * bpf_map_lookup_percpu_elem
+ *
+ * 	Perform a lookup in *percpu map* for an entry associated to
+ * 	*key* on *cpu*.
+ *
+ * Returns
+ * 	Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * 	was found or *cpu* is invalid.
+ */
+static void *(*bpf_map_lookup_percpu_elem)(void *map, const void *key, __u32 cpu) = (void *) 195;
+

--- a/src/bpf_helpers.h
+++ b/src/bpf_helpers.h
@@ -75,6 +75,30 @@
 	})
 #endif

+/*
+ * Compiler (optimization) barrier.
+ */
+#ifndef barrier
+#define barrier() asm volatile("" ::: "memory")
+#endif
+
+/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
+ * compiler believe that there is some black box modification of a given
+ * variable and thus prevents compiler from making extra assumption about its
+ * value and potential simplifications and optimizations on this variable.
+ *
+ * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
+ * a variable, making some code patterns unverifiable. Putting barrier_var()
+ * in place will ensure that cast is performed before the barrier_var()
+ * invocation, because compiler has to pessimistically assume that embedded
+ * asm section might perform some extra operations on that variable.
+ *
+ * This is a variable-specific variant of more global barrier().
+ */
+#ifndef barrier_var
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+#endif
+
 /*
 * Helper macro to throw a compilation error if __bpf_unreachable() gets
 * built into the resulting code. This works given BPF back end does not
@@ -133,7 +157,7 @@ struct bpf_map_def {
 	unsigned int value_size;
 	unsigned int max_entries;
 	unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));

 enum libbpf_pin_type {
 	LIBBPF_PIN_NONE,
@@ -149,6 +173,8 @@ enum libbpf_tristate {

 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
+#define __kptr __attribute__((btf_type_tag("kptr")))
+#define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))

 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
--- a/src/bpf_tracing.h
+++ b/src/bpf_tracing.h
@@ -24,6 +24,12 @@
 #elif defined(__TARGET_ARCH_sparc)
 	#define bpf_target_sparc
 	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_riscv)
+	#define bpf_target_riscv
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arc)
+	#define bpf_target_arc
+	#define bpf_target_defined
 #else

 /* Fall back to what the compiler says */
@@ -48,6 +54,12 @@
 #elif defined(__sparc__)
 	#define bpf_target_sparc
 	#define bpf_target_defined
+#elif defined(__riscv) && __riscv_xlen == 64
+	#define bpf_target_riscv
+	#define bpf_target_defined
+#elif defined(__arc__)
+	#define bpf_target_arc
+	#define bpf_target_defined
 #endif /* no compiler target */

 #endif
@@ -60,251 +72,263 @@

 #if defined(__KERNEL__) || defined(__VMLINUX_H__)

-#define PT_REGS_PARM1(x) ((x)->di)
-#define PT_REGS_PARM2(x) ((x)->si)
-#define PT_REGS_PARM3(x) ((x)->dx)
-#define PT_REGS_PARM4(x) ((x)->cx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->sp)
-#define PT_REGS_FP(x) ((x)->bp)
-#define PT_REGS_RC(x) ((x)->ax)
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->ip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+#define __PT_PARM1_REG di
+#define __PT_PARM2_REG si
+#define __PT_PARM3_REG dx
+#define __PT_PARM4_REG cx
+#define __PT_PARM5_REG r8
+#define __PT_RET_REG sp
+#define __PT_FP_REG bp
+#define __PT_RC_REG ax
+#define __PT_SP_REG sp
+#define __PT_IP_REG ip
+/* syscall uses r10 for PARM4 */
+#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)

 #else

 #ifdef __i386__
+
+#define __PT_PARM1_REG eax
+#define __PT_PARM2_REG edx
+#define __PT_PARM3_REG ecx
 /* i386 kernel is built with -mregparm=3 */
-#define PT_REGS_PARM1(x) ((x)->eax)
-#define PT_REGS_PARM2(x) ((x)->edx)
-#define PT_REGS_PARM3(x) ((x)->ecx)
-#define PT_REGS_PARM4(x) 0
-#define PT_REGS_PARM5(x) 0
-#define PT_REGS_RET(x) ((x)->esp)
-#define PT_REGS_FP(x) ((x)->ebp)
-#define PT_REGS_RC(x) ((x)->eax)
-#define PT_REGS_SP(x) ((x)->esp)
-#define PT_REGS_IP(x) ((x)->eip)
+#define __PT_PARM4_REG __unsupported__
+#define __PT_PARM5_REG __unsupported__
+#define __PT_RET_REG esp
+#define __PT_FP_REG ebp
+#define __PT_RC_REG eax
+#define __PT_SP_REG esp
+#define __PT_IP_REG eip

-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
-#define PT_REGS_PARM4_CORE(x) 0
-#define PT_REGS_PARM5_CORE(x) 0
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
+#else /* __i386__ */

-#else
+#define __PT_PARM1_REG rdi
+#define __PT_PARM2_REG rsi
+#define __PT_PARM3_REG rdx
+#define __PT_PARM4_REG rcx
+#define __PT_PARM5_REG r8
+#define __PT_RET_REG rsp
+#define __PT_FP_REG rbp
+#define __PT_RC_REG rax
+#define __PT_SP_REG rsp
+#define __PT_IP_REG rip
+/* syscall uses r10 for PARM4 */
+#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)

-#define PT_REGS_PARM1(x) ((x)->rdi)
-#define PT_REGS_PARM2(x) ((x)->rsi)
-#define PT_REGS_PARM3(x) ((x)->rdx)
-#define PT_REGS_PARM4(x) ((x)->rcx)
-#define PT_REGS_PARM5(x) ((x)->r8)
-#define PT_REGS_RET(x) ((x)->rsp)
-#define PT_REGS_FP(x) ((x)->rbp)
-#define PT_REGS_RC(x) ((x)->rax)
-#define PT_REGS_SP(x) ((x)->rsp)
-#define PT_REGS_IP(x) ((x)->rip)
+#endif /* __i386__ */

-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
-
-#endif
-#endif
+#endif /* __KERNEL__ || __VMLINUX_H__ */

 #elif defined(bpf_target_s390)

-/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_S390 const volatile user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
-#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
-#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
-#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
-#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
-#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
-#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
-#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+struct pt_regs___s390 {
+	unsigned long orig_gpr2;
+};

-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr)
+/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x))
+#define __PT_PARM1_REG gprs[2]
+#define __PT_PARM2_REG gprs[3]
+#define __PT_PARM3_REG gprs[4]
+#define __PT_PARM4_REG gprs[5]
+#define __PT_PARM5_REG gprs[6]
+#define __PT_RET_REG grps[14]
+#define __PT_FP_REG gprs[11]	/* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG gprs[2]
+#define __PT_SP_REG gprs[15]
+#define __PT_IP_REG psw.addr
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)

 #elif defined(bpf_target_arm)

-#define PT_REGS_PARM1(x) ((x)->uregs[0])
-#define PT_REGS_PARM2(x) ((x)->uregs[1])
-#define PT_REGS_PARM3(x) ((x)->uregs[2])
-#define PT_REGS_PARM4(x) ((x)->uregs[3])
-#define PT_REGS_PARM5(x) ((x)->uregs[4])
-#define PT_REGS_RET(x) ((x)->uregs[14])
-#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->uregs[0])
-#define PT_REGS_SP(x) ((x)->uregs[13])
-#define PT_REGS_IP(x) ((x)->uregs[12])
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+#define __PT_PARM1_REG uregs[0]
+#define __PT_PARM2_REG uregs[1]
+#define __PT_PARM3_REG uregs[2]
+#define __PT_PARM4_REG uregs[3]
+#define __PT_PARM5_REG uregs[4]
+#define __PT_RET_REG uregs[14]
+#define __PT_FP_REG uregs[11]	/* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG uregs[0]
+#define __PT_SP_REG uregs[13]
+#define __PT_IP_REG uregs[12]

 #elif defined(bpf_target_arm64)

-/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
-struct pt_regs;
-#define PT_REGS_ARM64 const volatile struct user_pt_regs
-#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
-#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
-#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
-#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
-#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
-/* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
-#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
-#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
-#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+struct pt_regs___arm64 {
+	unsigned long orig_x0;
+};

-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
+#define __PT_PARM1_REG regs[0]
+#define __PT_PARM2_REG regs[1]
+#define __PT_PARM3_REG regs[2]
+#define __PT_PARM4_REG regs[3]
+#define __PT_PARM5_REG regs[4]
+#define __PT_RET_REG regs[30]
+#define __PT_FP_REG regs[29]	/* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG regs[0]
+#define __PT_SP_REG sp
+#define __PT_IP_REG pc
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)

 #elif defined(bpf_target_mips)

-#define PT_REGS_PARM1(x) ((x)->regs[4])
-#define PT_REGS_PARM2(x) ((x)->regs[5])
-#define PT_REGS_PARM3(x) ((x)->regs[6])
-#define PT_REGS_PARM4(x) ((x)->regs[7])
-#define PT_REGS_PARM5(x) ((x)->regs[8])
-#define PT_REGS_RET(x) ((x)->regs[31])
-#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[2])
-#define PT_REGS_SP(x) ((x)->regs[29])
-#define PT_REGS_IP(x) ((x)->cp0_epc)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+#define __PT_PARM1_REG regs[4]
+#define __PT_PARM2_REG regs[5]
+#define __PT_PARM3_REG regs[6]
+#define __PT_PARM4_REG regs[7]
+#define __PT_PARM5_REG regs[8]
+#define __PT_RET_REG regs[31]
+#define __PT_FP_REG regs[30]	/* Works only with CONFIG_FRAME_POINTER */
+#define __PT_RC_REG regs[2]
+#define __PT_SP_REG regs[29]
+#define __PT_IP_REG cp0_epc

 #elif defined(bpf_target_powerpc)

-#define PT_REGS_PARM1(x) ((x)->gpr[3])
-#define PT_REGS_PARM2(x) ((x)->gpr[4])
-#define PT_REGS_PARM3(x) ((x)->gpr[5])
-#define PT_REGS_PARM4(x) ((x)->gpr[6])
-#define PT_REGS_PARM5(x) ((x)->gpr[7])
-#define PT_REGS_RC(x) ((x)->gpr[3])
-#define PT_REGS_SP(x) ((x)->sp)
-#define PT_REGS_IP(x) ((x)->nip)
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+#define __PT_PARM1_REG gpr[3]
+#define __PT_PARM2_REG gpr[4]
+#define __PT_PARM3_REG gpr[5]
+#define __PT_PARM4_REG gpr[6]
+#define __PT_PARM5_REG gpr[7]
+#define __PT_RET_REG regs[31]
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG gpr[3]
+#define __PT_SP_REG sp
+#define __PT_IP_REG nip
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx

 #elif defined(bpf_target_sparc)

-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
-#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
-#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
-#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
-#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
-#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
-#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
-
-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
-
+#define __PT_PARM1_REG u_regs[UREG_I0]
+#define __PT_PARM2_REG u_regs[UREG_I1]
+#define __PT_PARM3_REG u_regs[UREG_I2]
+#define __PT_PARM4_REG u_regs[UREG_I3]
+#define __PT_PARM5_REG u_regs[UREG_I4]
+#define __PT_RET_REG u_regs[UREG_I7]
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG u_regs[UREG_I0]
+#define __PT_SP_REG u_regs[UREG_FP]
 /* Should this also be a bpf_target check for the sparc case? */
 #if defined(__arch64__)
-#define PT_REGS_IP(x) ((x)->tpc)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
+#define __PT_IP_REG tpc
 #else
-#define PT_REGS_IP(x) ((x)->pc)
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
+#define __PT_IP_REG pc
 #endif

+#elif defined(bpf_target_riscv)
+
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG a0
+#define __PT_PARM2_REG a1
+#define __PT_PARM3_REG a2
+#define __PT_PARM4_REG a3
+#define __PT_PARM5_REG a4
+#define __PT_RET_REG ra
+#define __PT_FP_REG s0
+#define __PT_RC_REG a5
+#define __PT_SP_REG sp
+#define __PT_IP_REG pc
+/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+
+#elif defined(bpf_target_arc)
+
+/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG scratch.r0
+#define __PT_PARM2_REG scratch.r1
+#define __PT_PARM3_REG scratch.r2
+#define __PT_PARM4_REG scratch.r3
+#define __PT_PARM5_REG scratch.r4
+#define __PT_RET_REG scratch.blink
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG scratch.r0
+#define __PT_SP_REG scratch.sp
+#define __PT_IP_REG scratch.ret
+/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+
 #endif

+#if defined(bpf_target_defined)
+
+struct pt_regs;
+
+/* allow some architecutres to override `struct pt_regs` */
+#ifndef __PT_REGS_CAST
+#define __PT_REGS_CAST(x) (x)
+#endif
+
+#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG)
+#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG)
+#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG)
+#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG)
+#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG)
+#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG)
+#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG)
+#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG)
+#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG)
+#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG)
+
 #if defined(bpf_target_powerpc)
+
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
 #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+
 #elif defined(bpf_target_sparc)
+
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
 #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
-#elif defined(bpf_target_defined)
+
+#else
+
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \
 	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \
-	({ bpf_probe_read_kernel(&(ip), sizeof(ip),			    \
-			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+
 #endif

-#if !defined(bpf_target_defined)
+#ifndef PT_REGS_PARM1_SYSCALL
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x)
+#endif
+#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x)
+#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x)
+#ifndef PT_REGS_PARM4_SYSCALL
+#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x)
+#endif
+#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x)
+
+#ifndef PT_REGS_PARM1_CORE_SYSCALL
+#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x)
+#endif
+#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x)
+#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x)
+#ifndef PT_REGS_PARM4_CORE_SYSCALL
+#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x)
+#endif
+#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x)
+
+#else /* defined(bpf_target_defined) */

 #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
@@ -331,7 +355,29 @@ struct pt_regs;
 #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })

-#endif /* !defined(bpf_target_defined) */
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* defined(bpf_target_defined) */
+
+/*
+ * When invoked from a syscall handler kprobe, returns a pointer to a
+ * struct pt_regs containing syscall arguments and suitable for passing to
+ * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL().
+ */
+#ifndef PT_REGS_SYSCALL_REGS
+/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx))
+#endif

 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
@@ -343,25 +389,23 @@ struct pt_regs;
 #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
 #endif
 #ifndef ___bpf_narg
-#define ___bpf_narg(...) \
-	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
 #endif

-#define ___bpf_ctx_cast0() ctx
-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast0()            ctx
+#define ___bpf_ctx_cast1(x)           ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...)  ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...)  ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...)  ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...)  ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...)  ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...)  ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...)  ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...)  ___bpf_ctx_cast8(args), (void *)ctx[8]
 #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
 #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
 #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
-#define ___bpf_ctx_cast(args...) \
-	___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+#define ___bpf_ctx_cast(args...)      ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)

 /*
 * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
@@ -394,19 +438,13 @@ ____##name(unsigned long long *ctx, ##args)

 struct pt_regs;

-#define ___bpf_kprobe_args0() ctx
-#define ___bpf_kprobe_args1(x) \
-	___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
-#define ___bpf_kprobe_args2(x, args...) \
-	___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
-#define ___bpf_kprobe_args3(x, args...) \
-	___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
-#define ___bpf_kprobe_args4(x, args...) \
-	___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
-#define ___bpf_kprobe_args5(x, args...) \
-	___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
-#define ___bpf_kprobe_args(args...) \
-	___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+#define ___bpf_kprobe_args0()           ctx
+#define ___bpf_kprobe_args1(x)          ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...)     ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)

 /*
 * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
@@ -432,11 +470,9 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 static __attribute__((always_inline)) typeof(name(0))			    \
 ____##name(struct pt_regs *ctx, ##args)

-#define ___bpf_kretprobe_args0() ctx
-#define ___bpf_kretprobe_args1(x) \
-	___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
-#define ___bpf_kretprobe_args(args...) \
-	___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+#define ___bpf_kretprobe_args0()       ctx
+#define ___bpf_kretprobe_args1(x)      ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)

 /*
 * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
@@ -457,4 +493,39 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)

+#define ___bpf_syscall_args0()           ctx
+#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
+ * tracing syscall functions, like __x64_sys_close. It hides the underlying
+ * platform-specific low-level way of getting syscall input arguments from
+ * struct pt_regs, and provides a familiar typed and named function arguments
+ * syntax and semantics of accessing syscall input parameters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ *
+ * This macro relies on BPF CO-RE support.
+ */
+#define BPF_KPROBE_SYSCALL(name, args...)				    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);		    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_syscall_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args)
+
 #endif
--- a/src/btf.c
+++ b/src/btf.c
@@ -57,7 +57,7 @@ struct btf {
 	 * representation is broken up into three independently allocated
 	 * memory regions to be able to modify them independently.
 	 * raw_data is nulled out at that point, but can be later allocated
-	 * and cached again if user calls btf__get_raw_data(), at which point
+	 * and cached again if user calls btf__raw_data(), at which point
 	 * raw_data will contain a contiguous copy of header, types, and
 	 * strings:
 	 *
@@ -236,17 +236,23 @@ static int btf_parse_hdr(struct btf *btf)
 		}
 		btf_bswap_hdr(hdr);
 	} else if (hdr->magic != BTF_MAGIC) {
-		pr_debug("Invalid BTF magic:%x\n", hdr->magic);
+		pr_debug("Invalid BTF magic: %x\n", hdr->magic);
 		return -EINVAL;
 	}

-	meta_left = btf->raw_size - sizeof(*hdr);
-	if (meta_left < hdr->str_off + hdr->str_len) {
-		pr_debug("Invalid BTF total size:%u\n", btf->raw_size);
+	if (btf->raw_size < hdr->hdr_len) {
+		pr_debug("BTF header len %u larger than data size %u\n",
+			 hdr->hdr_len, btf->raw_size);
 		return -EINVAL;
 	}

-	if (hdr->type_off + hdr->type_len > hdr->str_off) {
+	meta_left = btf->raw_size - hdr->hdr_len;
+	if (meta_left < (long long)hdr->str_off + hdr->str_len) {
+		pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
+		return -EINVAL;
+	}
+
+	if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
 		pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
 			 hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
 		return -EINVAL;
@@ -293,6 +299,7 @@ static int btf_type_size(const struct btf_type *t)
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_FLOAT:
+	case BTF_KIND_TYPE_TAG:
 		return base_size;
 	case BTF_KIND_INT:
 		return base_size + sizeof(__u32);
@@ -309,8 +316,8 @@ static int btf_type_size(const struct btf_type *t)
 		return base_size + sizeof(struct btf_var);
 	case BTF_KIND_DATASEC:
 		return base_size + vlen * sizeof(struct btf_var_secinfo);
-	case BTF_KIND_TAG:
-		return base_size + sizeof(struct btf_tag);
+	case BTF_KIND_DECL_TAG:
+		return base_size + sizeof(struct btf_decl_tag);
 	default:
 		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
 		return -EINVAL;
@@ -343,6 +350,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_FLOAT:
+	case BTF_KIND_TYPE_TAG:
 		return 0;
 	case BTF_KIND_INT:
 		*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -383,8 +391,8 @@ static int btf_bswap_type_rest(struct btf_type *t)
 			v->size = bswap_32(v->size);
 		}
 		return 0;
-	case BTF_KIND_TAG:
-		btf_tag(t)->component_idx = bswap_32(btf_tag(t)->component_idx);
+	case BTF_KIND_DECL_TAG:
+		btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx);
 		return 0;
 	default:
 		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
@@ -435,13 +443,18 @@ __u32 btf__get_nr_types(const struct btf *btf)
 	return btf->start_id + btf->nr_types - 1;
 }

+__u32 btf__type_cnt(const struct btf *btf)
+{
+	return btf->start_id + btf->nr_types;
+}
+
 const struct btf *btf__base_btf(const struct btf *btf)
 {
 	return btf->base_btf;
 }

 /* internal helper returning non-const pointer to a type */
-struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)
 {
 	if (type_id == 0)
 		return &btf_void;
@@ -466,8 +479,8 @@ static int determine_ptr_size(const struct btf *btf)
 	if (btf->base_btf && btf->base_btf->ptr_sz > 0)
 		return btf->base_btf->ptr_sz;

-	n = btf__get_nr_types(btf);
-	for (i = 1; i <= n; i++) {
+	n = btf__type_cnt(btf);
+	for (i = 1; i < n; i++) {
 		t = btf__type_by_id(btf, i);
 		if (!btf_is_int(t))
 			continue;
@@ -527,9 +540,9 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)

 static bool is_host_big_endian(void)
 {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	return false;
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 	return true;
 #else
 # error "Unrecognized __BYTE_ORDER__"
@@ -596,7 +609,8 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_CONST:
 		case BTF_KIND_RESTRICT:
 		case BTF_KIND_VAR:
-		case BTF_KIND_TAG:
+		case BTF_KIND_DECL_TAG:
+		case BTF_KIND_TYPE_TAG:
 			type_id = t->type;
 			break;
 		case BTF_KIND_ARRAY:
@@ -638,6 +652,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_CONST:
 	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPE_TAG:
 		return btf__align_of(btf, t->type);
 	case BTF_KIND_ARRAY:
 		return btf__align_of(btf, btf_array(t)->type);
@@ -684,12 +699,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)

 __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 {
-	__u32 i, nr_types = btf__get_nr_types(btf);
+	__u32 i, nr_types = btf__type_cnt(btf);

 	if (!strcmp(type_name, "void"))
 		return 0;

-	for (i = 1; i <= nr_types; i++) {
+	for (i = 1; i < nr_types; i++) {
 		const struct btf_type *t = btf__type_by_id(btf, i);
 		const char *name = btf__name_by_offset(btf, t->name_off);

@@ -703,12 +718,12 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
 				   const char *type_name, __u32 kind)
 {
-	__u32 i, nr_types = btf__get_nr_types(btf);
+	__u32 i, nr_types = btf__type_cnt(btf);

 	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
 		return 0;

-	for (i = start_id; i <= nr_types; i++) {
+	for (i = start_id; i < nr_types; i++) {
 		const struct btf_type *t = btf__type_by_id(btf, i);
 		const char *name;

@@ -781,7 +796,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)

 	if (base_btf) {
 		btf->base_btf = base_btf;
-		btf->start_id = btf__get_nr_types(base_btf) + 1;
+		btf->start_id = btf__type_cnt(base_btf);
 		btf->start_str_off = base_btf->hdr->str_len;
 	}

@@ -831,7 +846,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)

 	if (base_btf) {
 		btf->base_btf = base_btf;
-		btf->start_id = btf__get_nr_types(base_btf) + 1;
+		btf->start_id = btf__type_cnt(base_btf);
 		btf->start_str_off = base_btf->hdr->str_len;
 	}

@@ -886,7 +901,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
 		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
 	}

-	fd = open(path, O_RDONLY);
+	fd = open(path, O_RDONLY | O_CLOEXEC);
 	if (fd < 0) {
 		err = -errno;
 		pr_warn("failed to open %s: %s\n", path, strerror(errno));
@@ -1107,149 +1122,88 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
 	return libbpf_ptr(btf_parse(path, base_btf, NULL));
 }

-static int compare_vsi_off(const void *_a, const void *_b)
-{
-	const struct btf_var_secinfo *a = _a;
-	const struct btf_var_secinfo *b = _b;
-
-	return a->offset - b->offset;
-}
-
-static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
-			     struct btf_type *t)
-{
-	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
-	const char *name = btf__name_by_offset(btf, t->name_off);
-	const struct btf_type *t_var;
-	struct btf_var_secinfo *vsi;
-	const struct btf_var *var;
-	int ret;
-
-	if (!name) {
-		pr_debug("No name found in string section for DATASEC kind.\n");
-		return -ENOENT;
-	}
-
-	/* .extern datasec size and var offsets were set correctly during
-	 * extern collection step, so just skip straight to sorting variables
-	 */
-	if (t->size)
-		goto sort_vars;
-
-	ret = bpf_object__section_size(obj, name, &size);
-	if (ret || !size || (t->size && t->size != size)) {
-		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
-		return -ENOENT;
-	}
-
-	t->size = size;
-
-	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
-		t_var = btf__type_by_id(btf, vsi->type);
-		var = btf_var(t_var);
-
-		if (!btf_is_var(t_var)) {
-			pr_debug("Non-VAR type seen in section %s\n", name);
-			return -EINVAL;
-		}
-
-		if (var->linkage == BTF_VAR_STATIC)
-			continue;
-
-		name = btf__name_by_offset(btf, t_var->name_off);
-		if (!name) {
-			pr_debug("No name found in string section for VAR kind\n");
-			return -ENOENT;
-		}
-
-		ret = bpf_object__variable_offset(obj, name, &off);
-		if (ret) {
-			pr_debug("No offset found in symbol table for VAR %s\n",
-				 name);
-			return -ENOENT;
-		}
-
-		vsi->offset = off;
-	}
-
-sort_vars:
-	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
-	return 0;
-}
-
-int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
-{
-	int err = 0;
-	__u32 i;
-
-	for (i = 1; i <= btf->nr_types; i++) {
-		struct btf_type *t = btf_type_by_id(btf, i);
-
-		/* Loader needs to fix up some of the things compiler
-		 * couldn't get its hands on while emitting BTF. This
-		 * is section size and global variable offset. We use
-		 * the info from the ELF itself for this purpose.
-		 */
-		if (btf_is_datasec(t)) {
-			err = btf_fixup_datasec(obj, btf, t);
-			if (err)
-				break;
-		}
-	}
-
-	return libbpf_err(err);
-}
-
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);

-int btf__load_into_kernel(struct btf *btf)
+int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)
 {
-	__u32 log_buf_size = 0, raw_size;
-	char *log_buf = NULL;
+	LIBBPF_OPTS(bpf_btf_load_opts, opts);
+	__u32 buf_sz = 0, raw_size;
+	char *buf = NULL, *tmp;
 	void *raw_data;
 	int err = 0;

 	if (btf->fd >= 0)
 		return libbpf_err(-EEXIST);
+	if (log_sz && !log_buf)
+		return libbpf_err(-EINVAL);

-retry_load:
-	if (log_buf_size) {
-		log_buf = malloc(log_buf_size);
-		if (!log_buf)
-			return libbpf_err(-ENOMEM);
-
-		*log_buf = 0;
-	}
-
+	/* cache native raw data representation */
 	raw_data = btf_get_raw_data(btf, &raw_size, false);
 	if (!raw_data) {
 		err = -ENOMEM;
 		goto done;
 	}
-	/* cache native raw data representation */
 	btf->raw_size = raw_size;
 	btf->raw_data = raw_data;

-	btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
-	if (btf->fd < 0) {
-		if (!log_buf || errno == ENOSPC) {
-			log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
-					   log_buf_size << 1);
-			free(log_buf);
-			goto retry_load;
+retry_load:
+	/* if log_level is 0, we won't provide log_buf/log_size to the kernel,
+	 * initially. Only if BTF loading fails, we bump log_level to 1 and
+	 * retry, using either auto-allocated or custom log_buf. This way
+	 * non-NULL custom log_buf provides a buffer just in case, but hopes
+	 * for successful load and no need for log_buf.
+	 */
+	if (log_level) {
+		/* if caller didn't provide custom log_buf, we'll keep
+		 * allocating our own progressively bigger buffers for BTF
+		 * verification log
+		 */
+		if (!log_buf) {
+			buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2);
+			tmp = realloc(buf, buf_sz);
+			if (!tmp) {
+				err = -ENOMEM;
+				goto done;
+			}
+			buf = tmp;
+			buf[0] = '\0';
 		}

+		opts.log_buf = log_buf ? log_buf : buf;
+		opts.log_size = log_buf ? log_sz : buf_sz;
+		opts.log_level = log_level;
+	}
+
+	btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
+	if (btf->fd < 0) {
+		/* time to turn on verbose mode and try again */
+		if (log_level == 0) {
+			log_level = 1;
+			goto retry_load;
+		}
+		/* only retry if caller didn't provide custom log_buf, but
+		 * make sure we can never overflow buf_sz
+		 */
+		if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2)
+			goto retry_load;
+
 		err = -errno;
-		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
-		if (*log_buf)
-			pr_warn("%s\n", log_buf);
-		goto done;
+		pr_warn("BTF loading error: %d\n", err);
+		/* don't print out contents of custom log_buf */
+		if (!log_buf && buf[0])
+			pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf);
 	}

 done:
-	free(log_buf);
+	free(buf);
 	return libbpf_err(err);
 }
+
+int btf__load_into_kernel(struct btf *btf)
+{
+	return btf_load_into_kernel(btf, NULL, 0, 0);
+}
+
 int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));

 int btf__fd(const struct btf *btf)
@@ -1317,7 +1271,7 @@ err_out:
 	return NULL;
 }

-const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
+const void *btf__raw_data(const struct btf *btf_ro, __u32 *size)
 {
 	struct btf *btf = (struct btf *)btf_ro;
 	__u32 data_sz;
@@ -1325,7 +1279,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)

 	data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
 	if (!data)
-		return errno = -ENOMEM, NULL;
+		return errno = ENOMEM, NULL;

 	btf->raw_size = data_sz;
 	if (btf->swapped_endian)
@@ -1336,6 +1290,9 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
 	return data;
 }

+__attribute__((alias("btf__raw_data")))
+const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+
 const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 {
 	if (offset < btf->start_str_off)
@@ -1663,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 struct btf_pipe {
 	const struct btf *src;
 	struct btf *dst;
+	struct hashmap *str_off_map; /* map string offsets from src to dst */
 };

 static int btf_rewrite_str(__u32 *str_off, void *ctx)
 {
 	struct btf_pipe *p = ctx;
-	int off;
+	void *mapped_off;
+	int off, err;

 	if (!*str_off) /* nothing to do for empty strings */
 		return 0;

+	if (p->str_off_map &&
+	    hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+		*str_off = (__u32)(long)mapped_off;
+		return 0;
+	}
+
 	off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
 	if (off < 0)
 		return off;

+	/* Remember string mapping from src to dst.  It avoids
+	 * performing expensive string comparisons.
+	 */
+	if (p->str_off_map) {
+		err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+		if (err)
+			return err;
+	}
+
 	*str_off = off;
 	return 0;
 }
@@ -1723,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
 	return 0;
 }

+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
 int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
 	struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1744,7 +1721,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 	old_strs_len = btf->hdr->str_len;

 	data_sz = src_btf->hdr->type_len;
-	cnt = btf__get_nr_types(src_btf);
+	cnt = btf__type_cnt(src_btf) - 1;

 	/* pre-allocate enough memory for new types */
 	t = btf_add_type_mem(btf, data_sz);
@@ -1756,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 	if (!off)
 		return libbpf_err(-ENOMEM);

+	/* Map the string offsets from src_btf to the offsets from btf to improve performance */
+	p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(p.str_off_map))
+		return libbpf_err(-ENOMEM);
+
 	/* bulk copy types data for all types from src_btf */
 	memcpy(t, src_btf->types_data, data_sz);

@@ -1797,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 	btf->hdr->str_off += data_sz;
 	btf->nr_types += cnt;

+	hashmap__free(p.str_off_map);
+
 	/* return type ID of the first added BTF type */
 	return btf->start_id + btf->nr_types - cnt;
 err_out:
@@ -1810,6 +1794,8 @@ err_out:
 	 * wasn't modified, so doesn't need restoring, see big comment above */
 	btf->hdr->str_len = old_strs_len;

+	hashmap__free(p.str_off_map);
+
 	return libbpf_err(err);
 }

@@ -2061,7 +2047,7 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)

 static struct btf_type *btf_last_type(struct btf *btf)
 {
-	return btf_type_by_id(btf, btf__get_nr_types(btf));
+	return btf_type_by_id(btf, btf__type_cnt(btf) - 1);
 }

 /*
@@ -2314,6 +2300,22 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
 	return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
 }

+/*
+ * Append new BTF_KIND_TYPE_TAG type with:
+ *   - *value*, non-empty/non-NULL tag value;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
+{
+	if (!value|| !value[0])
+		return libbpf_err(-EINVAL);
+
+	return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
+}
+
 /*
 * Append new BTF_KIND_FUNC type with:
 *   - *name*, non-empty/non-NULL name;
@@ -2569,7 +2571,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 }

 /*
- * Append new BTF_KIND_TAG type with:
+ * Append new BTF_KIND_DECL_TAG type with:
 *   - *value* - non-empty/non-NULL string;
 *   - *ref_type_id* - referenced type ID, it might not exist yet;
 *   - *component_idx* - -1 for tagging reference type, otherwise struct/union
@@ -2578,7 +2580,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 *   - >0, type ID of newly added BTF type;
 *   - <0, on error.
 */
-int btf__add_tag(struct btf *btf, const char *value, int ref_type_id,
+int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
 		 int component_idx)
 {
 	struct btf_type *t;
@@ -2593,7 +2595,7 @@ int btf__add_tag(struct btf *btf, const char *value, int ref_type_id,
 	if (btf_ensure_modifiable(btf))
 		return libbpf_err(-ENOMEM);

-	sz = sizeof(struct btf_type) + sizeof(struct btf_tag);
+	sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
 		return libbpf_err(-ENOMEM);
@@ -2603,9 +2605,9 @@ int btf__add_tag(struct btf *btf, const char *value, int ref_type_id,
 		return value_off;

 	t->name_off = value_off;
-	t->info = btf_type_info(BTF_KIND_TAG, 0, false);
+	t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false);
 	t->type = ref_type_id;
-	btf_tag(t)->component_idx = component_idx;
+	btf_decl_tag(t)->component_idx = component_idx;

 	return btf_commit_type(btf, sz);
 }
@@ -2624,6 +2626,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
 	const struct btf_ext_info_sec *sinfo;
 	struct btf_ext_info *ext_info;
 	__u32 info_left, record_size;
+	size_t sec_cnt = 0;
 	/* The start of the info sec (including the __u32 record_size). */
 	void *info;

@@ -2687,8 +2690,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
 			return -EINVAL;
 		}

-		total_record_size = sec_hdrlen +
-				    (__u64)num_records * record_size;
+		total_record_size = sec_hdrlen + (__u64)num_records * record_size;
 		if (info_left < total_record_size) {
 			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
 			     ext_sec->desc);
@@ -2697,12 +2699,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,

 		info_left -= total_record_size;
 		sinfo = (void *)sinfo + total_record_size;
+		sec_cnt++;
 	}

 	ext_info = ext_sec->ext_info;
 	ext_info->len = ext_sec->len - sizeof(__u32);
 	ext_info->rec_size = record_size;
 	ext_info->info = info + sizeof(__u32);
+	ext_info->sec_cnt = sec_cnt;

 	return 0;
 }
@@ -2786,19 +2790,18 @@ void btf_ext__free(struct btf_ext *btf_ext)
 {
 	if (IS_ERR_OR_NULL(btf_ext))
 		return;
+	free(btf_ext->func_info.sec_idxs);
+	free(btf_ext->line_info.sec_idxs);
+	free(btf_ext->core_relo_info.sec_idxs);
 	free(btf_ext->data);
 	free(btf_ext);
 }

-struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
 {
 	struct btf_ext *btf_ext;
 	int err;

-	err = btf_ext_parse_hdr(data, size);
-	if (err)
-		return libbpf_err_ptr(err);
-
 	btf_ext = calloc(1, sizeof(struct btf_ext));
 	if (!btf_ext)
 		return libbpf_err_ptr(-ENOMEM);
@@ -2811,6 +2814,10 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	}
 	memcpy(btf_ext->data, data, size);

+	err = btf_ext_parse_hdr(btf_ext->data, size);
+	if (err)
+		goto done;
+
 	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
 		err = -EINVAL;
 		goto done;
@@ -2824,10 +2831,8 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	if (err)
 		goto done;

-	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
-		err = -EINVAL;
-		goto done;
-	}
+	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+		goto done; /* skip core relos parsing */

 	err = btf_ext_setup_core_relos(btf_ext);
 	if (err)
@@ -2925,8 +2930,7 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)

 struct btf_dedup;

-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
-				       const struct btf_dedup_opts *opts);
+static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
 static void btf_dedup_free(struct btf_dedup *d);
 static int btf_dedup_prep(struct btf_dedup *d);
 static int btf_dedup_strings(struct btf_dedup *d);
@@ -3073,19 +3077,26 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
 * deduplicating structs/unions is described in greater details in comments for
 * `btf_dedup_is_equiv` function.
 */
-int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
-	       const struct btf_dedup_opts *opts)
+
+DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0)
+int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts)
 {
-	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+	struct btf_dedup *d;
 	int err;

+	if (!OPTS_VALID(opts, btf_dedup_opts))
+		return libbpf_err(-EINVAL);
+
+	d = btf_dedup_new(btf, opts);
 	if (IS_ERR(d)) {
 		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
 		return libbpf_err(-EINVAL);
 	}

-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	if (btf_ensure_modifiable(btf)) {
+		err = -ENOMEM;
+		goto done;
+	}

 	err = btf_dedup_prep(d);
 	if (err) {
@@ -3128,6 +3139,19 @@ done:
 	return libbpf_err(err);
 }

+COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2)
+int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts)
+{
+	LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext);
+
+	if (unused_opts) {
+		pr_warn("please use new version of btf__dedup() that supports options\n");
+		return libbpf_err(-ENOTSUP);
+	}
+
+	return btf__dedup(btf, &opts);
+}
+
 #define BTF_UNPROCESSED_ID ((__u32)-1)
 #define BTF_IN_PROGRESS_ID ((__u32)-2)

@@ -3240,8 +3264,7 @@ static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
 	return k1 == k2;
 }

-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
-				       const struct btf_dedup_opts *opts)
+static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts)
 {
 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
 	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
@@ -3250,13 +3273,11 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 	if (!d)
 		return ERR_PTR(-ENOMEM);

-	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
-	/* dedup_table_size is now used only to force collisions in tests */
-	if (opts && opts->dedup_table_size == 1)
+	if (OPTS_GET(opts, force_collisions, false))
 		hash_fn = btf_dedup_collision_hash_fn;

 	d->btf = btf;
-	d->btf_ext = btf_ext;
+	d->btf_ext = OPTS_GET(opts, btf_ext, NULL);

 	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
 	if (IS_ERR(d->dedup_table)) {
@@ -3265,7 +3286,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 		goto done;
 	}

-	type_cnt = btf__get_nr_types(btf) + 1;
+	type_cnt = btf__type_cnt(btf);
 	d->map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->map) {
 		err = -ENOMEM;
@@ -3427,7 +3448,7 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
 }

 /* Calculate type signature hash of INT or TAG. */
-static long btf_hash_int_tag(struct btf_type *t)
+static long btf_hash_int_decl_tag(struct btf_type *t)
 {
 	__u32 info = *(__u32 *)(t + 1);
 	long h;
@@ -3520,8 +3541,8 @@ static long btf_hash_struct(struct btf_type *t)
 }

 /*
- * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
- * IDs. This check is performed during type graph equivalence check and
+ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
+ * type IDs. This check is performed during type graph equivalence check and
 * referenced types equivalence is checked separately.
 */
 static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
@@ -3702,11 +3723,12 @@ static int btf_dedup_prep(struct btf_dedup *d)
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_FUNC:
 		case BTF_KIND_FLOAT:
+		case BTF_KIND_TYPE_TAG:
 			h = btf_hash_common(t);
 			break;
 		case BTF_KIND_INT:
-		case BTF_KIND_TAG:
-			h = btf_hash_int_tag(t);
+		case BTF_KIND_DECL_TAG:
+			h = btf_hash_int_decl_tag(t);
 			break;
 		case BTF_KIND_ENUM:
 			h = btf_hash_enum(t);
@@ -3761,11 +3783,12 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_FUNC_PROTO:
 	case BTF_KIND_VAR:
 	case BTF_KIND_DATASEC:
-	case BTF_KIND_TAG:
+	case BTF_KIND_DECL_TAG:
+	case BTF_KIND_TYPE_TAG:
 		return 0;

 	case BTF_KIND_INT:
-		h = btf_hash_int_tag(t);
+		h = btf_hash_int_decl_tag(t);
 		for_each_dedup_cand(d, hash_entry, h) {
 			cand_id = (__u32)(long)hash_entry->value;
 			cand = btf_type_by_id(d->btf, cand_id);
@@ -3785,8 +3808,6 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 				new_id = cand_id;
 				break;
 			}
-			if (d->opts.dont_resolve_fwds)
-				continue;
 			if (btf_compat_enum(t, cand)) {
 				if (btf_is_enum_fwd(t)) {
 					/* resolve fwd to full enum */
@@ -3894,6 +3915,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
 	return btf_equal_array(t1, t2);
 }

+/* Check if given two types are identical STRUCT/UNION definitions */
+static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+	const struct btf_member *m1, *m2;
+	struct btf_type *t1, *t2;
+	int n, i;
+
+	t1 = btf_type_by_id(d->btf, id1);
+	t2 = btf_type_by_id(d->btf, id2);
+
+	if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
+		return false;
+
+	if (!btf_shallow_equal_struct(t1, t2))
+		return false;
+
+	m1 = btf_members(t1);
+	m2 = btf_members(t2);
+	for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+		if (m1->type != m2->type)
+			return false;
+	}
+	return true;
+}
+
 /*
 * Check equivalence of BTF type graph formed by candidate struct/union (we'll
 * call it "candidate graph" in this description for brevity) to a type graph
@@ -4005,6 +4051,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,

 	hypot_type_id = d->hypot_map[canon_id];
 	if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+		if (hypot_type_id == cand_id)
+			return 1;
 		/* In some cases compiler will generate different DWARF types
 		 * for *identical* array type definitions and use them for
 		 * different fields within the *same* struct. This breaks type
@@ -4013,8 +4061,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		 * types within a single CU. So work around that by explicitly
 		 * allowing identical array types here.
 		 */
-		return hypot_type_id == cand_id ||
-		       btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
+		if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
+			return 1;
+		/* It turns out that similar situation can happen with
+		 * struct/union sometimes, sigh... Handle the case where
+		 * structs/unions are exactly the same, down to the referenced
+		 * type IDs. Anything more complicated (e.g., if referenced
+		 * types are different, but equivalent) is *way more*
+		 * complicated and requires a many-to-many equivalence mapping.
+		 */
+		if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+			return 1;
+		return 0;
 	}

 	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
@@ -4029,8 +4087,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		return 0;

 	/* FWD <--> STRUCT/UNION equivalence check, if enabled */
-	if (!d->opts.dont_resolve_fwds
-	    && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+	if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
 	    && cand_kind != canon_kind) {
 		__u16 real_kind;
 		__u16 fwd_kind;
@@ -4056,10 +4113,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		return btf_equal_int_tag(cand_type, canon_type);

 	case BTF_KIND_ENUM:
-		if (d->opts.dont_resolve_fwds)
-			return btf_equal_enum(cand_type, canon_type);
-		else
-			return btf_compat_enum(cand_type, canon_type);
+		return btf_compat_enum(cand_type, canon_type);

 	case BTF_KIND_FWD:
 	case BTF_KIND_FLOAT:
@@ -4071,6 +4125,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_TYPE_TAG:
 		if (cand_type->info != canon_type->info)
 			return 0;
 		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
@@ -4366,6 +4421,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_TYPE_TAG:
 		ref_type_id = btf_dedup_ref_type(d, t->type);
 		if (ref_type_id < 0)
 			return ref_type_id;
@@ -4382,13 +4438,13 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		}
 		break;

-	case BTF_KIND_TAG:
+	case BTF_KIND_DECL_TAG:
 		ref_type_id = btf_dedup_ref_type(d, t->type);
 		if (ref_type_id < 0)
 			return ref_type_id;
 		t->type = ref_type_id;

-		h = btf_hash_int_tag(t);
+		h = btf_hash_int_decl_tag(t);
 		for_each_dedup_cand(d, hash_entry, h) {
 			cand_id = (__u32)(long)hash_entry->value;
 			cand = btf_type_by_id(d->btf, cand_id);
@@ -4671,7 +4727,8 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_VAR:
-	case BTF_KIND_TAG:
+	case BTF_KIND_DECL_TAG:
+	case BTF_KIND_TYPE_TAG:
 		return visit(&t->type, ctx);

 	case BTF_KIND_ARRAY: {
--- a/src/btf.h
+++ b/src/btf.h
@@ -123,6 +123,7 @@ LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *b
 LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead")
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);

+LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only")
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead")
 LIBBPF_API int btf__load(struct btf *btf);
@@ -131,7 +132,9 @@ LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 				   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
 					const char *type_name, __u32 kind);
+LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1")
 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API __u32 btf__type_cnt(const struct btf *btf);
 LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 						  __u32 id);
@@ -144,18 +147,18 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
 LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
-LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used")
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 				    __u32 expected_key_size,
 				    __u32 expected_value_size,
 				    __u32 *key_type_id, __u32 *value_type_id);

-LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
-LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
-					     __u32 *size);
+LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
 LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
 int btf_ext__reloc_func_info(const struct btf *btf,
 			     const struct btf_ext *btf_ext,
@@ -166,8 +169,10 @@ int btf_ext__reloc_line_info(const struct btf *btf,
 			     const struct btf_ext *btf_ext,
 			     const char *sec_name, __u32 insns_cnt,
 			     void **line_info, __u32 *cnt);
-LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size")
+__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size")
+__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);

 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
@@ -222,6 +227,7 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_
 LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);

 /* func and func_proto construction APIs */
 LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
@@ -236,29 +242,90 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
 					 __u32 offset, __u32 byte_sz);

 /* tag construction API */
-LIBBPF_API int btf__add_tag(struct btf *btf, const char *value, int ref_type_id,
+LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
 			    int component_idx);

 struct btf_dedup_opts {
-	unsigned int dedup_table_size;
-	bool dont_resolve_fwds;
+	size_t sz;
+	/* optional .BTF.ext info to dedup along the main BTF info */
+	struct btf_ext *btf_ext;
+	/* force hash collisions (used for testing) */
+	bool force_collisions;
+	size_t :0;
 };
+#define btf_dedup_opts__last_field force_collisions

-LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
-			  const struct btf_dedup_opts *opts);
+LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
+
+LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts);
+
+LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead")
+LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts);
+#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__)
+#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts)
+#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)

 struct btf_dump;

 struct btf_dump_opts {
-	void *ctx;
+	union {
+		size_t sz;
+		void *ctx; /* DEPRECATED: will be gone in v1.0 */
+	};
 };

 typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);

 LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
-					  const struct btf_ext *btf_ext,
-					  const struct btf_dump_opts *opts,
-					  btf_dump_printf_fn_t printf_fn);
+					  btf_dump_printf_fn_t printf_fn,
+					  void *ctx,
+					  const struct btf_dump_opts *opts);
+
+LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
+						 btf_dump_printf_fn_t printf_fn,
+						 void *ctx,
+						 const struct btf_dump_opts *opts);
+
+LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
+						     const struct btf_ext *btf_ext,
+						     const struct btf_dump_opts *opts,
+						     btf_dump_printf_fn_t printf_fn);
+
+/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the
+ * type of 4th argument. If it's btf_dump's print callback, use deprecated
+ * API; otherwise, choose the new btf_dump__new(). ___libbpf_override()
+ * doesn't work here because both variants have 4 input arguments.
+ *
+ * (void *) casts are necessary to avoid compilation warnings about type
+ * mismatches, because even though __builtin_choose_expr() only ever evaluates
+ * one side the other side still has to satisfy type constraints (this is
+ * compiler implementation limitation which might be lifted eventually,
+ * according to the documentation). So passing struct btf_ext in place of
+ * btf_dump_printf_fn_t would be generating compilation warning.  Casting to
+ * void * avoids this issue.
+ *
+ * Also, two type compatibility checks for a function and function pointer are
+ * required because passing function reference into btf_dump__new() as
+ * btf_dump__new(..., my_callback, ...) and as btf_dump__new(...,
+ * &my_callback, ...) (not explicit ampersand in the latter case) actually
+ * differs as far as __builtin_types_compatible_p() is concerned. Thus two
+ * checks are combined to detect callback argument.
+ *
+ * The rest works just like in case of ___libbpf_override() usage with symbol
+ * versioning.
+ *
+ * C++ compilers don't support __builtin_types_compatible_p(), so at least
+ * don't screw up compilation for them and let C++ users pick btf_dump__new
+ * vs btf_dump__new_deprecated explicitly.
+ */
+#ifndef __cplusplus
+#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr(				\
+	__builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) ||		\
+	__builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)),	\
+	btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4),	\
+	btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4))
+#endif
+
 LIBBPF_API void btf_dump__free(struct btf_dump *d);

 LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
@@ -308,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 			 const struct btf_dump_type_data_opts *opts);

 /*
- * A set of helpers for easier BTF types handling
+ * A set of helpers for easier BTF types handling.
+ *
+ * The inline functions below rely on constants from the kernel headers which
+ * may not be available for applications including this header file. To avoid
+ * compilation errors, we define all the constants here that were added after
+ * the initial introduction of the BTF_KIND* constants.
 */
+#ifndef BTF_KIND_FUNC
+#define BTF_KIND_FUNC		12	/* Function	*/
+#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
+#endif
+#ifndef BTF_KIND_VAR
+#define BTF_KIND_VAR		14	/* Variable	*/
+#define BTF_KIND_DATASEC	15	/* Section	*/
+#endif
+#ifndef BTF_KIND_FLOAT
+#define BTF_KIND_FLOAT		16	/* Floating point	*/
+#endif
+/* The kernel header switched to enums, so these two were never #defined */
+#define BTF_KIND_DECL_TAG	17	/* Decl Tag */
+#define BTF_KIND_TYPE_TAG	18	/* Type Tag */
+
 static inline __u16 btf_kind(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info);
@@ -398,7 +485,8 @@ static inline bool btf_is_mod(const struct btf_type *t)

 	return kind == BTF_KIND_VOLATILE ||
 	       kind == BTF_KIND_CONST ||
-	       kind == BTF_KIND_RESTRICT;
+	       kind == BTF_KIND_RESTRICT ||
+	       kind == BTF_KIND_TYPE_TAG;
 }

 static inline bool btf_is_func(const struct btf_type *t)
@@ -426,9 +514,14 @@ static inline bool btf_is_float(const struct btf_type *t)
 	return btf_kind(t) == BTF_KIND_FLOAT;
 }

-static inline bool btf_is_tag(const struct btf_type *t)
+static inline bool btf_is_decl_tag(const struct btf_type *t)
 {
-	return btf_kind(t) == BTF_KIND_TAG;
+	return btf_kind(t) == BTF_KIND_DECL_TAG;
+}
+
+static inline bool btf_is_type_tag(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_TYPE_TAG;
 }

 static inline __u8 btf_int_encoding(const struct btf_type *t)
@@ -499,10 +592,10 @@ btf_var_secinfos(const struct btf_type *t)
 	return (struct btf_var_secinfo *)(t + 1);
 }

-struct btf_tag;
-static inline struct btf_tag *btf_tag(const struct btf_type *t)
+struct btf_decl_tag;
+static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t)
 {
-	return (struct btf_tag *)(t + 1);
+	return (struct btf_decl_tag *)(t + 1);
 }

 #ifdef __cplusplus
--- a/src/btf_dump.c
+++ b/src/btf_dump.c
@@ -77,9 +77,8 @@ struct btf_dump_data {

 struct btf_dump {
 	const struct btf *btf;
-	const struct btf_ext *btf_ext;
 	btf_dump_printf_fn_t printf_fn;
-	struct btf_dump_opts opts;
+	void *cb_ctx;
 	int ptr_sz;
 	bool strip_mods;
 	bool skip_anon_defs;
@@ -138,29 +137,32 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
 	va_list args;

 	va_start(args, fmt);
-	d->printf_fn(d->opts.ctx, fmt, args);
+	d->printf_fn(d->cb_ctx, fmt, args);
 	va_end(args);
 }

 static int btf_dump_mark_referenced(struct btf_dump *d);
 static int btf_dump_resize(struct btf_dump *d);

-struct btf_dump *btf_dump__new(const struct btf *btf,
-			       const struct btf_ext *btf_ext,
-			       const struct btf_dump_opts *opts,
-			       btf_dump_printf_fn_t printf_fn)
+DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0)
+struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
+				      btf_dump_printf_fn_t printf_fn,
+				      void *ctx,
+				      const struct btf_dump_opts *opts)
 {
 	struct btf_dump *d;
 	int err;

+	if (!printf_fn)
+		return libbpf_err_ptr(-EINVAL);
+
 	d = calloc(1, sizeof(struct btf_dump));
 	if (!d)
 		return libbpf_err_ptr(-ENOMEM);

 	d->btf = btf;
-	d->btf_ext = btf_ext;
 	d->printf_fn = printf_fn;
-	d->opts.ctx = opts ? opts->ctx : NULL;
+	d->cb_ctx = ctx;
 	d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);

 	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
@@ -186,9 +188,20 @@ err:
 	return libbpf_err_ptr(err);
 }

+COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4)
+struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
+					  const struct btf_ext *btf_ext,
+					  const struct btf_dump_opts *opts,
+					  btf_dump_printf_fn_t printf_fn)
+{
+	if (!printf_fn)
+		return libbpf_err_ptr(-EINVAL);
+	return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts);
+}
+
 static int btf_dump_resize(struct btf_dump *d)
 {
-	int err, last_id = btf__get_nr_types(d->btf);
+	int err, last_id = btf__type_cnt(d->btf) - 1;

 	if (last_id <= d->last_id)
 		return 0;
@@ -262,7 +275,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
 {
 	int err, i;

-	if (id > btf__get_nr_types(d->btf))
+	if (id >= btf__type_cnt(d->btf))
 		return libbpf_err(-EINVAL);

 	err = btf_dump_resize(d);
@@ -294,11 +307,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
 */
 static int btf_dump_mark_referenced(struct btf_dump *d)
 {
-	int i, j, n = btf__get_nr_types(d->btf);
+	int i, j, n = btf__type_cnt(d->btf);
 	const struct btf_type *t;
 	__u16 vlen;

-	for (i = d->last_id + 1; i <= n; i++) {
+	for (i = d->last_id + 1; i < n; i++) {
 		t = btf__type_by_id(d->btf, i);
 		vlen = btf_vlen(t);

@@ -316,7 +329,8 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_FUNC:
 		case BTF_KIND_VAR:
-		case BTF_KIND_TAG:
+		case BTF_KIND_DECL_TAG:
+		case BTF_KIND_TYPE_TAG:
 			d->type_states[t->type].referenced = 1;
 			break;

@@ -560,6 +574,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_CONST:
 	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPE_TAG:
 		return btf_dump_order_type(d, t->type, through_ptr);

 	case BTF_KIND_FUNC_PROTO: {
@@ -584,7 +599,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 	case BTF_KIND_FUNC:
 	case BTF_KIND_VAR:
 	case BTF_KIND_DATASEC:
-	case BTF_KIND_TAG:
+	case BTF_KIND_DECL_TAG:
 		d->type_states[id].order_state = ORDERED;
 		return 0;

@@ -734,6 +749,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_CONST:
 	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPE_TAG:
 		btf_dump_emit_type(d, t->type, cont_id);
 		break;
 	case BTF_KIND_ARRAY:
@@ -1154,6 +1170,7 @@ skip_mod:
 		case BTF_KIND_CONST:
 		case BTF_KIND_RESTRICT:
 		case BTF_KIND_FUNC_PROTO:
+		case BTF_KIND_TYPE_TAG:
 			id = t->type;
 			break;
 		case BTF_KIND_ARRAY:
@@ -1322,6 +1339,11 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
 		case BTF_KIND_RESTRICT:
 			btf_dump_printf(d, " restrict");
 			break;
+		case BTF_KIND_TYPE_TAG:
+			btf_dump_emit_mods(d, decls);
+			name = btf_name_of(d, t->name_off);
+			btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
+			break;
 		case BTF_KIND_ARRAY: {
 			const struct btf_array *a = btf_array(t);
 			const struct btf_type *next_t;
@@ -1483,6 +1505,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
 	if (s->name_resolved)
 		return *cached_name ? *cached_name : orig_name;

+	if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
+		s->name_resolved = 1;
+		return orig_name;
+	}
+
 	dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
 	if (dup_cnt > 1) {
 		const size_t max_len = 256;
@@ -1562,29 +1589,28 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d,
 				       __u64 *value)
 {
 	__u16 left_shift_bits, right_shift_bits;
-	__u8 nr_copy_bits, nr_copy_bytes;
 	const __u8 *bytes = data;
-	int sz = t->size;
+	__u8 nr_copy_bits;
 	__u64 num = 0;
 	int i;

 	/* Maximum supported bitfield size is 64 bits */
-	if (sz > 8) {
-		pr_warn("unexpected bitfield size %d\n", sz);
+	if (t->size > 8) {
+		pr_warn("unexpected bitfield size %d\n", t->size);
 		return -EINVAL;
 	}

 	/* Bitfield value retrieval is done in two steps; first relevant bytes are
 	 * stored in num, then we left/right shift num to eliminate irrelevant bits.
 	 */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	for (i = t->size - 1; i >= 0; i--)
+		num = num * 256 + bytes[i];
 	nr_copy_bits = bit_sz + bits_offset;
-	nr_copy_bytes = t->size;
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-	for (i = nr_copy_bytes - 1; i >= 0; i--)
-		num = num * 256 + bytes[i];
-#elif __BYTE_ORDER == __BIG_ENDIAN
-	for (i = 0; i < nr_copy_bytes; i++)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	for (i = 0; i < t->size; i++)
 		num = num * 256 + bytes[i];
+	nr_copy_bits = t->size * 8 - bits_offset;
 #else
 # error "Unrecognized __BYTE_ORDER__"
 #endif
@@ -1658,9 +1684,15 @@ static int btf_dump_base_type_check_zero(struct btf_dump *d,
 	return 0;
 }

-static bool ptr_is_aligned(const void *data, int data_sz)
+static bool ptr_is_aligned(const struct btf *btf, __u32 type_id,
+			   const void *data)
 {
-	return ((uintptr_t)data) % data_sz == 0;
+	int alignment = btf__align_of(btf, type_id);
+
+	if (alignment == 0)
+		return false;
+
+	return ((uintptr_t)data) % alignment == 0;
 }

 static int btf_dump_int_data(struct btf_dump *d,
@@ -1671,9 +1703,10 @@ static int btf_dump_int_data(struct btf_dump *d,
 {
 	__u8 encoding = btf_int_encoding(t);
 	bool sign = encoding & BTF_INT_SIGNED;
+	char buf[16] __attribute__((aligned(16)));
 	int sz = t->size;

-	if (sz == 0) {
+	if (sz == 0 || sz > sizeof(buf)) {
 		pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
 		return -EINVAL;
 	}
@@ -1681,8 +1714,10 @@ static int btf_dump_int_data(struct btf_dump *d,
 	/* handle packed int data - accesses of integers not aligned on
 	 * int boundaries can cause problems on some platforms.
 	 */
-	if (!ptr_is_aligned(data, sz))
-		return btf_dump_bitfield_data(d, t, data, 0, 0);
+	if (!ptr_is_aligned(d->btf, type_id, data)) {
+		memcpy(buf, data, sz);
+		data = buf;
+	}

 	switch (sz) {
 	case 16: {
@@ -1692,10 +1727,10 @@ static int btf_dump_int_data(struct btf_dump *d,
 		/* avoid use of __int128 as some 32-bit platforms do not
 		 * support it.
 		 */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 		lsi = ints[0];
 		msi = ints[1];
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 		lsi = ints[1];
 		msi = ints[0];
 #else
@@ -1768,7 +1803,7 @@ static int btf_dump_float_data(struct btf_dump *d,
 	int sz = t->size;

 	/* handle unaligned data; copy to local union */
-	if (!ptr_is_aligned(data, sz)) {
+	if (!ptr_is_aligned(d->btf, type_id, data)) {
 		memcpy(&fl, data, sz);
 		flp = &fl;
 	}
@@ -1831,14 +1866,16 @@ static int btf_dump_array_data(struct btf_dump *d,
 {
 	const struct btf_array *array = btf_array(t);
 	const struct btf_type *elem_type;
-	__u32 i, elem_size = 0, elem_type_id;
+	__u32 i, elem_type_id;
+	__s64 elem_size;
 	bool is_array_member;

 	elem_type_id = array->type;
 	elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
 	elem_size = btf__resolve_size(d->btf, elem_type_id);
 	if (elem_size <= 0) {
-		pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+		pr_warn("unexpected elem size %zd for array type [%u]\n",
+			(ssize_t)elem_size, id);
 		return -EINVAL;
 	}

@@ -1931,7 +1968,7 @@ static int btf_dump_ptr_data(struct btf_dump *d,
 			      __u32 id,
 			      const void *data)
 {
-	if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) {
+	if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) {
 		btf_dump_type_values(d, "%p", *(void **)data);
 	} else {
 		union ptr_data pt;
@@ -1951,10 +1988,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
 				   __u32 id,
 				   __s64 *value)
 {
-	int sz = t->size;
-
 	/* handle unaligned enum value */
-	if (!ptr_is_aligned(data, sz)) {
+	if (!ptr_is_aligned(d->btf, id, data)) {
 		__u64 val;
 		int err;

@@ -2188,7 +2223,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
 				   __u8 bits_offset,
 				   __u8 bit_sz)
 {
-	int size, err;
+	int size, err = 0;

 	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
 	if (size < 0)
@@ -2217,7 +2252,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
 	case BTF_KIND_FWD:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_FUNC_PROTO:
-	case BTF_KIND_TAG:
+	case BTF_KIND_DECL_TAG:
 		err = btf_dump_unsupported_data(d, t, id);
 		break;
 	case BTF_KIND_INT:
@@ -2293,8 +2328,8 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 	if (!opts->indent_str)
 		d->typed_dump->indent_str[0] = '\t';
 	else
-		strncat(d->typed_dump->indent_str, opts->indent_str,
-			sizeof(d->typed_dump->indent_str) - 1);
+		libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
+			       sizeof(d->typed_dump->indent_str));

 	d->typed_dump->compact = OPTS_GET(opts, compact, false);
 	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
--- a/src/gen_loader.c
+++ b/src/gen_loader.c
@@ -13,11 +13,12 @@
 #include "hashmap.h"
 #include "bpf_gen_internal.h"
 #include "skel_internal.h"
+#include <asm/byteorder.h>

 #define MAX_USED_MAPS	64
 #define MAX_USED_PROGS	32
 #define MAX_KFUNC_DESCS 256
-#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS)
+#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS)

 /* The following structure describes the stack layout of the loader program.
 * In addition R6 contains the pointer to context.
@@ -32,8 +33,8 @@
 */
 struct loader_stack {
 	__u32 btf_fd;
-	__u32 prog_fd[MAX_USED_PROGS];
 	__u32 inner_map_fd;
+	__u32 prog_fd[MAX_USED_PROGS];
 };

 #define stack_off(field) \
@@ -41,6 +42,11 @@ struct loader_stack {

 #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field))

+static int blob_fd_array_off(struct bpf_gen *gen, int index)
+{
+	return gen->fd_array + index * sizeof(int);
+}
+
 static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
 {
 	size_t off = gen->insn_cur - gen->insn_start;
@@ -101,11 +107,15 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
 	emit(gen, insn2);
 }

-void bpf_gen__init(struct bpf_gen *gen, int log_level)
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
 {
-	size_t stack_sz = sizeof(struct loader_stack);
+	size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz;
 	int i;

+	gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
 	gen->log_level = log_level;
 	/* save ctx pointer into R6 */
 	emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
@@ -117,19 +127,27 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level)
 	emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
 	emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));

+	/* amount of stack actually used, only used to calculate iterations, not stack offset */
+	nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]);
 	/* jump over cleanup code */
 	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0,
-			      /* size of cleanup code below */
-			      (stack_sz / 4) * 3 + 2));
+			      /* size of cleanup code below (including map fd cleanup) */
+			      (nr_progs_sz / 4) * 3 + 2 +
+			      /* 6 insns for emit_sys_close_blob,
+			       * 6 insns for debug_regs in emit_sys_close_blob
+			       */
+			      nr_maps * (6 + (gen->log_level ? 6 : 0))));

 	/* remember the label where all error branches will jump to */
 	gen->cleanup_label = gen->insn_cur - gen->insn_start;
 	/* emit cleanup code: close all temp FDs */
-	for (i = 0; i < stack_sz; i += 4) {
+	for (i = 0; i < nr_progs_sz; i += 4) {
 		emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
 		emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
 		emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
 	}
+	for (i = 0; i < nr_maps; i++)
+		emit_sys_close_blob(gen, blob_fd_array_off(gen, i));
 	/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
 	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
 	emit(gen, BPF_EXIT_INSN());
@@ -159,8 +177,6 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
 */
 static int add_map_fd(struct bpf_gen *gen)
 {
-	if (!gen->fd_array)
-		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
 	if (gen->nr_maps == MAX_USED_MAPS) {
 		pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
 		gen->error = -E2BIG;
@@ -173,8 +189,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen)
 {
 	int cur;

-	if (!gen->fd_array)
-		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
 	if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
 		cur = add_data(gen, NULL, sizeof(int));
 		return (cur - gen->fd_array) / sizeof(int);
@@ -182,11 +196,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen)
 	return MAX_USED_MAPS + gen->nr_fd_array++;
 }

-static int blob_fd_array_off(struct bpf_gen *gen, int index)
-{
-	return gen->fd_array + index * sizeof(int);
-}
-
 static int insn_bytes_to_bpf_size(__u32 sz)
 {
 	switch (sz) {
@@ -358,10 +367,16 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
 	__emit_sys_close(gen);
 }

-int bpf_gen__finish(struct bpf_gen *gen)
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
 {
 	int i;

+	if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) {
+		pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n",
+			nr_progs, gen->nr_progs, nr_maps, gen->nr_maps);
+		gen->error = -EFAULT;
+		return gen->error;
+	}
 	emit_sys_close_stack(gen, stack_off(btf_fd));
 	for (i = 0; i < gen->nr_progs; i++)
 		move_stack2ctx(gen,
@@ -431,46 +446,32 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
 }

 void bpf_gen__map_create(struct bpf_gen *gen,
-			 struct bpf_create_map_attr *map_attr, int map_idx)
+			 enum bpf_map_type map_type,
+			 const char *map_name,
+			 __u32 key_size, __u32 value_size, __u32 max_entries,
+			 struct bpf_map_create_opts *map_attr, int map_idx)
 {
-	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
+	int attr_size = offsetofend(union bpf_attr, map_extra);
 	bool close_inner_map_fd = false;
 	int map_create_attr, idx;
 	union bpf_attr attr;

 	memset(&attr, 0, attr_size);
-	attr.map_type = map_attr->map_type;
-	attr.key_size = map_attr->key_size;
-	attr.value_size = map_attr->value_size;
+	attr.map_type = map_type;
+	attr.key_size = key_size;
+	attr.value_size = value_size;
 	attr.map_flags = map_attr->map_flags;
-	memcpy(attr.map_name, map_attr->name,
-	       min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.map_extra = map_attr->map_extra;
+	if (map_name)
+		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
 	attr.numa_node = map_attr->numa_node;
 	attr.map_ifindex = map_attr->map_ifindex;
-	attr.max_entries = map_attr->max_entries;
-	switch (attr.map_type) {
-	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
-	case BPF_MAP_TYPE_CGROUP_ARRAY:
-	case BPF_MAP_TYPE_STACK_TRACE:
-	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
-	case BPF_MAP_TYPE_HASH_OF_MAPS:
-	case BPF_MAP_TYPE_DEVMAP:
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-	case BPF_MAP_TYPE_CPUMAP:
-	case BPF_MAP_TYPE_XSKMAP:
-	case BPF_MAP_TYPE_SOCKMAP:
-	case BPF_MAP_TYPE_SOCKHASH:
-	case BPF_MAP_TYPE_QUEUE:
-	case BPF_MAP_TYPE_STACK:
-	case BPF_MAP_TYPE_RINGBUF:
-		break;
-	default:
-		attr.btf_key_type_id = map_attr->btf_key_type_id;
-		attr.btf_value_type_id = map_attr->btf_value_type_id;
-	}
+	attr.max_entries = max_entries;
+	attr.btf_key_type_id = map_attr->btf_key_type_id;
+	attr.btf_value_type_id = map_attr->btf_value_type_id;

 	pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n",
-		 attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id);
+		 attr.map_name, map_idx, map_type, attr.btf_value_type_id);

 	map_create_attr = add_data(gen, &attr, attr_size);
 	if (attr.btf_value_type_id)
@@ -497,7 +498,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,
 	/* emit MAP_CREATE command */
 	emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size);
 	debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d",
-		  attr.map_name, map_idx, map_attr->map_type, attr.value_size,
+		  attr.map_name, map_idx, map_type, value_size,
 		  attr.btf_value_type_id);
 	emit_check_err(gen);
 	/* remember map_fd in the stack, if successful */
@@ -559,7 +560,7 @@ static void emit_find_attach_target(struct bpf_gen *gen)
 }

 void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
-			    int kind, int insn_idx)
+			    bool is_typeless, int kind, int insn_idx)
 {
 	struct ksym_relo_desc *relo;

@@ -572,6 +573,7 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
 	relo += gen->relo_cnt;
 	relo->name = name;
 	relo->is_weak = is_weak;
+	relo->is_typeless = is_typeless;
 	relo->kind = kind;
 	relo->insn_idx = insn_idx;
 	gen->relo_cnt++;
@@ -581,8 +583,9 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
 static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
 {
 	struct ksym_desc *kdesc;
+	int i;

-	for (int i = 0; i < gen->nr_ksyms; i++) {
+	for (i = 0; i < gen->nr_ksyms; i++) {
 		if (!strcmp(gen->ksyms[i].name, relo->name)) {
 			gen->ksyms[i].ref++;
 			return &gen->ksyms[i];
@@ -621,6 +624,29 @@ static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_des
 	debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
 }

+/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7}
+ * Returns result in BPF_REG_7
+ * Returns u64 symbol addr in BPF_REG_9
+ */
+static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo)
+{
+	int name_off, len = strlen(relo->name) + 1, res_off;
+
+	name_off = add_data(gen, relo->name, len);
+	res_off = add_data(gen, NULL, 8); /* res is u64 */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, name_off));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, res_off));
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name));
+	emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0));
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind);
+}
+
 /* Expects:
 * BPF_REG_8 - pointer to instruction
 *
@@ -661,27 +687,29 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo
 		return;
 	}
 	kdesc->off = btf_fd_idx;
-	/* set a default value for imm */
+	/* jump to success case */
+	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+	/* set value for imm, off as 0 */
 	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
-	/* skip success case store if ret < 0 */
-	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1));
+	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+	/* skip success case for ret < 0 */
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10));
 	/* store btf_id into insn[insn_idx].imm */
 	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));
+	/* obtain fd in BPF_REG_9 */
+	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
+	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
+	/* jump to fd_array store if fd denotes module BTF */
+	emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
+	/* set the default value for off */
+	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
+	/* skip BTF fd store for vmlinux BTF */
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
 	/* load fd_array slot pointer */
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
 					 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
-	/* skip store of BTF fd if ret < 0 */
-	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3));
 	/* store BTF fd in slot */
-	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
-	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
 	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
-	/* set a default value for off */
-	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
-	/* skip insn->off store if ret < 0 */
-	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2));
-	/* skip if vmlinux BTF */
-	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1));
 	/* store index into insn[insn_idx].off */
 	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
 log:
@@ -700,10 +728,27 @@ log:
 		   relo->name, kdesc->ref);
 }

+static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo,
+			       int ref)
+{
+	if (!gen->log_level)
+		return;
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8,
+			      offsetof(struct bpf_insn, imm)));
+	emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) +
+			      offsetof(struct bpf_insn, imm)));
+	debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d",
+		   relo->is_typeless, relo->is_weak, relo->name, ref);
+	emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code)));
+	debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg",
+		   relo->is_typeless, relo->is_weak, relo->name, ref);
+}
+
 /* Expects:
 * BPF_REG_8 - pointer to instruction
 */
-static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
+static void emit_relo_ksym_typeless(struct bpf_gen *gen,
+				    struct ksym_relo_desc *relo, int insn)
 {
 	struct ksym_desc *kdesc;

@@ -718,25 +763,99 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
 			       kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
 		goto log;
 	}
+	/* remember insn offset, so we can copy ksym addr later */
+	kdesc->insn = insn;
+	/* skip typeless ksym_desc in fd closing loop in cleanup_relos */
+	kdesc->typeless = true;
+	emit_bpf_kallsyms_lookup_name(gen, relo);
+	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1));
+	emit_check_err(gen);
+	/* store lower half of addr into insn[insn_idx].imm */
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm)));
+	/* store upper half of addr into insn[insn_idx + 1].imm */
+	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9,
+		      sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
+log:
+	emit_ksym_relo_log(gen, relo, kdesc->ref);
+}
+
+static __u32 src_reg_mask(void)
+{
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	return 0x0f; /* src_reg,dst_reg,... */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	return 0xf0; /* dst_reg,src_reg,... */
+#else
+#error "Unsupported bit endianness, cannot proceed"
+#endif
+}
+
+/* Expects:
+ * BPF_REG_8 - pointer to instruction
+ */
+static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
+{
+	struct ksym_desc *kdesc;
+	__u32 reg_mask;
+
+	kdesc = get_ksym_desc(gen, relo);
+	if (!kdesc)
+		return;
+	/* try to copy from existing ldimm64 insn */
+	if (kdesc->ref > 1) {
+		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+			       kdesc->insn + offsetof(struct bpf_insn, imm));
+		move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,
+			       kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
+		/* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */
+		emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3));
+		goto clear_src_reg;
+	}
 	/* remember insn offset, so we can copy BTF ID and FD later */
 	kdesc->insn = insn;
 	emit_bpf_find_by_name_kind(gen, relo);
-	emit_check_err(gen);
+	if (!relo->is_weak)
+		emit_check_err(gen);
+	/* jump to success case */
+	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+	/* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */
+	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
+	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0));
+	/* skip success case for ret < 0 */
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
 	/* store btf_id into insn[insn_idx].imm */
 	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));
 	/* store btf_obj_fd into insn[insn_idx + 1].imm */
 	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
 	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7,
 			      sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
-log:
-	if (!gen->log_level)
+	/* skip src_reg adjustment */
+	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+clear_src_reg:
+	/* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */
+	reg_mask = src_reg_mask();
+	emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code)));
+	emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask));
+	emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code)));
+
+	emit_ksym_relo_log(gen, relo, kdesc->ref);
+}
+
+void bpf_gen__record_relo_core(struct bpf_gen *gen,
+			       const struct bpf_core_relo *core_relo)
+{
+	struct bpf_core_relo *relos;
+
+	relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos));
+	if (!relos) {
+		gen->error = -ENOMEM;
 		return;
-	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8,
-			      offsetof(struct bpf_insn, imm)));
-	emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) +
-			      offsetof(struct bpf_insn, imm)));
-	debug_regs(gen, BPF_REG_7, BPF_REG_9, " var (%s:count=%d): imm: %%d, fd: %%d",
-		   relo->name, kdesc->ref);
+	}
+	gen->core_relos = relos;
+	relos += gen->core_relo_cnt;
+	memcpy(relos, core_relo, sizeof(*relos));
+	gen->core_relo_cnt++;
 }

 static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
@@ -748,7 +867,10 @@ static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn));
 	switch (relo->kind) {
 	case BTF_KIND_VAR:
-		emit_relo_ksym_btf(gen, relo, insn);
+		if (relo->is_typeless)
+			emit_relo_ksym_typeless(gen, relo, insn);
+		else
+			emit_relo_ksym_btf(gen, relo, insn);
 		break;
 	case BTF_KIND_FUNC:
 		emit_relo_kfunc_btf(gen, relo, insn);
@@ -768,17 +890,27 @@ static void emit_relos(struct bpf_gen *gen, int insns)
 		emit_relo(gen, gen->relos + i, insns);
 }

+static void cleanup_core_relo(struct bpf_gen *gen)
+{
+	if (!gen->core_relo_cnt)
+		return;
+	free(gen->core_relos);
+	gen->core_relo_cnt = 0;
+	gen->core_relos = NULL;
+}
+
 static void cleanup_relos(struct bpf_gen *gen, int insns)
 {
 	int i, insn;

 	for (i = 0; i < gen->nr_ksyms; i++) {
-		if (gen->ksyms[i].kind == BTF_KIND_VAR) {
+		/* only close fds for typed ksyms and kfuncs */
+		if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) {
 			/* close fd recorded in insn[insn_idx + 1].imm */
 			insn = gen->ksyms[i].insn;
 			insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm);
 			emit_sys_close_blob(gen, insn);
-		} else { /* BTF_KIND_FUNC */
+		} else if (gen->ksyms[i].kind == BTF_KIND_FUNC) {
 			emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off));
 			if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ)
 				gen->nr_fd_array--;
@@ -794,30 +926,32 @@ static void cleanup_relos(struct bpf_gen *gen, int insns)
 		gen->relo_cnt = 0;
 		gen->relos = NULL;
 	}
+	cleanup_core_relo(gen);
 }

 void bpf_gen__prog_load(struct bpf_gen *gen,
-			struct bpf_prog_load_params *load_attr, int prog_idx)
+			enum bpf_prog_type prog_type, const char *prog_name,
+			const char *license, struct bpf_insn *insns, size_t insn_cnt,
+			struct bpf_prog_load_opts *load_attr, int prog_idx)
 {
-	int attr_size = offsetofend(union bpf_attr, fd_array);
-	int prog_load_attr, license, insns, func_info, line_info;
+	int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos;
+	int attr_size = offsetofend(union bpf_attr, core_relo_rec_size);
 	union bpf_attr attr;

 	memset(&attr, 0, attr_size);
-	pr_debug("gen: prog_load: type %d insns_cnt %zd\n",
-		 load_attr->prog_type, load_attr->insn_cnt);
+	pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n",
+		 prog_type, insn_cnt, prog_idx);
 	/* add license string to blob of bytes */
-	license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1);
+	license_off = add_data(gen, license, strlen(license) + 1);
 	/* add insns to blob of bytes */
-	insns = add_data(gen, load_attr->insns,
-			 load_attr->insn_cnt * sizeof(struct bpf_insn));
+	insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn));

-	attr.prog_type = load_attr->prog_type;
+	attr.prog_type = prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
 	attr.attach_btf_id = load_attr->attach_btf_id;
 	attr.prog_ifindex = load_attr->prog_ifindex;
 	attr.kern_version = 0;
-	attr.insn_cnt = (__u32)load_attr->insn_cnt;
+	attr.insn_cnt = (__u32)insn_cnt;
 	attr.prog_flags = load_attr->prog_flags;

 	attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -830,15 +964,19 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
 	line_info = add_data(gen, load_attr->line_info,
 			     attr.line_info_cnt * attr.line_info_rec_size);

-	memcpy(attr.prog_name, load_attr->name,
-	       min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.core_relo_rec_size = sizeof(struct bpf_core_relo);
+	attr.core_relo_cnt = gen->core_relo_cnt;
+	core_relos = add_data(gen, gen->core_relos,
+			     attr.core_relo_cnt * attr.core_relo_rec_size);
+
+	libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
 	prog_load_attr = add_data(gen, &attr, attr_size);

 	/* populate union bpf_attr with a pointer to license */
-	emit_rel_store(gen, attr_field(prog_load_attr, license), license);
+	emit_rel_store(gen, attr_field(prog_load_attr, license), license_off);

 	/* populate union bpf_attr with a pointer to instructions */
-	emit_rel_store(gen, attr_field(prog_load_attr, insns), insns);
+	emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off);

 	/* populate union bpf_attr with a pointer to func_info */
 	emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info);
@@ -846,6 +984,9 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
 	/* populate union bpf_attr with a pointer to line_info */
 	emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info);

+	/* populate union bpf_attr with a pointer to core_relos */
+	emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos);
+
 	/* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */
 	emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array);

@@ -870,15 +1011,17 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
 		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
 				      offsetof(union bpf_attr, attach_btf_obj_fd)));
 	}
-	emit_relos(gen, insns);
+	emit_relos(gen, insns_off);
 	/* emit PROG_LOAD command */
 	emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);
 	debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
 	/* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
-	cleanup_relos(gen, insns);
-	if (gen->attach_kind)
+	cleanup_relos(gen, insns_off);
+	if (gen->attach_kind) {
 		emit_sys_close_blob(gen,
 				    attr_field(prog_load_attr, attach_btf_obj_fd));
+		gen->attach_kind = 0;
+	}
 	emit_check_err(gen);
 	/* remember prog_fd in the stack, if successful */
 	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
@@ -900,18 +1043,27 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
 	value = add_data(gen, pvalue, value_size);
 	key = add_data(gen, &zero, sizeof(zero));

-	/* if (map_desc[map_idx].initial_value)
-	 *    copy_from_user(value, initial_value, value_size);
+	/* if (map_desc[map_idx].initial_value) {
+	 *    if (ctx->flags & BPF_SKEL_KERNEL)
+	 *        bpf_probe_read_kernel(value, value_size, initial_value);
+	 *    else
+	 *        bpf_copy_from_user(value, value_size, initial_value);
+	 * }
 	 */
 	emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,
 			      sizeof(struct bpf_loader_ctx) +
 			      sizeof(struct bpf_map_desc) * map_idx +
 			      offsetof(struct bpf_map_desc, initial_value)));
-	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4));
+	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8));
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
 					 0, 0, 0, value));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size));
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+			      offsetof(struct bpf_loader_ctx, flags)));
+	emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2));
 	emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user));
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));

 	map_update_attr = add_data(gen, &attr, attr_size);
 	move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
@@ -924,6 +1076,33 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
 	emit_check_err(gen);
 }

+void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot,
+				 int inner_map_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, flags);
+	int map_update_attr, key;
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n",
+		 outer_map_idx, slot, inner_map_idx);
+
+	key = add_data(gen, &slot, sizeof(slot));
+
+	map_update_attr = add_data(gen, &attr, attr_size);
+	move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
+		       blob_fd_array_off(gen, outer_map_idx));
+	emit_rel_store(gen, attr_field(map_update_attr, key), key);
+	emit_rel_store(gen, attr_field(map_update_attr, value),
+		       blob_fd_array_off(gen, inner_map_idx));
+
+	/* emit MAP_UPDATE_ELEM command */
+	emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
+	debug_ret(gen, "populate_outer_map outer %d key %d inner %d",
+		  outer_map_idx, slot, inner_map_idx);
+	emit_check_err(gen);
+}
+
 void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
 {
 	int attr_size = offsetofend(union bpf_attr, map_fd);
--- a/src/hashmap.c
+++ b/src/hashmap.c
@@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map)

 void hashmap__free(struct hashmap *map)
 {
-	if (!map)
+	if (IS_ERR_OR_NULL(map))
 		return;

 	hashmap__clear(map);
@@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key,

 	return true;
 }
-
--- a/src/libbpf.c
+++ b/src/libbpf.c
--- a/src/libbpf.h
+++ b/src/libbpf.h
--- a/src/libbpf.map
+++ b/src/libbpf.map
@@ -247,6 +247,7 @@ LIBBPF_0.0.8 {
 		bpf_link_create;
 		bpf_link_update;
 		bpf_map__set_initial_value;
+		bpf_prog_attach_opts;
 		bpf_program__attach_cgroup;
 		bpf_program__attach_lsm;
 		bpf_program__is_lsm;
@@ -389,10 +390,72 @@ LIBBPF_0.5.0 {

 LIBBPF_0.6.0 {
 	global:
+		bpf_map__map_extra;
+		bpf_map__set_map_extra;
+		bpf_map_create;
 		bpf_object__next_map;
 		bpf_object__next_program;
 		bpf_object__prev_map;
 		bpf_object__prev_program;
+		bpf_prog_load_deprecated;
+		bpf_prog_load;
+		bpf_program__flags;
+		bpf_program__insn_cnt;
+		bpf_program__insns;
+		bpf_program__set_flags;
 		btf__add_btf;
-		btf__add_tag;
+		btf__add_decl_tag;
+		btf__add_type_tag;
+		btf__dedup;
+		btf__dedup_deprecated;
+		btf__raw_data;
+		btf__type_cnt;
+		btf_dump__new;
+		btf_dump__new_deprecated;
+		libbpf_major_version;
+		libbpf_minor_version;
+		libbpf_version_string;
+		perf_buffer__new;
+		perf_buffer__new_deprecated;
+		perf_buffer__new_raw;
+		perf_buffer__new_raw_deprecated;
 } LIBBPF_0.5.0;
+
+LIBBPF_0.7.0 {
+	global:
+		bpf_btf_load;
+		bpf_program__expected_attach_type;
+		bpf_program__log_buf;
+		bpf_program__log_level;
+		bpf_program__set_log_buf;
+		bpf_program__set_log_level;
+		bpf_program__type;
+		bpf_xdp_attach;
+		bpf_xdp_detach;
+		bpf_xdp_query;
+		bpf_xdp_query_id;
+		libbpf_probe_bpf_helper;
+		libbpf_probe_bpf_map_type;
+		libbpf_probe_bpf_prog_type;
+		libbpf_set_memlock_rlim_max;
+} LIBBPF_0.6.0;
+
+LIBBPF_0.8.0 {
+	global:
+		bpf_map__autocreate;
+		bpf_map__get_next_key;
+		bpf_map__delete_elem;
+		bpf_map__lookup_and_delete_elem;
+		bpf_map__lookup_elem;
+		bpf_map__set_autocreate;
+		bpf_map__update_elem;
+		bpf_map_delete_elem_flags;
+		bpf_object__destroy_subskeleton;
+		bpf_object__open_subskeleton;
+		bpf_program__attach_kprobe_multi_opts;
+		bpf_program__attach_trace_opts;
+		bpf_program__attach_usdt;
+		bpf_program__set_insns;
+		libbpf_register_prog_handler;
+		libbpf_unregister_prog_handler;
+} LIBBPF_0.7.0;
--- a/src/libbpf_common.h
+++ b/src/libbpf_common.h
@@ -40,6 +40,23 @@
 #else
 #define __LIBBPF_MARK_DEPRECATED_0_7(X)
 #endif
+#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8)
+#define __LIBBPF_MARK_DEPRECATED_0_8(X) X
+#else
+#define __LIBBPF_MARK_DEPRECATED_0_8(X)
+#endif
+
+/* This set of internal macros allows to do "function overloading" based on
+ * number of arguments provided by used in backwards-compatible way during the
+ * transition to libbpf 1.0
+ * It's ugly but necessary evil that will be cleaned up when we get to 1.0.
+ * See bpf_prog_load() overload for example.
+ */
+#define ___libbpf_cat(A, B) A ## B
+#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM)
+#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N
+#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
+#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__)

 /* Helper macro to declare and initialize libbpf options struct
 *
@@ -54,7 +71,7 @@
 * including any extra padding, it with memset() and then assigns initial
 * values provided by users in struct initializer-syntax as varargs.
 */
-#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)				    \
+#define LIBBPF_OPTS(TYPE, NAME, ...)					    \
 	struct TYPE NAME = ({ 						    \
 		memset(&NAME, 0, sizeof(struct TYPE));			    \
 		(struct TYPE) {						    \
--- a/src/libbpf_internal.h
+++ b/src/libbpf_internal.h
@@ -13,6 +13,8 @@
 #include <limits.h>
 #include <errno.h>
 #include <linux/err.h>
+#include <fcntl.h>
+#include <unistd.h>
 #include "libbpf_legacy.h"
 #include "relo_core.h"

@@ -52,8 +54,8 @@
 #endif

 /* Older libelf all end up in this expression, for both 32 and 64 bit */
-#ifndef GELF_ST_VISIBILITY
-#define GELF_ST_VISIBILITY(o) ((o) & 0x03)
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
 #endif

 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
@@ -69,8 +71,10 @@
 #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
 #define BTF_TYPE_FLOAT_ENC(name, sz) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
-#define BTF_TYPE_TAG_ENC(value, type, component_idx) \
-	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \
+	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_TYPE_TAG_ENC(value, type) \
+	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)

 #ifndef likely
 #define likely(x) __builtin_expect(!!(x), 1)
@@ -88,6 +92,9 @@
 # define offsetofend(TYPE, FIELD) \
 	(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
 #endif
+#ifndef __alias
+#define __alias(symbol) __attribute__((alias(#symbol)))
+#endif

 /* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is
 * a string literal known at compilation time or char * pointer known only at
@@ -96,6 +103,17 @@
 #define str_has_pfx(str, pfx) \
 	(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)

+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+	size_t str_len = strlen(str);
+	size_t sfx_len = strlen(sfx);
+
+	if (sfx_len <= str_len)
+		return strcmp(str + str_len - sfx_len, sfx);
+	return false;
+}
+
 /* Symbol versioning is different between static and shared library.
 * Properly versioned symbols are needed for shared library, but
 * only the symbol of the new version is needed for static library.
@@ -141,6 +159,15 @@ do {				\
 #ifndef __has_builtin
 #define __has_builtin(x) 0
 #endif
+
+struct bpf_link {
+	int (*detach)(struct bpf_link *link);
+	void (*dealloc)(struct bpf_link *link);
+	char *pin_path;		/* NULL, if not pinned */
+	int fd;			/* hook FD, -1 if not applicable */
+	bool disconnected;
+};
+
 /*
 * Re-implement glibc's reallocarray() for libbpf internal-only use.
 * reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -165,10 +192,31 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
 	return realloc(ptr, total);
 }

+/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
+ * is zero-terminated string no matter what (unless sz == 0, in which case
+ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
+ * in what is returned. Given this is internal helper, it's trivial to extend
+ * this, when necessary. Use this instead of strncpy inside libbpf source code.
+ */
+static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+	size_t i;
+
+	if (sz == 0)
+		return;
+
+	sz--;
+	for (i = 0; i < sz && src[i]; i++)
+		dst[i] = src[i];
+	dst[i] = '\0';
+}
+
+__u32 get_kernel_version(void);
+
 struct btf;
 struct btf_type;

-struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);
 const char *btf_kind_str(const struct btf_type *t);
 const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);

@@ -193,8 +241,9 @@ enum map_def_parts {
 	MAP_DEF_NUMA_NODE	= 0x080,
 	MAP_DEF_PINNING		= 0x100,
 	MAP_DEF_INNER_MAP	= 0x200,
+	MAP_DEF_MAP_EXTRA	= 0x400,

-	MAP_DEF_ALL		= 0x3ff, /* combination of all above */
+	MAP_DEF_ALL		= 0x7ff, /* combination of all above */
 };

 struct btf_map_def {
@@ -208,6 +257,7 @@ struct btf_map_def {
 	__u32 map_flags;
 	__u32 numa_node;
 	__u32 pinning;
+	__u64 map_extra;
 };

 int parse_btf_map_def(const char *map_name, struct btf *btf,
@@ -266,47 +316,54 @@ static inline bool libbpf_validate_opts(const char *opts,
 					(opts)->sz - __off);		      \
 })

+enum kern_feature_id {
+	/* v4.14: kernel support for program & map names. */
+	FEAT_PROG_NAME,
+	/* v5.2: kernel support for global data sections. */
+	FEAT_GLOBAL_DATA,
+	/* BTF support */
+	FEAT_BTF,
+	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+	FEAT_BTF_FUNC,
+	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+	FEAT_BTF_DATASEC,
+	/* BTF_FUNC_GLOBAL is supported */
+	FEAT_BTF_GLOBAL_FUNC,
+	/* BPF_F_MMAPABLE is supported for arrays */
+	FEAT_ARRAY_MMAP,
+	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
+	FEAT_EXP_ATTACH_TYPE,
+	/* bpf_probe_read_{kernel,user}[_str] helpers */
+	FEAT_PROBE_READ_KERN,
+	/* BPF_PROG_BIND_MAP is supported */
+	FEAT_PROG_BIND_MAP,
+	/* Kernel support for module BTFs */
+	FEAT_MODULE_BTF,
+	/* BTF_KIND_FLOAT support */
+	FEAT_BTF_FLOAT,
+	/* BPF perf link support */
+	FEAT_PERF_LINK,
+	/* BTF_KIND_DECL_TAG support */
+	FEAT_BTF_DECL_TAG,
+	/* BTF_KIND_TYPE_TAG support */
+	FEAT_BTF_TYPE_TAG,
+	/* memcg-based accounting for BPF maps and progs */
+	FEAT_MEMCG_ACCOUNT,
+	/* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+	FEAT_BPF_COOKIE,
+	__FEAT_CNT,
+};
+
+int probe_memcg_account(void);
+bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+int bump_rlimit_memlock(void);

 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 			 const char *str_sec, size_t str_len);
+int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);

-struct bpf_prog_load_params {
-	enum bpf_prog_type prog_type;
-	enum bpf_attach_type expected_attach_type;
-	const char *name;
-	const struct bpf_insn *insns;
-	size_t insn_cnt;
-	const char *license;
-	__u32 kern_version;
-	__u32 attach_prog_fd;
-	__u32 attach_btf_obj_fd;
-	__u32 attach_btf_id;
-	__u32 prog_ifindex;
-	__u32 prog_btf_fd;
-	__u32 prog_flags;
-
-	__u32 func_info_rec_size;
-	const void *func_info;
-	__u32 func_info_cnt;
-
-	__u32 line_info_rec_size;
-	const void *line_info;
-	__u32 line_info_cnt;
-
-	__u32 log_level;
-	char *log_buf;
-	size_t log_buf_sz;
-	int *fd_array;
-};
-
-int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr);
-
-int bpf_object__section_size(const struct bpf_object *obj, const char *name,
-			     __u32 *size);
-int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
-				__u32 *off);
 struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 				const char **prefix, int *kind);
@@ -319,6 +376,13 @@ struct btf_ext_info {
 	void *info;
 	__u32 rec_size;
 	__u32 len;
+	/* optional (maintained internally by libbpf) mapping between .BTF.ext
+	 * section and corresponding ELF section. This is used to join
+	 * information like CO-RE relocation records with corresponding BPF
+	 * programs defined in ELF sections
+	 */
+	__u32 *sec_idxs;
+	int sec_cnt;
 };

 #define for_each_btf_ext_sec(seg, sec)					\
@@ -414,6 +478,11 @@ __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,

 extern enum libbpf_strict_mode libbpf_mode;

+typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
+			     const char *sym_name, void *ctx);
+
+int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *arg);
+
 /* handle direct returned errors */
 static inline int libbpf_err(int ret)
 {
@@ -472,4 +541,43 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn)
 	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
 }

+/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
+ * Takes ownership of the fd passed in, and closes it if calling
+ * fcntl(fd, F_DUPFD_CLOEXEC, 3).
+ */
+static inline int ensure_good_fd(int fd)
+{
+	int old_fd = fd, saved_errno;
+
+	if (fd < 0)
+		return fd;
+	if (fd < 3) {
+		fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+		saved_errno = errno;
+		close(old_fd);
+		if (fd < 0) {
+			pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno);
+			errno = saved_errno;
+		}
+	}
+	return fd;
+}
+
+/* The following two functions are exposed to bpftool */
+int bpf_core_add_cands(struct bpf_core_cand *local_cand,
+		       size_t local_essent_len,
+		       const struct btf *targ_btf,
+		       const char *targ_btf_name,
+		       int targ_start_id,
+		       struct bpf_core_cand_list *cands);
+void bpf_core_free_cands(struct bpf_core_cand_list *cands);
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man,
+					   const struct bpf_program *prog,
+					   pid_t pid, const char *path,
+					   const char *usdt_provider, const char *usdt_name,
+					   __u64 usdt_cookie);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
--- a/src/libbpf_legacy.h
+++ b/src/libbpf_legacy.h
@@ -45,21 +45,67 @@ enum libbpf_strict_mode {
 	 * (positive) error code.
 	 */
 	LIBBPF_STRICT_DIRECT_ERRS = 0x02,
-
 	/*
 	 * Enforce strict BPF program section (SEC()) names.
 	 * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were
 	 * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become
 	 * unrecognized by libbpf and would have to be just SEC("xdp") and
 	 * SEC("xdp") and SEC("perf_event").
+	 *
+	 * Note, in this mode the program pin path will be based on the
+	 * function name instead of section name.
+	 *
+	 * Additionally, routines in the .text section are always considered
+	 * sub-programs. Legacy behavior allows for a single routine in .text
+	 * to be a program.
 	 */
 	LIBBPF_STRICT_SEC_NAME = 0x04,
+	/*
+	 * Disable the global 'bpf_objects_list'. Maintaining this list adds
+	 * a race condition to bpf_object__open() and bpf_object__close().
+	 * Clients can maintain it on their own if it is valuable for them.
+	 */
+	LIBBPF_STRICT_NO_OBJECT_LIST = 0x08,
+	/*
+	 * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the
+	 * first BPF program or map creation operation. This is done only if
+	 * kernel is too old to support memcg-based memory accounting for BPF
+	 * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
+	 * but it can be overriden with libbpf_set_memlock_rlim_max() API.
+	 * Note that libbpf_set_memlock_rlim_max() needs to be called before
+	 * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
+	 * operation.
+	 */
+	LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+	/*
+	 * Error out on any SEC("maps") map definition, which are deprecated
+	 * in favor of BTF-defined map definitions in SEC(".maps").
+	 */
+	LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,

 	__LIBBPF_STRICT_LAST,
 };

 LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);

+#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
+
+/* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
+ * They are normally a trivial aliases or wrappers for proper APIs and are
+ * left to minimize unnecessary disruption for users of libbpf. But they
+ * shouldn't be used going forward.
+ */
+
+struct bpf_program;
+struct bpf_map;
+struct btf;
+struct btf_ext;
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
+LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);

 #ifdef __cplusplus
 } /* extern "C" */
--- a/src/libbpf_probes.c
+++ b/src/libbpf_probes.c
@@ -33,7 +33,7 @@ static int get_vendor_id(int ifindex)

 	snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);

-	fd = open(path, O_RDONLY);
+	fd = open(path, O_RDONLY | O_CLOEXEC);
 	if (fd < 0)
 		return -1;

@@ -48,41 +48,65 @@ static int get_vendor_id(int ifindex)
 	return strtol(buf, NULL, 0);
 }

-static int get_kernel_version(void)
+static int probe_prog_load(enum bpf_prog_type prog_type,
+			   const struct bpf_insn *insns, size_t insns_cnt,
+			   char *log_buf, size_t log_buf_sz,
+			   __u32 ifindex)
 {
-	int version, subversion, patchlevel;
-	struct utsname utsn;
-
-	/* Return 0 on failure, and attempt to probe with empty kversion */
-	if (uname(&utsn))
-		return 0;
-
-	if (sscanf(utsn.release, "%d.%d.%d",
-		   &version, &subversion, &patchlevel) != 3)
-		return 0;
-
-	return (version << 16) + (subversion << 8) + patchlevel;
-}
-
-static void
-probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
-	   size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex)
-{
-	struct bpf_load_program_attr xattr = {};
-	int fd;
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.log_buf = log_buf,
+		.log_size = log_buf_sz,
+		.log_level = log_buf ? 1 : 0,
+		.prog_ifindex = ifindex,
+	);
+	int fd, err, exp_err = 0;
+	const char *exp_msg = NULL;
+	char buf[4096];

 	switch (prog_type) {
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-		xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+		opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
 		break;
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
-		xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
+		opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT;
 		break;
 	case BPF_PROG_TYPE_SK_LOOKUP:
-		xattr.expected_attach_type = BPF_SK_LOOKUP;
+		opts.expected_attach_type = BPF_SK_LOOKUP;
 		break;
 	case BPF_PROG_TYPE_KPROBE:
-		xattr.kern_version = get_kernel_version();
+		opts.kern_version = get_kernel_version();
+		break;
+	case BPF_PROG_TYPE_LIRC_MODE2:
+		opts.expected_attach_type = BPF_LIRC_MODE2;
+		break;
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_LSM:
+		opts.log_buf = buf;
+		opts.log_size = sizeof(buf);
+		opts.log_level = 1;
+		if (prog_type == BPF_PROG_TYPE_TRACING)
+			opts.expected_attach_type = BPF_TRACE_FENTRY;
+		else
+			opts.expected_attach_type = BPF_MODIFY_RETURN;
+		opts.attach_btf_id = 1;
+
+		exp_err = -EINVAL;
+		exp_msg = "attach_btf_id 1 is not a function";
+		break;
+	case BPF_PROG_TYPE_EXT:
+		opts.log_buf = buf;
+		opts.log_size = sizeof(buf);
+		opts.log_level = 1;
+		opts.attach_btf_id = 1;
+
+		exp_err = -EINVAL;
+		exp_msg = "Cannot replace kernel functions";
+		break;
+	case BPF_PROG_TYPE_SYSCALL:
+		opts.prog_flags = BPF_F_SLEEPABLE;
+		break;
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		exp_err = -524; /* -ENOTSUPP */
 		break;
 	case BPF_PROG_TYPE_UNSPEC:
 	case BPF_PROG_TYPE_SOCKET_FILTER:
@@ -103,27 +127,42 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
-	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
-	case BPF_PROG_TYPE_TRACING:
-	case BPF_PROG_TYPE_STRUCT_OPS:
-	case BPF_PROG_TYPE_EXT:
-	case BPF_PROG_TYPE_LSM:
-	default:
 		break;
+	default:
+		return -EOPNOTSUPP;
 	}

-	xattr.prog_type = prog_type;
-	xattr.insns = insns;
-	xattr.insns_cnt = insns_cnt;
-	xattr.license = "GPL";
-	xattr.prog_ifindex = ifindex;
-
-	fd = bpf_load_program_xattr(&xattr, buf, buf_len);
+	fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
+	err = -errno;
 	if (fd >= 0)
 		close(fd);
+	if (exp_err) {
+		if (fd >= 0 || err != exp_err)
+			return 0;
+		if (exp_msg && !strstr(buf, exp_msg))
+			return 0;
+		return 1;
+	}
+	return fd >= 0 ? 1 : 0;
+}
+
+int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
+{
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN()
+	};
+	const size_t insn_cnt = ARRAY_SIZE(insns);
+	int ret;
+
+	if (opts)
+		return libbpf_err(-EINVAL);
+
+	ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0);
+	return libbpf_err(ret);
 }

 bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
@@ -133,12 +172,16 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
 		BPF_EXIT_INSN()
 	};

+	/* prefer libbpf_probe_bpf_prog_type() unless offload is requested */
+	if (ifindex == 0)
+		return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1;
+
 	if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
 		/* nfp returns -EINVAL on exit(0) with TC offload */
 		insns[0].imm = 2;

 	errno = 0;
-	probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
+	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);

 	return errno != EINVAL && errno != EOPNOTSUPP;
 }
@@ -166,7 +209,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 	memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
 	memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);

-	btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false);
+	btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);

 	free(raw_btf);
 	return btf_fd;
@@ -199,17 +242,18 @@ static int load_local_storage_btf(void)
 				     strs, sizeof(strs));
 }

-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
 {
-	int key_size, value_size, max_entries, map_flags;
+	LIBBPF_OPTS(bpf_map_create_opts, opts);
+	int key_size, value_size, max_entries;
 	__u32 btf_key_type_id = 0, btf_value_type_id = 0;
-	struct bpf_create_map_attr attr = {};
-	int fd = -1, btf_fd = -1, fd_inner;
+	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
+
+	opts.map_ifindex = ifindex;

 	key_size	= sizeof(__u32);
 	value_size	= sizeof(__u32);
 	max_entries	= 1;
-	map_flags	= 0;

 	switch (map_type) {
 	case BPF_MAP_TYPE_STACK_TRACE:
@@ -218,7 +262,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_LPM_TRIE:
 		key_size	= sizeof(__u64);
 		value_size	= sizeof(__u64);
-		map_flags	= BPF_F_NO_PREALLOC;
+		opts.map_flags	= BPF_F_NO_PREALLOC;
 		break;
 	case BPF_MAP_TYPE_CGROUP_STORAGE:
 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
@@ -237,17 +281,25 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		btf_value_type_id = 3;
 		value_size = 8;
 		max_entries = 0;
-		map_flags = BPF_F_NO_PREALLOC;
+		opts.map_flags = BPF_F_NO_PREALLOC;
 		btf_fd = load_local_storage_btf();
 		if (btf_fd < 0)
-			return false;
+			return btf_fd;
 		break;
 	case BPF_MAP_TYPE_RINGBUF:
 		key_size = 0;
 		value_size = 0;
 		max_entries = 4096;
 		break;
-	case BPF_MAP_TYPE_UNSPEC:
+	case BPF_MAP_TYPE_STRUCT_OPS:
+		/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
+		opts.btf_vmlinux_value_type_id = 1;
+		exp_err = -524; /* -ENOTSUPP */
+		break;
+	case BPF_MAP_TYPE_BLOOM_FILTER:
+		key_size = 0;
+		max_entries = 1;
+		break;
 	case BPF_MAP_TYPE_HASH:
 	case BPF_MAP_TYPE_ARRAY:
 	case BPF_MAP_TYPE_PROG_ARRAY:
@@ -266,9 +318,10 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_XSKMAP:
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
-	case BPF_MAP_TYPE_STRUCT_OPS:
-	default:
 		break;
+	case BPF_MAP_TYPE_UNSPEC:
+	default:
+		return -EOPNOTSUPP;
 	}

 	if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
@@ -277,37 +330,102 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		 * map-in-map for offload
 		 */
 		if (ifindex)
-			return false;
+			goto cleanup;

-		fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH,
-					  sizeof(__u32), sizeof(__u32), 1, 0);
+		fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
+					  sizeof(__u32), sizeof(__u32), 1, NULL);
 		if (fd_inner < 0)
-			return false;
-		fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32),
-					   fd_inner, 1, 0);
-		close(fd_inner);
-	} else {
-		/* Note: No other restriction on map type probes for offload */
-		attr.map_type = map_type;
-		attr.key_size = key_size;
-		attr.value_size = value_size;
-		attr.max_entries = max_entries;
-		attr.map_flags = map_flags;
-		attr.map_ifindex = ifindex;
-		if (btf_fd >= 0) {
-			attr.btf_fd = btf_fd;
-			attr.btf_key_type_id = btf_key_type_id;
-			attr.btf_value_type_id = btf_value_type_id;
-		}
+			goto cleanup;

-		fd = bpf_create_map_xattr(&attr);
+		opts.inner_map_fd = fd_inner;
 	}
+
+	if (btf_fd >= 0) {
+		opts.btf_fd = btf_fd;
+		opts.btf_key_type_id = btf_key_type_id;
+		opts.btf_value_type_id = btf_value_type_id;
+	}
+
+	fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
+	err = -errno;
+
+cleanup:
 	if (fd >= 0)
 		close(fd);
+	if (fd_inner >= 0)
+		close(fd_inner);
 	if (btf_fd >= 0)
 		close(btf_fd);

-	return fd >= 0;
+	if (exp_err)
+		return fd < 0 && err == exp_err ? 1 : 0;
+	else
+		return fd >= 0 ? 1 : 0;
+}
+
+int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
+{
+	int ret;
+
+	if (opts)
+		return libbpf_err(-EINVAL);
+
+	ret = probe_map_create(map_type, 0);
+	return libbpf_err(ret);
+}
+
+bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+{
+	return probe_map_create(map_type, ifindex) == 1;
+}
+
+int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
+			    const void *opts)
+{
+	struct bpf_insn insns[] = {
+		BPF_EMIT_CALL((__u32)helper_id),
+		BPF_EXIT_INSN(),
+	};
+	const size_t insn_cnt = ARRAY_SIZE(insns);
+	char buf[4096];
+	int ret;
+
+	if (opts)
+		return libbpf_err(-EINVAL);
+
+	/* we can't successfully load all prog types to check for BPF helper
+	 * support, so bail out with -EOPNOTSUPP error
+	 */
+	switch (prog_type) {
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_EXT:
+	case BPF_PROG_TYPE_LSM:
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		return -EOPNOTSUPP;
+	default:
+		break;
+	}
+
+	buf[0] = '\0';
+	ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0);
+	if (ret < 0)
+		return libbpf_err(ret);
+
+	/* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id)
+	 * at all, it will emit something like "invalid func unknown#181".
+	 * If BPF verifier recognizes BPF helper but it's not supported for
+	 * given BPF program type, it will emit "unknown func bpf_sys_bpf#166".
+	 * In both cases, provided combination of BPF program type and BPF
+	 * helper is not supported by the kernel.
+	 * In all other cases, probe_prog_load() above will either succeed (e.g.,
+	 * because BPF helper happens to accept no input arguments or it
+	 * accepts one input argument and initial PTR_TO_CTX is fine for
+	 * that), or we'll get some more specific BPF verifier error about
+	 * some unsatisfied conditions.
+	 */
+	if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func ")))
+		return 0;
+	return 1; /* assume supported */
 }

 bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
@@ -320,8 +438,7 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
 	char buf[4096] = {};
 	bool res;

-	probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf),
-		   ifindex);
+	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
 	res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");

 	if (ifindex) {
@@ -353,8 +470,8 @@ bool bpf_probe_large_insn_limit(__u32 ifindex)
 	insns[BPF_MAXINSNS] = BPF_EXIT_INSN();

 	errno = 0;
-	probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
-		   ifindex);
+	probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
+			ifindex);

 	return errno != E2BIG && errno != EINVAL;
 }
--- a/src/libbpf_version.h
+++ b/src/libbpf_version.h
@@ -4,6 +4,6 @@
 #define __LIBBPF_VERSION_H

 #define LIBBPF_MAJOR_VERSION 0
-#define LIBBPF_MINOR_VERSION 6
+#define LIBBPF_MINOR_VERSION 8

 #endif /* __LIBBPF_VERSION_H */
--- a/src/linker.c
+++ b/src/linker.c
@@ -15,7 +15,6 @@
 #include <linux/btf.h>
 #include <elf.h>
 #include <libelf.h>
-#include <gelf.h>
 #include <fcntl.h>
 #include "libbpf.h"
 #include "btf.h"
@@ -211,6 +210,7 @@ void bpf_linker__free(struct bpf_linker *linker)
 	}
 	free(linker->secs);

+	free(linker->glob_syms);
 	free(linker);
 }

@@ -302,7 +302,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 	if (!linker->filename)
 		return -ENOMEM;

-	linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
 	if (linker->fd < 0) {
 		err = -errno;
 		pr_warn("failed to create '%s': %d\n", file, err);
@@ -324,12 +324,12 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)

 	linker->elf_hdr->e_machine = EM_BPF;
 	linker->elf_hdr->e_type = ET_REL;
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB;
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 	linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB;
 #else
-#error "Unknown __BYTE_ORDER"
+#error "Unknown __BYTE_ORDER__"
 #endif

 	/* STRTAB */
@@ -539,12 +539,12 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 				const struct bpf_linker_file_opts *opts,
 				struct src_obj *obj)
 {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	const int host_endianness = ELFDATA2LSB;
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 	const int host_endianness = ELFDATA2MSB;
 #else
-#error "Unknown __BYTE_ORDER"
+#error "Unknown __BYTE_ORDER__"
 #endif
 	int err = 0;
 	Elf_Scn *scn;
@@ -557,7 +557,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,

 	obj->filename = filename;

-	obj->fd = open(filename, O_RDONLY);
+	obj->fd = open(filename, O_RDONLY | O_CLOEXEC);
 	if (obj->fd < 0) {
 		err = -errno;
 		pr_warn("failed to open file '%s': %d\n", filename, err);
@@ -921,7 +921,7 @@ static int check_btf_type_id(__u32 *type_id, void *ctx)
 {
 	struct btf *btf = ctx;

-	if (*type_id > btf__get_nr_types(btf))
+	if (*type_id >= btf__type_cnt(btf))
 		return -EINVAL;

 	return 0;
@@ -948,8 +948,8 @@ static int linker_sanity_check_btf(struct src_obj *obj)
 	if (!obj->btf)
 		return 0;

-	n = btf__get_nr_types(obj->btf);
-	for (i = 1; i <= n; i++) {
+	n = btf__type_cnt(obj->btf);
+	for (i = 1; i < n; i++) {
 		t = btf_type_by_id(obj->btf, i);

 		err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf);
@@ -1659,8 +1659,8 @@ static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sy
 		return -EINVAL;
 	}

-	n = btf__get_nr_types(obj->btf);
-	for (i = 1; i <= n; i++) {
+	n = btf__type_cnt(obj->btf);
+	for (i = 1; i < n; i++) {
 		t = btf__type_by_id(obj->btf, i);

 		/* some global and extern FUNCs and VARs might not be associated with any
@@ -2000,7 +2000,7 @@ add_sym:
 static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
 {
 	struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
-	struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx];
+	struct dst_sec *dst_symtab;
 	int i, err;

 	for (i = 1; i < obj->sec_cnt; i++) {
@@ -2033,6 +2033,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 			return -1;
 		}

+		/* add_dst_sec() above could have invalidated linker->secs */
+		dst_symtab = &linker->secs[linker->symtab_sec_idx];
+
 		/* shdr->sh_link points to SYMTAB */
 		dst_sec->shdr->sh_link = linker->symtab_sec_idx;

@@ -2131,8 +2134,8 @@ static int linker_fixup_btf(struct src_obj *obj)
 	if (!obj->btf)
 		return 0;

-	n = btf__get_nr_types(obj->btf);
-	for (i = 1; i <= n; i++) {
+	n = btf__type_cnt(obj->btf);
+	for (i = 1; i < n; i++) {
 		struct btf_var_secinfo *vi;
 		struct btf_type *t;

@@ -2235,14 +2238,14 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 	if (!obj->btf)
 		return 0;

-	start_id = btf__get_nr_types(linker->btf) + 1;
-	n = btf__get_nr_types(obj->btf);
+	start_id = btf__type_cnt(linker->btf);
+	n = btf__type_cnt(obj->btf);

 	obj->btf_type_map = calloc(n + 1, sizeof(int));
 	if (!obj->btf_type_map)
 		return -ENOMEM;

-	for (i = 1; i <= n; i++) {
+	for (i = 1; i < n; i++) {
 		struct glob_sym *glob_sym = NULL;

 		t = btf__type_by_id(obj->btf, i);
@@ -2297,8 +2300,8 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 	}

 	/* remap all the types except DATASECs */
-	n = btf__get_nr_types(linker->btf);
-	for (i = start_id; i <= n; i++) {
+	n = btf__type_cnt(linker->btf);
+	for (i = start_id; i < n; i++) {
 		struct btf_type *dst_t = btf_type_by_id(linker->btf, i);

 		if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map))
@@ -2651,13 +2654,14 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,

 static int finalize_btf(struct bpf_linker *linker)
 {
+	LIBBPF_OPTS(btf_dedup_opts, opts);
 	struct btf *btf = linker->btf;
 	const void *raw_data;
 	int i, j, id, err;
 	__u32 raw_sz;

 	/* bail out if no BTF data was produced */
-	if (btf__get_nr_types(linker->btf) == 0)
+	if (btf__type_cnt(linker->btf) == 1)
 		return 0;

 	for (i = 1; i < linker->sec_cnt; i++) {
@@ -2687,14 +2691,15 @@ static int finalize_btf(struct bpf_linker *linker)
 		return err;
 	}

-	err = btf__dedup(linker->btf, linker->btf_ext, NULL);
+	opts.btf_ext = linker->btf_ext;
+	err = btf__dedup(linker->btf, &opts);
 	if (err) {
 		pr_warn("BTF dedup failed: %d\n", err);
 		return err;
 	}

 	/* Emit .BTF section */
-	raw_data = btf__get_raw_data(linker->btf, &raw_sz);
+	raw_data = btf__raw_data(linker->btf, &raw_sz);
 	if (!raw_data)
 		return -ENOMEM;

--- a/src/netlink.c
+++ b/src/netlink.c
@@ -87,29 +87,75 @@ enum {
 	NL_DONE,
 };

+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+	int len;
+
+	do {
+		len = recvmsg(sock, mhdr, flags);
+	} while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+	if (len < 0)
+		return -errno;
+	return len;
+}
+
+static int alloc_iov(struct iovec *iov, int len)
+{
+	void *nbuf;
+
+	nbuf = realloc(iov->iov_base, len);
+	if (!nbuf)
+		return -ENOMEM;
+
+	iov->iov_base = nbuf;
+	iov->iov_len = len;
+	return 0;
+}
+
 static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
 			       __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
 			       void *cookie)
 {
+	struct iovec iov = {};
+	struct msghdr mhdr = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
 	bool multipart = true;
 	struct nlmsgerr *err;
 	struct nlmsghdr *nh;
-	char buf[4096];
 	int len, ret;

+	ret = alloc_iov(&iov, 4096);
+	if (ret)
+		goto done;
+
 	while (multipart) {
 start:
 		multipart = false;
-		len = recv(sock, buf, sizeof(buf), 0);
+		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
 		if (len < 0) {
-			ret = -errno;
+			ret = len;
+			goto done;
+		}
+
+		if (len > iov.iov_len) {
+			ret = alloc_iov(&iov, len);
+			if (ret)
+				goto done;
+		}
+
+		len = netlink_recvmsg(sock, &mhdr, 0);
+		if (len < 0) {
+			ret = len;
 			goto done;
 		}

 		if (len == 0)
 			break;

-		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
 		     nh = NLMSG_NEXT(nh, len)) {
 			if (nh->nlmsg_pid != nl_pid) {
 				ret = -LIBBPF_ERRNO__WRNGPID;
@@ -130,7 +176,8 @@ start:
 				libbpf_nla_dump_errormsg(nh);
 				goto done;
 			case NLMSG_DONE:
-				return 0;
+				ret = 0;
+				goto done;
 			default:
 				break;
 			}
@@ -142,15 +189,17 @@ start:
 				case NL_NEXT:
 					goto start;
 				case NL_DONE:
-					return 0;
+					ret = 0;
+					goto done;
 				default:
-					return ret;
+					goto done;
 				}
 			}
 		}
 	}
 	ret = 0;
 done:
+	free(iov.iov_base);
 	return ret;
 }

@@ -217,6 +266,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }

+int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+	int old_prog_fd, err;
+
+	if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
+		return libbpf_err(-EINVAL);
+
+	old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+	if (old_prog_fd)
+		flags |= XDP_FLAGS_REPLACE;
+	else
+		old_prog_fd = -1;
+
+	err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
+	return libbpf_err(err);
+}
+
+int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
+{
+	return bpf_xdp_attach(ifindex, -1, flags, opts);
+}
+
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
 			     const struct bpf_xdp_set_link_opts *opts)
 {
@@ -303,69 +374,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
 	return 0;
 }

-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-			  size_t info_size, __u32 flags)
+int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 {
-	struct xdp_id_md xdp_id = {};
-	__u32 mask;
-	int ret;
 	struct libbpf_nla_req req = {
 		.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
 		.nh.nlmsg_type     = RTM_GETLINK,
 		.nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
 		.ifinfo.ifi_family = AF_PACKET,
 	};
+	struct xdp_id_md xdp_id = {};
+	int err;

-	if (flags & ~XDP_FLAGS_MASK || !info_size)
+	if (!OPTS_VALID(opts, bpf_xdp_query_opts))
+		return libbpf_err(-EINVAL);
+
+	if (xdp_flags & ~XDP_FLAGS_MASK)
 		return libbpf_err(-EINVAL);

 	/* Check whether the single {HW,DRV,SKB} mode is set */
-	flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
-	mask = flags - 1;
-	if (flags && flags & mask)
+	xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
+	if (xdp_flags & (xdp_flags - 1))
 		return libbpf_err(-EINVAL);

 	xdp_id.ifindex = ifindex;
-	xdp_id.flags = flags;
+	xdp_id.flags = xdp_flags;

-	ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+	err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
 				       get_xdp_info, &xdp_id);
-	if (!ret) {
-		size_t sz = min(info_size, sizeof(xdp_id.info));
+	if (err)
+		return libbpf_err(err);

-		memcpy(info, &xdp_id.info, sz);
-		memset((void *) info + sz, 0, info_size - sz);
-	}
-
-	return libbpf_err(ret);
-}
-
-static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
-{
-	flags &= XDP_FLAGS_MODES;
-
-	if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
-		return info->prog_id;
-	if (flags & XDP_FLAGS_DRV_MODE)
-		return info->drv_prog_id;
-	if (flags & XDP_FLAGS_HW_MODE)
-		return info->hw_prog_id;
-	if (flags & XDP_FLAGS_SKB_MODE)
-		return info->skb_prog_id;
+	OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
+	OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
+	OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
+	OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
+	OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);

 	return 0;
 }

-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+			  size_t info_size, __u32 flags)
 {
-	struct xdp_link_info info;
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+	size_t sz;
+	int err;
+
+	if (!info_size)
+		return libbpf_err(-EINVAL);
+
+	err = bpf_xdp_query(ifindex, flags, &opts);
+	if (err)
+		return libbpf_err(err);
+
+	/* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
+	 * layout after sz field
+	 */
+	sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
+	memcpy(info, &opts.prog_id, sz);
+	memset((void *)info + sz, 0, info_size - sz);
+
+	return 0;
+}
+
+int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
+{
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
 	int ret;

-	ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
-	if (!ret)
-		*prog_id = get_xdp_id(&info, flags);
+	ret = bpf_xdp_query(ifindex, flags, &opts);
+	if (ret)
+		return libbpf_err(ret);

-	return libbpf_err(ret);
+	flags &= XDP_FLAGS_MODES;
+
+	if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
+		*prog_id = opts.prog_id;
+	else if (flags & XDP_FLAGS_DRV_MODE)
+		*prog_id = opts.drv_prog_id;
+	else if (flags & XDP_FLAGS_HW_MODE)
+		*prog_id = opts.hw_prog_id;
+	else if (flags & XDP_FLAGS_SKB_MODE)
+		*prog_id = opts.skb_prog_id;
+	else
+		*prog_id = 0;
+
+	return 0;
+}
+
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+	return bpf_xdp_query_id(ifindex, flags, prog_id);
 }

 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
--- a/src/relo_core.c
+++ b/src/relo_core.c
@@ -1,6 +1,60 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2019 Facebook */

+#ifdef __KERNEL__
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/string.h>
+#include <linux/bpf_verifier.h>
+#include "relo_core.h"
+
+static const char *btf_kind_str(const struct btf_type *t)
+{
+	return btf_type_str(t);
+}
+
+static bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id)
+{
+	return btf_type_skip_modifiers(btf, id, res_id);
+}
+
+static const char *btf__name_by_offset(const struct btf *btf, u32 offset)
+{
+	return btf_name_by_offset(btf, offset);
+}
+
+static s64 btf__resolve_size(const struct btf *btf, u32 type_id)
+{
+	const struct btf_type *t;
+	int size;
+
+	t = btf_type_by_id(btf, type_id);
+	t = btf_resolve_size(btf, t, &size);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	return size;
+}
+
+enum libbpf_print_level {
+	LIBBPF_WARN,
+	LIBBPF_INFO,
+	LIBBPF_DEBUG,
+};
+
+#undef pr_warn
+#undef pr_info
+#undef pr_debug
+#define pr_warn(fmt, log, ...)	bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define pr_info(fmt, log, ...)	bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define pr_debug(fmt, log, ...)	bpf_log((void *)log, fmt, "", ##__VA_ARGS__)
+#define libbpf_print(level, fmt, ...)	bpf_log((void *)prog_name, fmt, ##__VA_ARGS__)
+#else
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
@@ -12,33 +66,7 @@
 #include "btf.h"
 #include "str_error.h"
 #include "libbpf_internal.h"
-
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
-	__u32 type_id;		/* struct/union type or array element type */
-	__u32 idx;		/* field index or array index */
-	const char *name;	/* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
-	const struct btf *btf;
-	/* high-level spec: named fields and array indices only */
-	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
-	/* original unresolved (no skip_mods_or_typedefs) root type ID */
-	__u32 root_type_id;
-	/* CO-RE relocation kind */
-	enum bpf_core_relo_kind relo_kind;
-	/* high-level spec length */
-	int len;
-	/* raw, low-level spec: 1-to-1 with accessor spec string */
-	int raw_spec[BPF_CORE_SPEC_MAX_LEN];
-	/* raw spec length */
-	int raw_len;
-	/* field bit offset represented by spec */
-	__u32 bit_offset;
-};
+#endif

 static bool is_flex_arr(const struct btf *btf,
 			const struct bpf_core_accessor *acc,
@@ -51,25 +79,25 @@ static bool is_flex_arr(const struct btf *btf,
 		return false;

 	/* has to be the last member of enclosing struct */
-	t = btf__type_by_id(btf, acc->type_id);
+	t = btf_type_by_id(btf, acc->type_id);
 	return acc->idx == btf_vlen(t) - 1;
 }

 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
 {
 	switch (kind) {
-	case BPF_FIELD_BYTE_OFFSET: return "byte_off";
-	case BPF_FIELD_BYTE_SIZE: return "byte_sz";
-	case BPF_FIELD_EXISTS: return "field_exists";
-	case BPF_FIELD_SIGNED: return "signed";
-	case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
-	case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
-	case BPF_TYPE_ID_LOCAL: return "local_type_id";
-	case BPF_TYPE_ID_TARGET: return "target_type_id";
-	case BPF_TYPE_EXISTS: return "type_exists";
-	case BPF_TYPE_SIZE: return "type_size";
-	case BPF_ENUMVAL_EXISTS: return "enumval_exists";
-	case BPF_ENUMVAL_VALUE: return "enumval_value";
+	case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off";
+	case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz";
+	case BPF_CORE_FIELD_EXISTS: return "field_exists";
+	case BPF_CORE_FIELD_SIGNED: return "signed";
+	case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64";
+	case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64";
+	case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
+	case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
+	case BPF_CORE_TYPE_EXISTS: return "type_exists";
+	case BPF_CORE_TYPE_SIZE: return "type_size";
+	case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
+	case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
 	default: return "unknown";
 	}
 }
@@ -77,12 +105,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
 {
 	switch (kind) {
-	case BPF_FIELD_BYTE_OFFSET:
-	case BPF_FIELD_BYTE_SIZE:
-	case BPF_FIELD_EXISTS:
-	case BPF_FIELD_SIGNED:
-	case BPF_FIELD_LSHIFT_U64:
-	case BPF_FIELD_RSHIFT_U64:
+	case BPF_CORE_FIELD_BYTE_OFFSET:
+	case BPF_CORE_FIELD_BYTE_SIZE:
+	case BPF_CORE_FIELD_EXISTS:
+	case BPF_CORE_FIELD_SIGNED:
+	case BPF_CORE_FIELD_LSHIFT_U64:
+	case BPF_CORE_FIELD_RSHIFT_U64:
 		return true;
 	default:
 		return false;
@@ -92,10 +120,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
 {
 	switch (kind) {
-	case BPF_TYPE_ID_LOCAL:
-	case BPF_TYPE_ID_TARGET:
-	case BPF_TYPE_EXISTS:
-	case BPF_TYPE_SIZE:
+	case BPF_CORE_TYPE_ID_LOCAL:
+	case BPF_CORE_TYPE_ID_TARGET:
+	case BPF_CORE_TYPE_EXISTS:
+	case BPF_CORE_TYPE_SIZE:
 		return true;
 	default:
 		return false;
@@ -105,8 +133,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
 {
 	switch (kind) {
-	case BPF_ENUMVAL_EXISTS:
-	case BPF_ENUMVAL_VALUE:
+	case BPF_CORE_ENUMVAL_EXISTS:
+	case BPF_CORE_ENUMVAL_VALUE:
 		return true;
 	default:
 		return false;
@@ -150,29 +178,28 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
 * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
 * string to specify enumerator's value index that need to be relocated.
 */
-static int bpf_core_parse_spec(const struct btf *btf,
-			       __u32 type_id,
-			       const char *spec_str,
-			       enum bpf_core_relo_kind relo_kind,
-			       struct bpf_core_spec *spec)
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+			const struct bpf_core_relo *relo,
+			struct bpf_core_spec *spec)
 {
 	int access_idx, parsed_len, i;
 	struct bpf_core_accessor *acc;
 	const struct btf_type *t;
-	const char *name;
+	const char *name, *spec_str;
 	__u32 id;
 	__s64 sz;

+	spec_str = btf__name_by_offset(btf, relo->access_str_off);
 	if (str_is_empty(spec_str) || *spec_str == ':')
 		return -EINVAL;

 	memset(spec, 0, sizeof(*spec));
 	spec->btf = btf;
-	spec->root_type_id = type_id;
-	spec->relo_kind = relo_kind;
+	spec->root_type_id = relo->type_id;
+	spec->relo_kind = relo->kind;

 	/* type-based relocations don't have a field access string */
-	if (core_relo_is_type_based(relo_kind)) {
+	if (core_relo_is_type_based(relo->kind)) {
 		if (strcmp(spec_str, "0"))
 			return -EINVAL;
 		return 0;
@@ -193,7 +220,7 @@ static int bpf_core_parse_spec(const struct btf *btf,
 	if (spec->raw_len == 0)
 		return -EINVAL;

-	t = skip_mods_and_typedefs(btf, type_id, &id);
+	t = skip_mods_and_typedefs(btf, relo->type_id, &id);
 	if (!t)
 		return -EINVAL;

@@ -203,7 +230,7 @@ static int bpf_core_parse_spec(const struct btf *btf,
 	acc->idx = access_idx;
 	spec->len++;

-	if (core_relo_is_enumval_based(relo_kind)) {
+	if (core_relo_is_enumval_based(relo->kind)) {
 		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
 			return -EINVAL;

@@ -212,7 +239,7 @@ static int bpf_core_parse_spec(const struct btf *btf,
 		return 0;
 	}

-	if (!core_relo_is_field_based(relo_kind))
+	if (!core_relo_is_field_based(relo->kind))
 		return -EINVAL;

 	sz = btf__resolve_size(btf, id);
@@ -272,8 +299,8 @@ static int bpf_core_parse_spec(const struct btf *btf,
 				return sz;
 			spec->bit_offset += access_idx * sz * 8;
 		} else {
-			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
-				type_id, spec_str, i, id, btf_kind_str(t));
+			pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+				prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t));
 			return -EINVAL;
 		}
 	}
@@ -346,8 +373,6 @@ recur:
 		targ_id = btf_array(targ_type)->type;
 		goto recur;
 	default:
-		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
-			btf_kind(local_type), local_id, targ_id);
 		return 0;
 	}
 }
@@ -388,7 +413,7 @@ static int bpf_core_match_member(const struct btf *local_btf,
 		return 0;

 	local_id = local_acc->type_id;
-	local_type = btf__type_by_id(local_btf, local_id);
+	local_type = btf_type_by_id(local_btf, local_id);
 	local_member = btf_members(local_type) + local_acc->idx;
 	local_name = btf__name_by_offset(local_btf, local_member->name_off);

@@ -571,7 +596,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,

 	*field_sz = 0;

-	if (relo->kind == BPF_FIELD_EXISTS) {
+	if (relo->kind == BPF_CORE_FIELD_EXISTS) {
 		*val = spec ? 1 : 0;
 		return 0;
 	}
@@ -580,11 +605,11 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 		return -EUCLEAN; /* request instruction poisoning */

 	acc = &spec->spec[spec->len - 1];
-	t = btf__type_by_id(spec->btf, acc->type_id);
+	t = btf_type_by_id(spec->btf, acc->type_id);

 	/* a[n] accessor needs special handling */
 	if (!acc->name) {
-		if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+		if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
 			*val = spec->bit_offset / 8;
 			/* remember field size for load/store mem size */
 			sz = btf__resolve_size(spec->btf, acc->type_id);
@@ -592,7 +617,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 				return -EINVAL;
 			*field_sz = sz;
 			*type_id = acc->type_id;
-		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+		} else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) {
 			sz = btf__resolve_size(spec->btf, acc->type_id);
 			if (sz < 0)
 				return -EINVAL;
@@ -644,36 +669,36 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 		*validate = !bitfield;

 	switch (relo->kind) {
-	case BPF_FIELD_BYTE_OFFSET:
+	case BPF_CORE_FIELD_BYTE_OFFSET:
 		*val = byte_off;
 		if (!bitfield) {
 			*field_sz = byte_sz;
 			*type_id = field_type_id;
 		}
 		break;
-	case BPF_FIELD_BYTE_SIZE:
+	case BPF_CORE_FIELD_BYTE_SIZE:
 		*val = byte_sz;
 		break;
-	case BPF_FIELD_SIGNED:
+	case BPF_CORE_FIELD_SIGNED:
 		/* enums will be assumed unsigned */
 		*val = btf_is_enum(mt) ||
 		       (btf_int_encoding(mt) & BTF_INT_SIGNED);
 		if (validate)
 			*validate = true; /* signedness is never ambiguous */
 		break;
-	case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+	case BPF_CORE_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 		*val = 64 - (bit_off + bit_sz - byte_off  * 8);
 #else
 		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
 #endif
 		break;
-	case BPF_FIELD_RSHIFT_U64:
+	case BPF_CORE_FIELD_RSHIFT_U64:
 		*val = 64 - bit_sz;
 		if (validate)
 			*validate = true; /* right shift is never ambiguous */
 		break;
-	case BPF_FIELD_EXISTS:
+	case BPF_CORE_FIELD_EXISTS:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -683,10 +708,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,

 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
 				   const struct bpf_core_spec *spec,
-				   __u32 *val)
+				   __u32 *val, bool *validate)
 {
 	__s64 sz;

+	/* by default, always check expected value in bpf_insn */
+	if (validate)
+		*validate = true;
+
 	/* type-based relos return zero when target type is not found */
 	if (!spec) {
 		*val = 0;
@@ -694,20 +723,25 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
 	}

 	switch (relo->kind) {
-	case BPF_TYPE_ID_TARGET:
+	case BPF_CORE_TYPE_ID_TARGET:
 		*val = spec->root_type_id;
+		/* type ID, embedded in bpf_insn, might change during linking,
+		 * so enforcing it is pointless
+		 */
+		if (validate)
+			*validate = false;
 		break;
-	case BPF_TYPE_EXISTS:
+	case BPF_CORE_TYPE_EXISTS:
 		*val = 1;
 		break;
-	case BPF_TYPE_SIZE:
+	case BPF_CORE_TYPE_SIZE:
 		sz = btf__resolve_size(spec->btf, spec->root_type_id);
 		if (sz < 0)
 			return -EINVAL;
 		*val = sz;
 		break;
-	case BPF_TYPE_ID_LOCAL:
-	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+	case BPF_CORE_TYPE_ID_LOCAL:
+	/* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -723,13 +757,13 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
 	const struct btf_enum *e;

 	switch (relo->kind) {
-	case BPF_ENUMVAL_EXISTS:
+	case BPF_CORE_ENUMVAL_EXISTS:
 		*val = spec ? 1 : 0;
 		break;
-	case BPF_ENUMVAL_VALUE:
+	case BPF_CORE_ENUMVAL_VALUE:
 		if (!spec)
 			return -EUCLEAN; /* request instruction poisoning */
-		t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+		t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
 		e = btf_enum(t) + spec->spec[0].idx;
 		*val = e->val;
 		break;
@@ -740,31 +774,6 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
 	return 0;
 }

-struct bpf_core_relo_res
-{
-	/* expected value in the instruction, unless validate == false */
-	__u32 orig_val;
-	/* new value that needs to be patched up to */
-	__u32 new_val;
-	/* relocation unsuccessful, poison instruction, but don't fail load */
-	bool poison;
-	/* some relocations can't be validated against orig_val */
-	bool validate;
-	/* for field byte offset relocations or the forms:
-	 *     *(T *)(rX + <off>) = rY
-	 *     rX = *(T *)(rY + <off>),
-	 * we remember original and resolved field size to adjust direct
-	 * memory loads of pointers and integers; this is necessary for 32-bit
-	 * host kernel architectures, but also allows to automatically
-	 * relocate fields that were resized from, e.g., u32 to u64, etc.
-	 */
-	bool fail_memsz_adjust;
-	__u32 orig_sz;
-	__u32 orig_type_id;
-	__u32 new_sz;
-	__u32 new_type_id;
-};
-
 /* Calculate original and target relocation values, given local and target
 * specs and relocation kind. These values are calculated for each candidate.
 * If there are multiple candidates, resulting values should all be consistent
@@ -805,8 +814,8 @@ static int bpf_core_calc_relo(const char *prog_name,
 		if (res->orig_sz != res->new_sz) {
 			const struct btf_type *orig_t, *new_t;

-			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
-			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+			orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id);
+			new_t = btf_type_by_id(targ_spec->btf, res->new_type_id);

 			/* There are two use cases in which it's safe to
 			 * adjust load/store's mem size:
@@ -835,8 +844,8 @@ static int bpf_core_calc_relo(const char *prog_name,
 			res->fail_memsz_adjust = true;
 		}
 	} else if (core_relo_is_type_based(relo->kind)) {
-		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
-		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate);
+		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL);
 	} else if (core_relo_is_enumval_based(relo->kind)) {
 		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
 		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
@@ -916,9 +925,9 @@ static int insn_bytes_to_bpf_size(__u32 sz)
 * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
 * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
 */
-static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
-			       int insn_idx, const struct bpf_core_relo *relo,
-			       int relo_idx, const struct bpf_core_relo_res *res)
+int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+			int insn_idx, const struct bpf_core_relo *relo,
+			int relo_idx, const struct bpf_core_relo_res *res)
 {
 	__u32 orig_val, new_val;
 	__u8 class;
@@ -1045,55 +1054,70 @@ poison:
 * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
 * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
 */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
 {
 	const struct btf_type *t;
 	const struct btf_enum *e;
 	const char *s;
 	__u32 type_id;
-	int i;
+	int i, len = 0;
+
+#define append_buf(fmt, args...)				\
+	({							\
+		int r;						\
+		r = snprintf(buf, buf_sz, fmt, ##args);		\
+		len += r;					\
+		if (r >= buf_sz)				\
+			r = buf_sz;				\
+		buf += r;					\
+		buf_sz -= r;					\
+	})

 	type_id = spec->root_type_id;
-	t = btf__type_by_id(spec->btf, type_id);
+	t = btf_type_by_id(spec->btf, type_id);
 	s = btf__name_by_offset(spec->btf, t->name_off);

-	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+	append_buf("<%s> [%u] %s %s",
+		   core_relo_kind_str(spec->relo_kind),
+		   type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);

 	if (core_relo_is_type_based(spec->relo_kind))
-		return;
+		return len;

 	if (core_relo_is_enumval_based(spec->relo_kind)) {
 		t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
 		e = btf_enum(t) + spec->raw_spec[0];
 		s = btf__name_by_offset(spec->btf, e->name_off);

-		libbpf_print(level, "::%s = %u", s, e->val);
-		return;
+		append_buf("::%s = %u", s, e->val);
+		return len;
 	}

 	if (core_relo_is_field_based(spec->relo_kind)) {
 		for (i = 0; i < spec->len; i++) {
 			if (spec->spec[i].name)
-				libbpf_print(level, ".%s", spec->spec[i].name);
+				append_buf(".%s", spec->spec[i].name);
 			else if (i > 0 || spec->spec[i].idx > 0)
-				libbpf_print(level, "[%u]", spec->spec[i].idx);
+				append_buf("[%u]", spec->spec[i].idx);
 		}

-		libbpf_print(level, " (");
+		append_buf(" (");
 		for (i = 0; i < spec->raw_len; i++)
-			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+			append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);

 		if (spec->bit_offset % 8)
-			libbpf_print(level, " @ offset %u.%u)",
-				     spec->bit_offset / 8, spec->bit_offset % 8);
+			append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8);
 		else
-			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
-		return;
+			append_buf(" @ offset %u)", spec->bit_offset / 8);
+		return len;
 	}
+
+	return len;
+#undef append_buf
 }

 /*
- * CO-RE relocate single instruction.
+ * Calculate CO-RE relocation target result.
 *
 * The outline and important points of the algorithm:
 * 1. For given local type, find corresponding candidate target types.
@@ -1142,100 +1166,97 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
 *    between multiple relocations for the same type ID and is updated as some
 *    of the candidates are pruned due to structural incompatibility.
 */
-int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
-			     int insn_idx,
-			     const struct bpf_core_relo *relo,
-			     int relo_idx,
-			     const struct btf *local_btf,
-			     struct bpf_core_cand_list *cands)
+int bpf_core_calc_relo_insn(const char *prog_name,
+			    const struct bpf_core_relo *relo,
+			    int relo_idx,
+			    const struct btf *local_btf,
+			    struct bpf_core_cand_list *cands,
+			    struct bpf_core_spec *specs_scratch,
+			    struct bpf_core_relo_res *targ_res)
 {
-	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
-	struct bpf_core_relo_res cand_res, targ_res;
+	struct bpf_core_spec *local_spec = &specs_scratch[0];
+	struct bpf_core_spec *cand_spec = &specs_scratch[1];
+	struct bpf_core_spec *targ_spec = &specs_scratch[2];
+	struct bpf_core_relo_res cand_res;
 	const struct btf_type *local_type;
 	const char *local_name;
 	__u32 local_id;
-	const char *spec_str;
+	char spec_buf[256];
 	int i, j, err;

 	local_id = relo->type_id;
-	local_type = btf__type_by_id(local_btf, local_id);
-	if (!local_type)
-		return -EINVAL;
-
+	local_type = btf_type_by_id(local_btf, local_id);
 	local_name = btf__name_by_offset(local_btf, local_type->name_off);
 	if (!local_name)
 		return -EINVAL;

-	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
-	if (str_is_empty(spec_str))
-		return -EINVAL;
-
-	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+	err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec);
 	if (err) {
+		const char *spec_str;
+
+		spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
 		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
 			prog_name, relo_idx, local_id, btf_kind_str(local_type),
 			str_is_empty(local_name) ? "<anon>" : local_name,
-			spec_str, err);
+			spec_str ?: "<?>", err);
 		return -EINVAL;
 	}

-	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
-		 relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
-	libbpf_print(LIBBPF_DEBUG, "\n");
+	bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec);
+	pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf);

 	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
-	if (relo->kind == BPF_TYPE_ID_LOCAL) {
-		targ_res.validate = true;
-		targ_res.poison = false;
-		targ_res.orig_val = local_spec.root_type_id;
-		targ_res.new_val = local_spec.root_type_id;
-		goto patch_insn;
+	if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) {
+		/* bpf_insn's imm value could get out of sync during linking */
+		memset(targ_res, 0, sizeof(*targ_res));
+		targ_res->validate = false;
+		targ_res->poison = false;
+		targ_res->orig_val = local_spec->root_type_id;
+		targ_res->new_val = local_spec->root_type_id;
+		return 0;
 	}

 	/* libbpf doesn't support candidate search for anonymous types */
-	if (str_is_empty(spec_str)) {
+	if (str_is_empty(local_name)) {
 		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
 			prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
 		return -EOPNOTSUPP;
 	}

-
 	for (i = 0, j = 0; i < cands->len; i++) {
-		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
-					  cands->cands[i].id, &cand_spec);
+		err = bpf_core_spec_match(local_spec, cands->cands[i].btf,
+					  cands->cands[i].id, cand_spec);
 		if (err < 0) {
-			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
-				prog_name, relo_idx, i);
-			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
-			libbpf_print(LIBBPF_WARN, ": %d\n", err);
+			bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+			pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ",
+				prog_name, relo_idx, i, spec_buf, err);
 			return err;
 		}

-		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
-			 relo_idx, err == 0 ? "non-matching" : "matching", i);
-		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
-		libbpf_print(LIBBPF_DEBUG, "\n");
+		bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+		pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name,
+			 relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf);

 		if (err == 0)
 			continue;

-		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+		err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res);
 		if (err)
 			return err;

 		if (j == 0) {
-			targ_res = cand_res;
-			targ_spec = cand_spec;
-		} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+			*targ_res = cand_res;
+			*targ_spec = *cand_spec;
+		} else if (cand_spec->bit_offset != targ_spec->bit_offset) {
 			/* if there are many field relo candidates, they
 			 * should all resolve to the same bit offset
 			 */
 			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
-				prog_name, relo_idx, cand_spec.bit_offset,
-				targ_spec.bit_offset);
+				prog_name, relo_idx, cand_spec->bit_offset,
+				targ_spec->bit_offset);
 			return -EINVAL;
-		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+		} else if (cand_res.poison != targ_res->poison ||
+			   cand_res.new_val != targ_res->new_val) {
 			/* all candidates should result in the same relocation
 			 * decision and value, otherwise it's dangerous to
 			 * proceed due to ambiguity
@@ -1243,7 +1264,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
 				prog_name, relo_idx,
 				cand_res.poison ? "failure" : "success", cand_res.new_val,
-				targ_res.poison ? "failure" : "success", targ_res.new_val);
+				targ_res->poison ? "failure" : "success", targ_res->new_val);
 			return -EINVAL;
 		}

@@ -1251,7 +1272,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 	}

 	/*
-	 * For BPF_FIELD_EXISTS relo or when used BPF program has field
+	 * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field
 	 * existence checks or kernel version/config checks, it's expected
 	 * that we might not find any candidates. In this case, if field
 	 * wasn't found in any candidate, the list of candidates shouldn't
@@ -1277,19 +1298,10 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 			 prog_name, relo_idx);

 		/* calculate single target relo result explicitly */
-		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+		err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res);
 		if (err)
 			return err;
 	}

-patch_insn:
-	/* bpf_core_patch_insn() should know how to handle missing targ_spec */
-	err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
-	if (err) {
-		pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
-			prog_name, relo_idx, relo->insn_off / 8, err);
-		return -EINVAL;
-	}
-
 	return 0;
 }
--- a/src/relo_core.h
+++ b/src/relo_core.h
@@ -4,81 +4,10 @@
 #ifndef __RELO_CORE_H
 #define __RELO_CORE_H

-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
-	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */
-	BPF_FIELD_BYTE_SIZE = 1,	/* field size in bytes */
-	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */
-	BPF_FIELD_SIGNED = 3,		/* field signedness (0 - unsigned, 1 - signed) */
-	BPF_FIELD_LSHIFT_U64 = 4,	/* bitfield-specific left bitshift */
-	BPF_FIELD_RSHIFT_U64 = 5,	/* bitfield-specific right bitshift */
-	BPF_TYPE_ID_LOCAL = 6,		/* type ID in local BPF object */
-	BPF_TYPE_ID_TARGET = 7,		/* type ID in target kernel */
-	BPF_TYPE_EXISTS = 8,		/* type existence in target kernel */
-	BPF_TYPE_SIZE = 9,		/* type size in bytes */
-	BPF_ENUMVAL_EXISTS = 10,	/* enum value existence in target kernel */
-	BPF_ENUMVAL_VALUE = 11,		/* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- *   its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- *   type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- *   interpretation depends on specific relocation kind:
- *     - for field-based relocations, string encodes an accessed field using
- *     a sequence of field and array indices, separated by colon (:). It's
- *     conceptually very close to LLVM's getelementptr ([0]) instruction's
- *     arguments for identifying offset to a field.
- *     - for type-based relocations, strings is expected to be just "0";
- *     - for enum value-based relocations, string contains an index of enum
- *     value within its enum type;
- *
- * Example to provide a better feel.
- *
- *   struct sample {
- *       int a;
- *       struct {
- *           int b[10];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *   int x = &s->a;     // encoded as "0:0" (a is field #0)
- *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
- *                      // b is field #0 inside anon struct, accessing elem #5)
- *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example  will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- *		  __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
-	__u32   insn_off;
-	__u32   type_id;
-	__u32   access_str_off;
-	enum bpf_core_relo_kind kind;
-};
+#include <linux/bpf.h>

 struct bpf_core_cand {
 	const struct btf *btf;
-	const struct btf_type *t;
-	const char *name;
 	__u32 id;
 };

@@ -88,13 +17,77 @@ struct bpf_core_cand_list {
 	int len;
 };

-int bpf_core_apply_relo_insn(const char *prog_name,
-			     struct bpf_insn *insn, int insn_idx,
-			     const struct bpf_core_relo *relo, int relo_idx,
-			     const struct btf *local_btf,
-			     struct bpf_core_cand_list *cands);
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+	__u32 type_id;		/* struct/union type or array element type */
+	__u32 idx;		/* field index or array index */
+	const char *name;	/* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+	const struct btf *btf;
+	/* high-level spec: named fields and array indices only */
+	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+	/* original unresolved (no skip_mods_or_typedefs) root type ID */
+	__u32 root_type_id;
+	/* CO-RE relocation kind */
+	enum bpf_core_relo_kind relo_kind;
+	/* high-level spec length */
+	int len;
+	/* raw, low-level spec: 1-to-1 with accessor spec string */
+	int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+	/* raw spec length */
+	int raw_len;
+	/* field bit offset represented by spec */
+	__u32 bit_offset;
+};
+
+struct bpf_core_relo_res {
+	/* expected value in the instruction, unless validate == false */
+	__u32 orig_val;
+	/* new value that needs to be patched up to */
+	__u32 new_val;
+	/* relocation unsuccessful, poison instruction, but don't fail load */
+	bool poison;
+	/* some relocations can't be validated against orig_val */
+	bool validate;
+	/* for field byte offset relocations or the forms:
+	 *     *(T *)(rX + <off>) = rY
+	 *     rX = *(T *)(rY + <off>),
+	 * we remember original and resolved field size to adjust direct
+	 * memory loads of pointers and integers; this is necessary for 32-bit
+	 * host kernel architectures, but also allows to automatically
+	 * relocate fields that were resized from, e.g., u32 to u64, etc.
+	 */
+	bool fail_memsz_adjust;
+	__u32 orig_sz;
+	__u32 orig_type_id;
+	__u32 new_sz;
+	__u32 new_type_id;
+};
+
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id);

 size_t bpf_core_essential_name_len(const char *name);
+
+int bpf_core_calc_relo_insn(const char *prog_name,
+			    const struct bpf_core_relo *relo, int relo_idx,
+			    const struct btf *local_btf,
+			    struct bpf_core_cand_list *cands,
+			    struct bpf_core_spec *specs_scratch,
+			    struct bpf_core_relo_res *targ_res);
+
+int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+			int insn_idx, const struct bpf_core_relo *relo,
+			int relo_idx, const struct bpf_core_relo_res *res);
+
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+		        const struct bpf_core_relo *relo,
+		        struct bpf_core_spec *spec);
+
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec);
+
 #endif
--- a/src/skel_internal.h
+++ b/src/skel_internal.h
@@ -3,9 +3,29 @@
 #ifndef __SKEL_INTERNAL_H
 #define __SKEL_INTERNAL_H

+#ifdef __KERNEL__
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#else
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/mman.h>
+#include <stdlib.h>
+#include "bpf.h"
+#endif
+
+#ifndef __NR_bpf
+# if defined(__mips__) && defined(_ABIO32)
+#  define __NR_bpf 4355
+# elif defined(__mips__) && defined(_ABIN32)
+#  define __NR_bpf 6319
+# elif defined(__mips__) && defined(_ABI64)
+#  define __NR_bpf 5315
+# endif
+#endif

 /* This file is a base header for auto-generated *.lskel.h files.
 * Its contents will change and may become part of auto-generation in the future.
@@ -15,24 +35,23 @@
 * requested during loader program generation.
 */
 struct bpf_map_desc {
-	union {
-		/* input for the loader prog */
-		struct {
-			__aligned_u64 initial_value;
-			__u32 max_entries;
-		};
-		/* output of the loader prog */
-		struct {
-			int map_fd;
-		};
-	};
+	/* output of the loader prog */
+	int map_fd;
+	/* input for the loader prog */
+	__u32 max_entries;
+	__aligned_u64 initial_value;
 };
 struct bpf_prog_desc {
 	int prog_fd;
 };

+enum {
+	BPF_SKEL_KERNEL = (1ULL << 0),
+};
+
 struct bpf_loader_ctx {
-	size_t sz;
+	__u32 sz;
+	__u32 flags;
 	__u32 log_level;
 	__u32 log_size;
 	__u64 log_buf;
@@ -47,12 +66,144 @@ struct bpf_load_and_run_opts {
 	const char *errstr;
 };

+long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
+
 static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 			  unsigned int size)
 {
+#ifdef __KERNEL__
+	return bpf_sys_bpf(cmd, attr, size);
+#else
 	return syscall(__NR_bpf, cmd, attr, size);
+#endif
 }

+#ifdef __KERNEL__
+static inline int close(int fd)
+{
+	return close_fd(fd);
+}
+
+static inline void *skel_alloc(size_t size)
+{
+	struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL);
+
+	if (!ctx)
+		return NULL;
+	ctx->flags |= BPF_SKEL_KERNEL;
+	return ctx;
+}
+
+static inline void skel_free(const void *p)
+{
+	kfree(p);
+}
+
+/* skel->bss/rodata maps are populated the following way:
+ *
+ * For kernel use:
+ * skel_prep_map_data() allocates kernel memory that kernel module can directly access.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value.
+ * The loader program will perform probe_read_kernel() from maps.rodata.initial_value.
+ * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and
+ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
+ * is not nessary.
+ *
+ * For user space:
+ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() remaps bpf array map value from the kernel memory into
+ * skel->rodata address.
+ *
+ * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for
+ * both kernel and user space. The generated loader program does
+ * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value
+ * depending on bpf_loader_ctx->flags.
+ */
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	if (addr != ~0ULL)
+		kvfree(p);
+	/* When addr == ~0ULL the 'p' points to
+	 * ((struct bpf_array *)map)->value. See skel_finalize_map_data.
+	 */
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = kvmalloc(val_sz, GFP_KERNEL);
+	if (!addr)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd)
+{
+	struct bpf_map *map;
+	void *addr = NULL;
+
+	kvfree((void *) (long) *init_val);
+	*init_val = ~0ULL;
+
+	/* At this point bpf_load_and_run() finished without error and
+	 * 'fd' is a valid bpf map FD. All sanity checks below should succeed.
+	 */
+	map = bpf_map_get(fd);
+	if (IS_ERR(map))
+		return NULL;
+	if (map->map_type != BPF_MAP_TYPE_ARRAY)
+		goto out;
+	addr = ((struct bpf_array *)map)->value;
+	/* the addr stays valid, since FD is not closed */
+out:
+	bpf_map_put(map);
+	return addr;
+}
+
+#else
+
+static inline void *skel_alloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+static inline void skel_free(void *p)
+{
+	free(p);
+}
+
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	munmap(p, sz);
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (addr == (void *) -1)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd)
+{
+	void *addr;
+
+	addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0);
+	if (addr == (void *) -1)
+		return NULL;
+	return addr;
+}
+#endif
+
 static inline int skel_closenz(int fd)
 {
 	if (fd > 0)
@@ -60,23 +211,94 @@ static inline int skel_closenz(int fd)
 	return -EINVAL;
 }

+#ifndef offsetofend
+#define offsetofend(TYPE, MEMBER) \
+	(offsetof(TYPE, MEMBER)	+ sizeof((((TYPE *)0)->MEMBER)))
+#endif
+
+static inline int skel_map_create(enum bpf_map_type map_type,
+				  const char *map_name,
+				  __u32 key_size,
+				  __u32 value_size,
+				  __u32 max_entries)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+
+	attr.map_type = map_type;
+	strncpy(attr.map_name, map_name, sizeof(attr.map_name));
+	attr.key_size = key_size;
+	attr.value_size = value_size;
+	attr.max_entries = max_entries;
+
+	return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz);
+}
+
+static inline int skel_map_update_elem(int fd, const void *key,
+				       const void *value, __u64 flags)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.map_fd = fd;
+	attr.key = (long) key;
+	attr.value = (long) value;
+	attr.flags = flags;
+
+	return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
+}
+
+static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.raw_tracepoint.name = (long) name;
+	attr.raw_tracepoint.prog_fd = prog_fd;
+
+	return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
+}
+
+static inline int skel_link_create(int prog_fd, int target_fd,
+				   enum bpf_attach_type attach_type)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.link_create.prog_fd = prog_fd;
+	attr.link_create.target_fd = target_fd;
+	attr.link_create.attach_type = attach_type;
+
+	return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
+}
+
+#ifdef __KERNEL__
+#define set_err
+#else
+#define set_err err = -errno
+#endif
+
 static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 {
 	int map_fd = -1, prog_fd = -1, key = 0, err;
 	union bpf_attr attr;

-	map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4,
-				     opts->data_sz, 1, 0);
+	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
 	if (map_fd < 0) {
 		opts->errstr = "failed to create loader map";
-		err = -errno;
+		set_err;
 		goto out;
 	}

-	err = bpf_map_update_elem(map_fd, &key, opts->data, 0);
+	err = skel_map_update_elem(map_fd, &key, opts->data, 0);
 	if (err < 0) {
 		opts->errstr = "failed to update loader map";
-		err = -errno;
+		set_err;
 		goto out;
 	}

@@ -91,10 +313,10 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	attr.log_size = opts->ctx->log_size;
 	attr.log_buf = opts->ctx->log_buf;
 	attr.prog_flags = BPF_F_SLEEPABLE;
-	prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+	err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 	if (prog_fd < 0) {
 		opts->errstr = "failed to load loader prog";
-		err = -errno;
+		set_err;
 		goto out;
 	}

@@ -106,10 +328,12 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	if (err < 0 || (int)attr.test.retval < 0) {
 		opts->errstr = "failed to execute loader prog";
 		if (err < 0) {
-			err = -errno;
+			set_err;
 		} else {
 			err = (int)attr.test.retval;
+#ifndef __KERNEL__
 			errno = -err;
+#endif
 		}
 		goto out;
 	}
--- a/src/usdt.bpf.h
+++ b/src/usdt.bpf.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#ifndef __USDT_BPF_H__
+#define __USDT_BPF_H__
+
+#include <linux/errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* Below types and maps are internal implementation details of libbpf's USDT
+ * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should
+ * be considered an unstable API as well and might be adjusted based on user
+ * feedback from using libbpf's USDT support in production.
+ */
+
+/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal
+ * map that keeps track of USDT argument specifications. This might be
+ * necessary if there are a lot of USDT attachments.
+ */
+#ifndef BPF_USDT_MAX_SPEC_CNT
+#define BPF_USDT_MAX_SPEC_CNT 256
+#endif
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
+ * map that keeps track of IP (memory address) mapping to USDT argument
+ * specification.
+ * Note, if kernel supports BPF cookies, this map is not used and could be
+ * resized all the way to 1 to save a bit of memory.
+ */
+#ifndef BPF_USDT_MAX_IP_CNT
+#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
+#endif
+/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is
+ * the only dependency on CO-RE, so if it's undesirable, user can override
+ * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not.
+ */
+#ifndef BPF_USDT_HAS_BPF_COOKIE
+#define BPF_USDT_HAS_BPF_COOKIE \
+	bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt)
+#endif
+
+enum __bpf_usdt_arg_type {
+	BPF_USDT_ARG_CONST,
+	BPF_USDT_ARG_REG,
+	BPF_USDT_ARG_REG_DEREF,
+};
+
+struct __bpf_usdt_arg_spec {
+	/* u64 scalar interpreted depending on arg_type, see below */
+	__u64 val_off;
+	/* arg location case, see bpf_udst_arg() for details */
+	enum __bpf_usdt_arg_type arg_type;
+	/* offset of referenced register within struct pt_regs */
+	short reg_off;
+	/* whether arg should be interpreted as signed value */
+	bool arg_signed;
+	/* number of bits that need to be cleared and, optionally,
+	 * sign-extended to cast arguments that are 1, 2, or 4 bytes
+	 * long into final 8-byte u64/s64 value returned to user
+	 */
+	char arg_bitshift;
+};
+
+/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
+#define BPF_USDT_MAX_ARG_CNT 12
+struct __bpf_usdt_spec {
+	struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
+	__u64 usdt_cookie;
+	short arg_cnt;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
+	__type(key, int);
+	__type(value, struct __bpf_usdt_spec);
+} __bpf_usdt_specs SEC(".maps") __weak;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, BPF_USDT_MAX_IP_CNT);
+	__type(key, long);
+	__type(value, __u32);
+} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
+
+/* don't rely on user's BPF code to have latest definition of bpf_func_id */
+enum bpf_func_id___usdt {
+	BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */
+};
+
+static __always_inline
+int __bpf_usdt_spec_id(struct pt_regs *ctx)
+{
+	if (!BPF_USDT_HAS_BPF_COOKIE) {
+		long ip = PT_REGS_IP(ctx);
+		int *spec_id_ptr;
+
+		spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
+		return spec_id_ptr ? *spec_id_ptr : -ESRCH;
+	}
+
+	return bpf_get_attach_cookie(ctx);
+}
+
+/* Return number of USDT arguments defined for currently traced USDT. */
+__weak __hidden
+int bpf_usdt_arg_cnt(struct pt_regs *ctx)
+{
+	struct __bpf_usdt_spec *spec;
+	int spec_id;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return -ESRCH;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return -ESRCH;
+
+	return spec->arg_cnt;
+}
+
+/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
+ * Returns 0 on success; negative error, otherwise.
+ * On error *res is guaranteed to be set to zero.
+ */
+__weak __hidden
+int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
+{
+	struct __bpf_usdt_spec *spec;
+	struct __bpf_usdt_arg_spec *arg_spec;
+	unsigned long val;
+	int err, spec_id;
+
+	*res = 0;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return -ESRCH;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return -ESRCH;
+
+	if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt)
+		return -ENOENT;
+
+	arg_spec = &spec->args[arg_num];
+	switch (arg_spec->arg_type) {
+	case BPF_USDT_ARG_CONST:
+		/* Arg is just a constant ("-4@$-9" in USDT arg spec).
+		 * value is recorded in arg_spec->val_off directly.
+		 */
+		val = arg_spec->val_off;
+		break;
+	case BPF_USDT_ARG_REG:
+		/* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
+		 * so we read the contents of that register directly from
+		 * struct pt_regs. To keep things simple user-space parts
+		 * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
+		 */
+		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		if (err)
+			return err;
+		break;
+	case BPF_USDT_ARG_REG_DEREF:
+		/* Arg is in memory addressed by register, plus some offset
+		 * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is
+		 * identified like with BPF_USDT_ARG_REG case, and the offset
+		 * is in arg_spec->val_off. We first fetch register contents
+		 * from pt_regs, then do another user-space probe read to
+		 * fetch argument value itself.
+		 */
+		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		if (err)
+			return err;
+		err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
+		if (err)
+			return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		val >>= arg_spec->arg_bitshift;
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing
+	 * necessary upper arg_bitshift bits, with sign extension if argument
+	 * is signed
+	 */
+	val <<= arg_spec->arg_bitshift;
+	if (arg_spec->arg_signed)
+		val = ((long)val) >> arg_spec->arg_bitshift;
+	else
+		val = val >> arg_spec->arg_bitshift;
+	*res = val;
+	return 0;
+}
+
+/* Retrieve user-specified cookie value provided during attach as
+ * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie
+ * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself
+ * utilizing BPF cookies internally, so user can't use BPF cookie directly
+ * for USDT programs and has to use bpf_usdt_cookie() API instead.
+ */
+__weak __hidden
+long bpf_usdt_cookie(struct pt_regs *ctx)
+{
+	struct __bpf_usdt_spec *spec;
+	int spec_id;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return 0;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return 0;
+
+	return spec->usdt_cookie;
+}
+
+/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */
+#define ___bpf_usdt_args0() ctx
+#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; })
+#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; })
+#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; })
+#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; })
+#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; })
+#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; })
+#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; })
+#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; })
+#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; })
+#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; })
+#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; })
+#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; })
+#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
+ * Original struct pt_regs * context is preserved as 'ctx' argument.
+ */
+#define BPF_USDT(name, args...)						    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+        _Pragma("GCC diagnostic push")					    \
+        _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+        return ____##name(___bpf_usdt_args(args));			    \
+        _Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args)
+
+#endif /* __USDT_BPF_H__ */
--- a/src/usdt.c
+++ b/src/usdt.c
--- a/src/xsk.c
+++ b/src/xsk.c
@@ -35,6 +35,11 @@
 #include "libbpf_internal.h"
 #include "xsk.h"

+/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal
+ * uses of deprecated APIs
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
 #ifndef SOL_XDP
 #define SOL_XDP 283
 #endif
@@ -300,7 +305,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 	if (!umem)
 		return -ENOMEM;

-	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
+	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
 	if (umem->fd < 0) {
 		err = -errno;
 		goto out_umem_alloc;
@@ -364,8 +369,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
 static enum xsk_prog get_xsk_prog(void)
 {
 	enum xsk_prog detected = XSK_PROG_FALLBACK;
-	struct bpf_load_program_attr prog_attr;
-	struct bpf_create_map_attr map_attr;
 	__u32 size_out, retval, duration;
 	char data_in = 0, data_out;
 	struct bpf_insn insns[] = {
@@ -375,27 +378,15 @@ static enum xsk_prog get_xsk_prog(void)
 		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 		BPF_EXIT_INSN(),
 	};
-	int prog_fd, map_fd, ret;
+	int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);

-	memset(&map_attr, 0, sizeof(map_attr));
-	map_attr.map_type = BPF_MAP_TYPE_XSKMAP;
-	map_attr.key_size = sizeof(int);
-	map_attr.value_size = sizeof(int);
-	map_attr.max_entries = 1;
-
-	map_fd = bpf_create_map_xattr(&map_attr);
+	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
 	if (map_fd < 0)
 		return detected;

 	insns[0].imm = map_fd;

-	memset(&prog_attr, 0, sizeof(prog_attr));
-	prog_attr.prog_type = BPF_PROG_TYPE_XDP;
-	prog_attr.insns = insns;
-	prog_attr.insns_cnt = ARRAY_SIZE(insns);
-	prog_attr.license = "GPL";
-
-	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
 	if (prog_fd < 0) {
 		close(map_fd);
 		return detected;
@@ -490,15 +481,18 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt[] = {sizeof(prog) / sizeof(struct bpf_insn),
-			      sizeof(prog_redirect_flags) / sizeof(struct bpf_insn),
+	size_t insns_cnt[] = {ARRAY_SIZE(prog),
+			      ARRAY_SIZE(prog_redirect_flags),
 	};
 	struct bpf_insn *progs[] = {prog, prog_redirect_flags};
 	enum xsk_prog option = get_xsk_prog();
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.log_buf = log_buf,
+		.log_size = log_buf_size,
+	);

-	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option],
-				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
-				   log_buf_size);
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
+				progs[option], insns_cnt[option], &opts);
 	if (prog_fd < 0) {
 		pr_warn("BPF log buffer:\n%s", log_buf);
 		return prog_fd;
@@ -549,13 +543,12 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
 	struct ifreq ifr = {};
 	int fd, err, ret;

-	fd = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
 	if (fd < 0)
 		return -errno;

 	ifr.ifr_data = (void *)&channels;
-	memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
-	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+	libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
 	err = ioctl(fd, SIOCETHTOOL, &ifr);
 	if (err && errno != EOPNOTSUPP) {
 		ret = -errno;
@@ -590,8 +583,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 	if (max_queues < 0)
 		return max_queues;

-	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
-				 sizeof(int), sizeof(int), max_queues, 0);
+	fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+			    sizeof(int), sizeof(int), max_queues, NULL);
 	if (fd < 0)
 		return fd;

@@ -725,14 +718,12 @@ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)

 static bool xsk_probe_bpf_link(void)
 {
-	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
-			    .flags = XDP_FLAGS_SKB_MODE);
-	struct bpf_load_program_attr prog_attr;
+	LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
 	struct bpf_insn insns[2] = {
 		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
 		BPF_EXIT_INSN()
 	};
-	int prog_fd, link_fd = -1;
+	int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
 	int ifindex_lo = 1;
 	bool ret = false;
 	int err;
@@ -744,13 +735,7 @@ static bool xsk_probe_bpf_link(void)
 	if (link_fd >= 0)
 		return true;

-	memset(&prog_attr, 0, sizeof(prog_attr));
-	prog_attr.prog_type = BPF_PROG_TYPE_XDP;
-	prog_attr.insns = insns;
-	prog_attr.insns_cnt = ARRAY_SIZE(insns);
-	prog_attr.license = "GPL";
-
-	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
 	if (prog_fd < 0)
 		return ret;

@@ -782,8 +767,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 	}

 	ctx->ifindex = ifindex;
-	memcpy(ctx->ifname, ifname, IFNAMSIZ -1);
-	ctx->ifname[IFNAMSIZ - 1] = 0;
+	libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);

 	xsk->ctx = ctx;
 	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
@@ -965,8 +949,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	ctx->refcount = 1;
 	ctx->umem = umem;
 	ctx->queue_id = queue_id;
-	memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
-	ctx->ifname[IFNAMSIZ - 1] = '\0';
+	libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);

 	ctx->fill = fill;
 	ctx->comp = comp;
@@ -1046,7 +1029,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	}

 	if (umem->refcount++ > 0) {
-		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
 		if (xsk->fd < 0) {
 			err = -errno;
 			goto out_xsk_alloc;
@@ -1210,12 +1193,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,

 int xsk_umem__delete(struct xsk_umem *umem)
 {
+	struct xdp_mmap_offsets off;
+	int err;
+
 	if (!umem)
 		return 0;

 	if (umem->refcount)
 		return -EBUSY;

+	err = xsk_get_mmap_offsets(umem->fd, &off);
+	if (!err && umem->fill_save && umem->comp_save) {
+		munmap(umem->fill_save->ring - off.fr.desc,
+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+		munmap(umem->comp_save->ring - off.cr.desc,
+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+	}
+
 	close(umem->fd);
 	free(umem);

--- a/src/xsk.h
+++ b/src/xsk.h
@@ -23,6 +23,12 @@
 extern "C" {
 #endif

+/* This whole API has been deprecated and moved to libxdp that can be found at
+ * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so
+ * it should just be linking with libxdp instead of libbpf for this set of
+ * functionality. If not, please submit a bug report on the aforementioned page.
+ */
+
 /* Load-Acquire Store-Release barriers used by the XDP socket
 * library. The following macros should *NOT* be considered part of
 * the xsk.h API, and is subject to change anytime.
@@ -245,8 +251,10 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
 	return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
 }

-LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
-LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_umem__fd(const struct xsk_umem *umem);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_socket__fd(const struct xsk_socket *xsk);

 #define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
 #define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
@@ -263,10 +271,10 @@ struct xsk_umem_config {
 	__u32 flags;
 };

-LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
-				  int *xsks_map_fd);
-LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk,
-					 int xsks_map_fd);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);

 /* Flags for the libbpf_flags field. */
 #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
@@ -280,40 +288,46 @@ struct xsk_socket_config {
 };

 /* Set config to NULL to get the default configuration. */
-LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
-				void *umem_area, __u64 size,
-				struct xsk_ring_prod *fill,
-				struct xsk_ring_cons *comp,
-				const struct xsk_umem_config *config);
-LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
-				       void *umem_area, __u64 size,
-				       struct xsk_ring_prod *fill,
-				       struct xsk_ring_cons *comp,
-				       const struct xsk_umem_config *config);
-LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
-				       void *umem_area, __u64 size,
-				       struct xsk_ring_prod *fill,
-				       struct xsk_ring_cons *comp,
-				       const struct xsk_umem_config *config);
-LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
-				  const char *ifname, __u32 queue_id,
-				  struct xsk_umem *umem,
-				  struct xsk_ring_cons *rx,
-				  struct xsk_ring_prod *tx,
-				  const struct xsk_socket_config *config);
-LIBBPF_API int
-xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
-			  const char *ifname,
-			  __u32 queue_id, struct xsk_umem *umem,
-			  struct xsk_ring_cons *rx,
-			  struct xsk_ring_prod *tx,
-			  struct xsk_ring_prod *fill,
-			  struct xsk_ring_cons *comp,
-			  const struct xsk_socket_config *config);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_umem__create(struct xsk_umem **umem,
+		     void *umem_area, __u64 size,
+		     struct xsk_ring_prod *fill,
+		     struct xsk_ring_cons *comp,
+		     const struct xsk_umem_config *config);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
+			    void *umem_area, __u64 size,
+			    struct xsk_ring_prod *fill,
+			    struct xsk_ring_cons *comp,
+			    const struct xsk_umem_config *config);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
+			    void *umem_area, __u64 size,
+			    struct xsk_ring_prod *fill,
+			    struct xsk_ring_cons *comp,
+			    const struct xsk_umem_config *config);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_socket__create(struct xsk_socket **xsk,
+		       const char *ifname, __u32 queue_id,
+		       struct xsk_umem *umem,
+		       struct xsk_ring_cons *rx,
+		       struct xsk_ring_prod *tx,
+		       const struct xsk_socket_config *config);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+			      const char *ifname,
+			      __u32 queue_id, struct xsk_umem *umem,
+			      struct xsk_ring_cons *rx,
+			      struct xsk_ring_prod *tx,
+			      struct xsk_ring_prod *fill,
+			      struct xsk_ring_cons *comp,
+			      const struct xsk_socket_config *config);

 /* Returns 0 for success and -EBUSY if the umem is still in use. */
-LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
-LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_umem__delete(struct xsk_umem *umem);
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+void xsk_socket__delete(struct xsk_socket *xsk);

 #ifdef __cplusplus
 } /* extern "C" */
--- a/travis-ci/diffs/.do_not_use_dot_patch_here
+++ b/travis-ci/diffs/.do_not_use_dot_patch_here
--- a/travis-ci/diffs/001-fix-oob-write-in-test_verifier.diff
+++ b/travis-ci/diffs/001-fix-oob-write-in-test_verifier.diff
@@ -0,0 +1,35 @@
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+To: bpf@vger.kernel.org
+Cc: Alexei Starovoitov <ast@kernel.org>,
+	Daniel Borkmann <daniel@iogearbox.net>,
+	Andrii Nakryiko <andrii@kernel.org>
+Subject: [PATCH bpf-next] selftests/bpf: Fix OOB write in test_verifier
+Date: Tue, 14 Dec 2021 07:18:00 +0530	[thread overview]
+Message-ID: <20211214014800.78762-1-memxor@gmail.com> (raw)
+
+The commit referenced below added fixup_map_timer support (to create a
+BPF map containing timers), but failed to increase the size of the
+map_fds array, leading to out of bounds write. Fix this by changing
+MAX_NR_MAPS to 22.
+
+Fixes: e60e6962c503 ("selftests/bpf: Add tests for restricted helpers")
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+---
+ tools/testing/selftests/bpf/test_verifier.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
+index ad5d30bafd93..33e2ecb3bef9 100644
+--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -54,7 +54,7 @@
+ #define MAX_INSNS	BPF_MAXINSNS
+ #define MAX_TEST_INSNS	1000000
+ #define MAX_FIXUPS	8
+-#define MAX_NR_MAPS	21
+#define MAX_NR_MAPS	22
+ #define MAX_TEST_RUNS	8
+ #define POINTER_VALUE	0xcafe4all
+ #define TEST_DATA_LEN	64
+--
+2.34.1
--- a/travis-ci/rootfs/mkrootfs_arch.sh
+++ b/travis-ci/rootfs/mkrootfs_arch.sh
@@ -80,6 +80,7 @@ packages=(
 	# selftests test_progs dependencies.
 	binutils
 	elfutils
+	ethtool
 	glibc
 	iproute2
 	# selftests test_verifier dependencies.
@@ -100,59 +101,7 @@ rm -rf "$root/var/lib/pacman/sync/"
 # We don't need any documentation.
 rm -rf "$root/usr/share/{doc,help,man,texinfo}"

-chroot "${root}" /bin/busybox --install
+"$(dirname "$0")"/mkrootfs_tweak.sh "$root"

-cat > "$root/etc/inittab" << "EOF"
-::sysinit:/etc/init.d/rcS
-::ctrlaltdel:/sbin/reboot
-::shutdown:/sbin/swapoff -a
-::shutdown:/bin/umount -a -r
-::restart:/sbin/init
-EOF
-chmod 644 "$root/etc/inittab"
-
-mkdir -m 755 "$root/etc/init.d" "$root/etc/rcS.d"
-cat > "$root/etc/rcS.d/S10-mount" << "EOF"
-#!/bin/sh
-
-set -eux
-
-/bin/mount proc /proc -t proc
-
-# Mount devtmpfs if not mounted
-if [[ -z $(/bin/mount -l -t devtmpfs) ]]; then
-	/bin/mount devtmpfs /dev -t devtmpfs
-fi
-
-/bin/mount sysfs /sys -t sysfs
-/bin/mount bpffs /sys/fs/bpf -t bpf
-/bin/mount debugfs /sys/kernel/debug -t debugfs
-
-echo 'Listing currently mounted file systems'
-/bin/mount
-EOF
-chmod 755 "$root/etc/rcS.d/S10-mount"
-
-cat > "$root/etc/rcS.d/S40-network" << "EOF"
-#!/bin/sh
-
-set -eux
-
-ip link set lo up
-EOF
-chmod 755 "$root/etc/rcS.d/S40-network"
-
-cat > "$root/etc/init.d/rcS" << "EOF"
-#!/bin/sh
-
-set -eux
-
-for path in /etc/rcS.d/S*; do
-	[ -x "$path" ] && "$path"
-done
-EOF
-chmod 755 "$root/etc/init.d/rcS"
-
-chmod 755 "$root"
 tar -C "$root" -c . | zstd -T0 -19 -o "$NAME"
 chmod 644 "$NAME"
--- a/travis-ci/rootfs/mkrootfs_debian.sh
+++ b/travis-ci/rootfs/mkrootfs_debian.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# This script builds a Debian root filesystem image for testing libbpf in a
+# virtual machine. Requires debootstrap >= 1.0.95 and zstd.
+
+# Use e.g. ./mkrootfs_debian.sh --arch=s390x to generate a rootfs for a
+# foreign architecture. Requires configured binfmt_misc, e.g. using
+# Debian/Ubuntu's qemu-user-binfmt package or
+# https://github.com/multiarch/qemu-user-static.
+
+set -e -u -x -o pipefail
+
+# Check whether we are root now in order to avoid confusing errors later.
+if [ "$(id -u)" != 0 ]; then
+	echo "$0 must run as root" >&2
+	exit 1
+fi
+
+# Create a working directory and schedule its deletion.
+root=$(mktemp -d -p "$PWD")
+trap 'rm -r "$root"' EXIT
+
+# Install packages.
+packages=binutils,busybox,elfutils,ethtool,iproute2,libcap2,libelf1,strace,zlib1g
+debootstrap --include="$packages" --variant=minbase "$@" bookworm "$root"
+
+# Remove the init scripts (tests use their own). Also remove various
+# unnecessary files in order to save space.
+rm -rf \
+	"$root"/etc/rcS.d \
+	"$root"/usr/share/{doc,info,locale,man,zoneinfo} \
+	"$root"/var/cache/apt/archives/* \
+	"$root"/var/lib/apt/lists/*
+
+# Save some more space by removing coreutils - the tests use busybox. Before
+# doing that, delete the buggy postrm script, which uses the rm command.
+rm -f "$root/var/lib/dpkg/info/coreutils.postrm"
+chroot "$root" dpkg --remove --force-remove-essential coreutils
+
+# Apply common tweaks.
+"$(dirname "$0")"/mkrootfs_tweak.sh "$root"
+
+# Save the result.
+name="libbpf-vmtest-rootfs-$(date +%Y.%m.%d).tar.zst"
+rm -f "$name"
+tar -C "$root" -c . | zstd -T0 -19 -o "$name"
--- a/travis-ci/rootfs/mkrootfs_tweak.sh
+++ b/travis-ci/rootfs/mkrootfs_tweak.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# This script prepares a mounted root filesystem for testing libbpf in a virtual
+# machine.
+set -e -u -x -o pipefail
+root=$1
+shift
+
+chroot "${root}" /bin/busybox --install
+
+cat > "$root/etc/inittab" << "EOF"
+::sysinit:/etc/init.d/rcS
+::ctrlaltdel:/sbin/reboot
+::shutdown:/sbin/swapoff -a
+::shutdown:/bin/umount -a -r
+::restart:/sbin/init
+EOF
+chmod 644 "$root/etc/inittab"
+
+mkdir -m 755 -p "$root/etc/init.d" "$root/etc/rcS.d"
+cat > "$root/etc/rcS.d/S10-mount" << "EOF"
+#!/bin/sh
+
+set -eux
+
+/bin/mount proc /proc -t proc
+
+# Mount devtmpfs if not mounted
+if [[ -z $(/bin/mount -t devtmpfs) ]]; then
+	/bin/mount devtmpfs /dev -t devtmpfs
+fi
+
+/bin/mount sysfs /sys -t sysfs
+/bin/mount bpffs /sys/fs/bpf -t bpf
+/bin/mount debugfs /sys/kernel/debug -t debugfs
+
+echo 'Listing currently mounted file systems'
+/bin/mount
+EOF
+chmod 755 "$root/etc/rcS.d/S10-mount"
+
+cat > "$root/etc/rcS.d/S40-network" << "EOF"
+#!/bin/sh
+
+set -eux
+
+ip link set lo up
+EOF
+chmod 755 "$root/etc/rcS.d/S40-network"
+
+cat > "$root/etc/init.d/rcS" << "EOF"
+#!/bin/sh
+
+set -eux
+
+for path in /etc/rcS.d/S*; do
+	[ -x "$path" ] && "$path"
+done
+EOF
+chmod 755 "$root/etc/init.d/rcS"
+
+chmod 755 "$root"
--- a/travis-ci/rootfs/s390x-self-hosted-builder/README.md
+++ b/travis-ci/rootfs/s390x-self-hosted-builder/README.md
@@ -0,0 +1,107 @@
+# IBM Z self-hosted builder
+
+libbpf CI uses an IBM-provided z15 self-hosted builder. There are no IBM Z
+builds of GitHub (GH) Actions runner, and stable qemu-user has problems with .NET
+apps, so the builder runs the x86_64 runner version with qemu-user built from
+the master branch.
+
+We are currently supporting runners for the following repositories:
+* libbpf/libbpf
+* kernel-patches/bpf
+* kernel-patches/vmtest
+
+Below instructions are directly applicable to libbpf, and require minor
+modifications for kernel-patches repos. Currently, qemu-user-static Docker
+image is shared between all GitHub runners, but separate actions-runner-\*
+service / Docker image is created for each runner type.
+
+## Configuring the builder.
+
+### Install prerequisites.
+
+```
+$ sudo apt install -y docker.io  # Ubuntu
+```
+
+### Add services.
+
+```
+$ sudo cp *.service /etc/systemd/system/
+$ sudo systemctl daemon-reload
+```
+
+### Create a config file.
+
+```
+$ sudo tee /etc/actions-runner-libbpf
+repo=<owner>/<name>
+access_token=<ghp_***>
+```
+
+Access token should have the repo scope, consult
+https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository
+for details.
+
+### Autostart the x86_64 emulation support.
+
+This step is important, you would not be able to build docker container
+without having this service running. If container build fails, make sure
+service is running properly.
+
+```
+$ sudo systemctl enable --now qemu-user-static
+```
+
+### Autostart the runner.
+
+```
+$ sudo systemctl enable --now actions-runner-libbpf
+```
+
+## Rebuilding the image
+
+In order to update the `iiilinuxibmcom/actions-runner-libbpf` image, e.g. to
+get the latest OS security fixes, use the following commands:
+
+```
+$ sudo docker build \
+      --pull \
+      -f actions-runner-libbpf.Dockerfile \
+      -t iiilinuxibmcom/actions-runner-libbpf \
+      .
+$ sudo systemctl restart actions-runner-libbpf
+```
+
+## Removing persistent data
+
+The `actions-runner-libbpf` service stores various temporary data, such as
+runner registration information, work directories and logs, in the
+`actions-runner-libbpf` volume. In order to remove it and start from scratch,
+e.g. when upgrading the runner or switching it to a different repository, use
+the following commands:
+
+```
+$ sudo systemctl stop actions-runner-libbpf
+$ sudo docker rm -f actions-runner-libbpf
+$ sudo docker volume rm actions-runner-libbpf
+```
+
+## Troubleshooting
+
+In order to check if service is running, use the following command:
+
+```
+$ sudo systemctl status <service name>
+```
+
+In order to get logs for service:
+
+```
+$ journalctl -u <service name>
+```
+
+In order to check which containers are currently active:
+
+```
+$ sudo docker ps
+```
--- a/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile
+++ b/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile
@@ -0,0 +1,50 @@
+# Self-Hosted IBM Z Github Actions Runner.
+
+# Temporary image: amd64 dependencies.
+FROM amd64/ubuntu:20.04 as ld-prefix
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get -y install ca-certificates libicu66 libssl1.1
+
+# Main image.
+FROM s390x/ubuntu:20.04
+
+# Packages for libbpf testing that are not installed by .github/actions/setup.
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get -y install \
+        bc \
+        bison \
+        cmake \
+        cpu-checker \
+        curl \
+        flex \
+        git \
+        jq \
+        linux-image-generic \
+        qemu-system-s390x \
+        rsync \
+        software-properties-common \
+        sudo \
+        tree
+
+# amd64 dependencies.
+COPY --from=ld-prefix / /usr/x86_64-linux-gnu/
+RUN ln -fs ../lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /usr/x86_64-linux-gnu/lib64/
+RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/
+ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu
+
+# amd64 Github Actions Runner.
+ARG version=2.285.0
+RUN useradd -m actions-runner
+RUN echo "actions-runner ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers
+RUN echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >>/etc/sudoers
+RUN usermod -a -G kvm actions-runner
+USER actions-runner
+ENV USER=actions-runner
+WORKDIR /home/actions-runner
+RUN curl -L https://github.com/actions/runner/releases/download/v${version}/actions-runner-linux-x64-${version}.tar.gz | tar -xz
+VOLUME /home/actions-runner
+
+# Scripts.
+COPY fs/ /
+ENTRYPOINT ["/usr/bin/entrypoint"]
+CMD ["/usr/bin/actions-runner"]
--- a/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service
+++ b/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service
@@ -0,0 +1,24 @@
+[Unit]
+Description=Self-Hosted IBM Z Github Actions Runner
+Wants=qemu-user-static
+After=qemu-user-static
+StartLimitIntervalSec=0
+
+[Service]
+Type=simple
+Restart=always
+ExecStart=/usr/bin/docker run \
+              --device=/dev/kvm \
+              --env-file=/etc/actions-runner-libbpf \
+              --init \
+              --interactive \
+              --name=actions-runner-libbpf \
+              --rm \
+              --volume=actions-runner-libbpf:/home/actions-runner \
+              iiilinuxibmcom/actions-runner-libbpf
+ExecStop=/bin/sh -c "docker exec actions-runner-libbpf kill -INT -- -1"
+ExecStop=/bin/sh -c "docker wait actions-runner-libbpf"
+ExecStop=/bin/sh -c "docker rm actions-runner-libbpf"
+
+[Install]
+WantedBy=multi-user.target
--- a/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner
+++ b/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+#
+# Ephemeral runner startup script.
+#
+# Expects the following environment variables:
+#
+# - repo=<owner>/<name>
+# - access_token=<ghp_***>
+#
+
+set -e -u
+
+# Check the cached registration token.
+token_file=registration-token.json
+set +e
+expires_at=$(jq --raw-output .expires_at "$token_file" 2>/dev/null)
+status=$?
+set -e
+if [[ $status -ne 0 || $(date +%s) -ge $(date -d "$expires_at" +%s) ]]; then
+    # Refresh the cached registration token.
+    curl \
+        -X POST \
+        -H "Accept: application/vnd.github.v3+json" \
+        -H "Authorization: token $access_token" \
+        "https://api.github.com/repos/$repo/actions/runners/registration-token" \
+        -o "$token_file"
+fi
+
+# (Re-)register the runner.
+registration_token=$(jq --raw-output .token "$token_file")
+./config.sh remove --token "$registration_token" || true
+./config.sh \
+    --url "https://github.com/$repo" \
+    --token "$registration_token" \
+    --labels z15 \
+    --ephemeral
+
+# Run one job.
+./run.sh
--- a/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint
+++ b/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#
+# Container entrypoint that waits for all spawned processes.
+#
+
+set -e -u
+
+# /dev/kvm has host permissions, fix it.
+if [ -e /dev/kvm ]; then
+    sudo chown root:kvm /dev/kvm
+fi
+
+# Create a FIFO and start reading from its read end.
+tempdir=$(mktemp -d "/tmp/done.XXXXXXXXXX")
+trap 'rm -r "$tempdir"' EXIT
+done="$tempdir/pipe"
+mkfifo "$done"
+cat "$done" & waiter=$!
+
+# Start the workload. Its descendants will inherit the FIFO's write end.
+status=0
+if [ "$#" -eq 0 ]; then
+    bash 9>"$done" || status=$?
+else
+    "$@" 9>"$done" || status=$?
+fi
+
+# When the workload and all of its descendants exit, the FIFO's write end will
+# be closed and `cat "$done"` will exit. Wait until it happens. This is needed
+# in order to handle SelfUpdater, which the workload may start in background
+# before exiting.
+wait "$waiter"
+
+exit "$status"
--- a/travis-ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service
+++ b/travis-ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Support for transparent execution of non-native binaries with QEMU user emulation
+
+[Service]
+Type=oneshot
+# The source code for iiilinuxibmcom/qemu-user-static is at https://github.com/iii-i/qemu-user-static/tree/v6.1.0-1
+# TODO: replace it with multiarch/qemu-user-static once version >6.1 is available
+ExecStart=/usr/bin/docker run --rm --interactive --privileged iiilinuxibmcom/qemu-user-static --reset -p yes
+
+[Install]
+WantedBy=multi-user.target
--- a/travis-ci/vmtest/build_pahole.sh
+++ b/travis-ci/vmtest/build_pahole.sh
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-source $(cd $(dirname $0) && pwd)/helpers.sh
-
-CWD=$(pwd)
-REPO_PATH=$1
-PAHOLE_ORIGIN=${PAHOLE_ORIGIN:-https://git.kernel.org/pub/scm/devel/pahole/pahole.git}
-PAHOLE_BRANCH=${PAHOLE_BRANCH:-master}
-
-travis_fold start build_pahole "Building pahole ${PAHOLE_ORIGIN} ${PAHOLE_BRANCH}"
-
-mkdir -p ${REPO_PATH}
-cd ${REPO_PATH}
-git init
-git remote add origin ${PAHOLE_ORIGIN}
-git fetch origin
-git checkout ${PAHOLE_BRANCH}
-
-# temporary work-around to bump pahole to 1.22 before it is officially released
-sed -i 's/DDWARVES_MINOR_VERSION=21/DDWARVES_MINOR_VERSION=22/' CMakeLists.txt
-
-mkdir -p build
-cd build
-cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -D__LIB=lib ..
-make -j$((4*$(nproc))) all
-sudo make install
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:/usr/local/lib
-ldd $(which pahole)
-pahole --version
-
-travis_fold end build_pahole
--- a/travis-ci/vmtest/checkout_latest_kernel.sh
+++ b/travis-ci/vmtest/checkout_latest_kernel.sh
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-source $(cd $(dirname $0) && pwd)/helpers.sh
-
-CWD=$(pwd)
-LIBBPF_PATH=$(pwd)
-REPO_PATH=$1
-
-KERNEL_ORIGIN=${KERNEL_ORIGIN:-https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git}
-KERNEL_BRANCH=${KERNEL_BRANCH:-CHECKPOINT}
-if [[ "${KERNEL_BRANCH}" = 'CHECKPOINT' ]]; then
-  echo "using CHECKPOINT sha1"
-  LINUX_SHA=$(cat ${LIBBPF_PATH}/CHECKPOINT-COMMIT)
-else
-  echo "using ${KERNEL_BRANCH} sha1"
-  LINUX_SHA=$(git ls-remote ${KERNEL_ORIGIN} ${KERNEL_BRANCH} | awk '{print $1}')
-fi
-SNAPSHOT_URL=${KERNEL_ORIGIN}/snapshot/bpf-next-${LINUX_SHA}.tar.gz
-
-echo REPO_PATH = ${REPO_PATH}
-
-echo KERNEL_ORIGIN = ${KERNEL_ORIGIN}
-echo LINUX_SHA = ${LINUX_SHA}
-echo SNAPSHOT_URL = ${SNAPSHOT_URL}
-
-if [ ! -d "${REPO_PATH}" ]; then
-	echo
-	travis_fold start pull_kernel_srcs "Fetching kernel sources"
-
-	mkdir -p $(dirname "${REPO_PATH}")
-	cd $(dirname "${REPO_PATH}")
-	# attempt to fetch desired bpf-next repo snapshot
-	if wget -nv ${SNAPSHOT_URL} && tar xf bpf-next-${LINUX_SHA}.tar.gz --totals ; then
-		mv bpf-next-${LINUX_SHA} $(basename ${REPO_PATH})
-	else
-		# but fallback to git fetch approach if that fails
-		mkdir -p $(basename ${REPO_PATH})
-		cd $(basename ${REPO_PATH})
-		git init
-		git remote add bpf-next ${KERNEL_ORIGIN}
-		# try shallow clone first
-		git fetch --depth 32 bpf-next
-		# check if desired SHA exists
-		if ! git cat-file -e ${LINUX_SHA}^{commit} ; then
-			# if not, fetch all of bpf-next; slow and painful
-			git fetch bpf-next
-		fi
-		git reset --hard ${LINUX_SHA}
-	fi
-
-	travis_fold end pull_kernel_srcs
-fi
--- a/travis-ci/vmtest/configs/INDEX
+++ b/travis-ci/vmtest/configs/INDEX
@@ -1,8 +0,0 @@
-INDEX	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/INDEX
-libbpf-vmtest-rootfs-2020.09.27.tar.zst	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/libbpf-vmtest-rootfs-2020.09.27.tar.zst
-vmlinux-4.9.0.zst	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-4.9.0.zst
-vmlinux-5.5.0-rc6.zst	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-5.5.0-rc6.zst
-vmlinux-5.5.0.zst	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinux-5.5.0.zst
-vmlinuz-5.5.0-rc6	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinuz-5.5.0-rc6
-vmlinuz-5.5.0	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinuz-5.5.0
-vmlinuz-4.9.0	https://libbpf-vmtest.s3-us-west-1.amazonaws.com/x86_64/vmlinuz-4.9.0
--- a/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0
+++ b/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0
@@ -1,3 +1,6 @@
+# This file is not used and is there for historic purposes only.
+# See WHITELIST-5.5.0 instead.
+
 # PERMANENTLY DISABLED
 align			# verifier output format changed
 atomics			# new atomic operations (v5.12+)
--- a/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest.s390x
+++ b/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest.s390x
@@ -0,0 +1,64 @@
+# TEMPORARY
+atomics                                  # attach(add): actual -524 <= expected 0                                      (trampoline)
+bpf_iter_setsockopt                      # JIT does not support calling kernel function                                (kfunc)
+bloom_filter_map                         # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3                (?)
+bpf_tcp_ca                               # JIT does not support calling kernel function                                (kfunc)
+bpf_loop                                 # attaches to __x64_sys_nanosleep
+bpf_mod_race                             # BPF trampoline
+bpf_nf                                   # JIT does not support calling kernel function
+core_read_macros                         # unknown func bpf_probe_read#4                                               (overlapping)
+d_path                                   # failed to auto-attach program 'prog_stat': -524                             (trampoline)
+dummy_st_ops                             # test_run unexpected error: -524 (errno 524)                                 (trampoline)
+fentry_fexit                             # fentry attach failed: -524                                                  (trampoline)
+fentry_test                              # fentry_first_attach unexpected error: -524                                  (trampoline)
+fexit_bpf2bpf                            # freplace_attach_trace unexpected error: -524                                (trampoline)
+fexit_sleep                              # fexit_skel_load fexit skeleton failed                                       (trampoline)
+fexit_stress                             # fexit attach failed prog 0 failed: -524                                     (trampoline)
+fexit_test                               # fexit_first_attach unexpected error: -524                                   (trampoline)
+get_func_args_test	                 # trampoline
+get_func_ip_test                         # get_func_ip_test__attach unexpected error: -524                             (trampoline)
+get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
+kfree_skb                                # attach fentry unexpected error: -524                                        (trampoline)
+kfunc_call                               # 'bpf_prog_active': not found in kernel BTF                                  (?)
+ksyms_module                             # test_ksyms_module__open_and_load unexpected error: -9                       (?)
+ksyms_module_libbpf                      # JIT does not support calling kernel function                                (kfunc)
+ksyms_module_lskel                       # test_ksyms_module_lskel__open_and_load unexpected error: -9                 (?)
+modify_return                            # modify_return attach failed: -524                                           (trampoline)
+module_attach                            # skel_attach skeleton attach failed: -524                                    (trampoline)
+kprobe_multi_test                        # relies on fentry
+netcnt                                   # failed to load BPF skeleton 'netcnt_prog': -7                               (?)
+probe_user                               # check_kprobe_res wrong kprobe res from probe read                           (?)
+recursion                                # skel_attach unexpected error: -524                                          (trampoline)
+ringbuf                                  # skel_load skeleton load failed                                              (?)
+sk_assign                                # Can't read on server: Invalid argument                                      (?)
+sk_lookup                                # endianness problem
+sk_storage_tracing                       # test_sk_storage_tracing__attach unexpected error: -524                      (trampoline)
+skc_to_unix_sock                         # could not attach BPF object unexpected error: -524                          (trampoline)
+socket_cookie                            # prog_attach unexpected error: -524                                          (trampoline)
+stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
+tailcalls                                # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls      (?)
+task_local_storage                       # failed to auto-attach program 'trace_exit_creds': -524                      (trampoline)
+test_bpffs                               # bpffs test  failed 255                                                      (iterator)
+test_bprm_opts                           # failed to auto-attach program 'secure_exec': -524                           (trampoline)
+test_ima                                 # failed to auto-attach program 'ima': -524                                   (trampoline)
+test_local_storage                       # failed to auto-attach program 'unlink_hook': -524                           (trampoline)
+test_lsm                                 # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3          (?)
+test_overhead                            # attach_fentry unexpected error: -524                                        (trampoline)
+test_profiler                            # unknown func bpf_probe_read_str#45                                          (overlapping)
+timer                                    # failed to auto-attach program 'test1': -524                                 (trampoline)
+timer_crash                              # trampoline
+timer_mim                                # failed to auto-attach program 'test1': -524                                 (trampoline)
+trace_ext                                # failed to auto-attach program 'test_pkt_md_access_new': -524                (trampoline)
+trace_printk                             # trace_printk__load unexpected error: -2 (errno 2)                           (?)
+trace_vprintk                            # trace_vprintk__open_and_load unexpected error: -9                           (?)
+trampoline_count                         # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22                  (trampoline)
+verif_stats                              # trace_vprintk__open_and_load unexpected error: -9                           (?)
+vmlinux                                  # failed to auto-attach program 'handle__fentry': -524                        (trampoline)
+xdp_adjust_tail                          # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520                  (?)
+xdp_bonding                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
+xdp_bpf2bpf                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
+map_kptr                                 # failed to open_and_load program: -524 (trampoline)
+bpf_cookie                               # failed to open_and_load program: -524 (trampoline)
+xdp_do_redirect                          # prog_run_max_size unexpected error: -22 (errno 22)
+send_signal                              # intermittently fails to receive signal
+select_reuseport                         # intermittently fails on new s390x setup
--- a/travis-ci/vmtest/configs/config-latest.s390x
+++ b/travis-ci/vmtest/configs/config-latest.s390x
--- a/travis-ci/vmtest/configs/config-latest.x86_64
+++ b/travis-ci/vmtest/configs/config-latest.x86_64
@@ -223,6 +223,8 @@ CONFIG_SHMEM=y
 CONFIG_AIO=y
 CONFIG_IO_URING=y
 CONFIG_ADVISE_SYSCALLS=y
+CONFIG_HAVE_ARCH_USERFAULTFD_WP=y
+CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y
 CONFIG_MEMBARRIER=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
@@ -236,7 +238,7 @@ CONFIG_BPF_JIT_DEFAULT_ON=y
 CONFIG_USERMODE_DRIVER=y
 CONFIG_BPF_PRELOAD=y
 CONFIG_BPF_PRELOAD_UMD=y
-# CONFIG_USERFAULTFD is not set
+CONFIG_USERFAULTFD=y
 CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y
 CONFIG_RSEQ=y
 # CONFIG_DEBUG_RSEQ is not set
@@ -975,8 +977,9 @@ CONFIG_NETFILTER_NETLINK=y
 CONFIG_NETFILTER_NETLINK_QUEUE=y
 CONFIG_NETFILTER_NETLINK_LOG=y
 # CONFIG_NETFILTER_NETLINK_OSF is not set
-# CONFIG_NF_CONNTRACK is not set
+CONFIG_NF_CONNTRACK=y
 # CONFIG_NF_LOG_NETDEV is not set
+CONFIG_NETFILTER_SYNPROXY=y
 # CONFIG_NF_TABLES is not set
 CONFIG_NETFILTER_XTABLES=y

@@ -990,6 +993,7 @@ CONFIG_NETFILTER_XTABLES=y
 #
 # CONFIG_NETFILTER_XT_TARGET_AUDIT is not set
 # CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
+CONFIG_NETFILTER_XT_TARGET_CT=y
 # CONFIG_NETFILTER_XT_TARGET_HMARK is not set
 # CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set
 # CONFIG_NETFILTER_XT_TARGET_LOG is not set
@@ -1035,6 +1039,7 @@ CONFIG_NETFILTER_XT_MATCH_BPF=y
 # CONFIG_NETFILTER_XT_MATCH_RECENT is not set
 # CONFIG_NETFILTER_XT_MATCH_SCTP is not set
 # CONFIG_NETFILTER_XT_MATCH_SOCKET is not set
+CONFIG_NETFILTER_XT_MATCH_STATE=y
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
 # CONFIG_NETFILTER_XT_MATCH_STRING is not set
 # CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
@@ -1048,6 +1053,7 @@ CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
 #
 # IP: Netfilter Configuration
 #
+CONFIG_NF_DEFRAG_IPV4=y
 # CONFIG_NF_SOCKET_IPV4 is not set
 # CONFIG_NF_TPROXY_IPV4 is not set
 # CONFIG_NF_DUP_IPV4 is not set
@@ -1058,9 +1064,10 @@ CONFIG_IP_NF_IPTABLES=y
 # CONFIG_IP_NF_MATCH_AH is not set
 # CONFIG_IP_NF_MATCH_ECN is not set
 # CONFIG_IP_NF_MATCH_TTL is not set
-# CONFIG_IP_NF_FILTER is not set
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
 # CONFIG_IP_NF_MANGLE is not set
-# CONFIG_IP_NF_RAW is not set
+CONFIG_IP_NF_RAW=y
 # CONFIG_IP_NF_SECURITY is not set
 # CONFIG_IP_NF_ARPTABLES is not set
 # end of IP: Netfilter Configuration
@@ -1089,6 +1096,7 @@ CONFIG_IP6_NF_IPTABLES=y
 # CONFIG_IP6_NF_SECURITY is not set
 # end of IPv6: Netfilter Configuration

+CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_BPFILTER=y
 CONFIG_BPFILTER_UMH=m
 # CONFIG_IP_DCCP is not set
@@ -1791,7 +1799,14 @@ CONFIG_BCMA_POSSIBLE=y
 # end of Multifunction device drivers

 # CONFIG_REGULATOR is not set
-# CONFIG_RC_CORE is not set
+CONFIG_RC_CORE=y
+# CONFIG_RC_MAP is not set
+CONFIG_LIRC=y
+CONFIG_BPF_LIRC_MODE2=y
+# CONFIG_RC_DECODERS is not set
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+# CONFIG_IR_SERIAL is not set
 # CONFIG_MEDIA_CEC_SUPPORT is not set
 # CONFIG_MEDIA_SUPPORT is not set

@@ -2778,6 +2793,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # Compile-time checks and compiler options
 #
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 # CONFIG_DEBUG_INFO_REDUCED is not set
 # CONFIG_DEBUG_INFO_COMPRESSED is not set
 # CONFIG_DEBUG_INFO_SPLIT is not set
@@ -2965,6 +2981,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y
 CONFIG_DYNAMIC_FTRACE=y
 CONFIG_DYNAMIC_FTRACE_WITH_REGS=y
 CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
+CONFIG_FPROBE=y
 # CONFIG_FUNCTION_PROFILER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_IRQSOFF_TRACER is not set
--- a/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0
+++ b/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0
@@ -2,6 +2,7 @@
 core_retro
 cpu_mask
 hashmap
+legacy_printk
 perf_buffer
 section_names

--- a/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0
+++ b/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0
@@ -18,6 +18,7 @@ global_data_init
 global_func_args
 hashmap
 l4lb_all
+legacy_printk
 linked_funcs
 linked_maps
 map_lock
@@ -42,13 +43,13 @@ spinlock
 stacktrace_map
 stacktrace_map_raw_tp
 static_linked
-subprogs
 task_fd_query_rawtp
 task_fd_query_tp
 tc_bpf
 tcp_estats
 tcp_rtt
 tp_attach_query
+usdt/urand_pid_attach
 xdp
 xdp_info
 xdp_noinline
--- a/travis-ci/vmtest/helpers.sh
+++ b/travis-ci/vmtest/helpers.sh
@@ -22,3 +22,23 @@ travis_fold() {
    echo -e "$line"
  fi
 }
+
+__print() {
+  local TITLE=""
+  if [[ -n $2 ]]; then
+      TITLE=" title=$2"
+  fi
+  echo "::$1${TITLE}::$3"
+}
+
+# $1 - title
+# $2 - message
+print_error() {
+  __print error $1 $2
+}
+
+# $1 - title
+# $2 - message
+print_notice() {
+  __print notice $1 $2
+}
--- a/travis-ci/vmtest/prepare_selftests.sh
+++ b/travis-ci/vmtest/prepare_selftests.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-source $(cd $(dirname $0) && pwd)/helpers.sh
-
-REPO_PATH=${1:-}
-
-if [[ ! -z "$REPO_PATH" ]]; then
-	${VMTEST_ROOT}/checkout_latest_kernel.sh ${REPO_PATH}
-	cd ${REPO_PATH}
-fi
-
-if [[ "${KERNEL}" = 'LATEST' ]]; then
-	travis_fold start build_kernel "Kernel build"
-
-	cp ${VMTEST_ROOT}/configs/latest.config .config
-	make -j $((4*$(nproc))) olddefconfig all >/dev/null
-	travis_fold end build_kernel
-fi
-
--- a/travis-ci/vmtest/run.sh
+++ b/travis-ci/vmtest/run.sh
@@ -1,477 +0,0 @@
-#!/bin/bash
-
-set -uo pipefail
-trap 'exit 2' ERR
-
-source $(cd $(dirname $0) && pwd)/helpers.sh
-
-usage () {
-	USAGE_STRING="usage: $0 [-k KERNELRELEASE|-b DIR] [[-r ROOTFSVERSION] [-fo]|-I] [-Si] [-d DIR] IMG
-       $0 [-k KERNELRELEASE] -l
-       $0 -h
-
-Run "${PROJECT_NAME}" tests in a virtual machine.
-
-This exits with status 0 on success, 1 if the virtual machine ran successfully
-but tests failed, and 2 if we encountered a fatal error.
-
-This script uses sudo to mount and modify the disk image.
-
-Arguments:
-  IMG                 path of virtual machine disk image to create
-
-Versions:
-  -k, --kernel=KERNELRELEASE
-                       kernel release to test. This is a glob pattern; the
-                       newest (sorted by version number) release that matches
-                       the pattern is used (default: newest available release)
-
-  -b, --build DIR      use the kernel built in the given directory. This option
-                       cannot be combined with -k
-
-  -r, --rootfs=ROOTFSVERSION
-                       version of root filesystem to use (default: newest
-                       available version)
-
-Setup:
-  -f, --force          overwrite IMG if it already exists
-
-  -o, --one-shot       one-shot mode. By default, this script saves a clean copy
-                       of the downloaded root filesystem image and vmlinux and
-                       makes a copy (reflinked, when possible) for executing the
-                       virtual machine. This allows subsequent runs to skip
-                       downloading these files. If this option is given, the
-                       root filesystem image and vmlinux are always
-                       re-downloaded and are not saved. This option implies -f
-
-  -s, --setup-cmd      setup commands run on VM boot. Whitespace characters
-                       should be escaped with preceding '\'.
-
-  -I, --skip-image     skip creating the disk image; use the existing one at
-                       IMG. This option cannot be combined with -r, -f, or -o
-
-  -S, --skip-source    skip copying the source files and init scripts
-
-Miscellaneous:
-  -i, --interactive    interactive mode. Boot the virtual machine into an
-                       interactive shell instead of automatically running tests
-
-  -d, --dir=DIR        working directory to use for downloading and caching
-                       files (default: current working directory)
-
-  -l, --list           list available kernel releases instead of running tests.
-                       The list may be filtered with -k
-
-  -h, --help           display this help message and exit"
-
-	case "$1" in
-		out)
-			echo "$USAGE_STRING"
-			exit 0
-			;;
-		err)
-			echo "$USAGE_STRING" >&2
-			exit 2
-			;;
-	esac
-}
-
-TEMP=$(getopt -o 'k:b:r:fos:ISid:lh' --long 'kernel:,build:,rootfs:,force,one-shot,setup-cmd,skip-image,skip-source:,interactive,dir:,list,help' -n "$0" -- "$@")
-eval set -- "$TEMP"
-unset TEMP
-
-unset KERNELRELEASE
-unset BUILDDIR
-unset ROOTFSVERSION
-unset IMG
-unset SETUPCMD
-FORCE=0
-ONESHOT=0
-SKIPIMG=0
-SKIPSOURCE=0
-APPEND=""
-DIR="$PWD"
-LIST=0
-
-# by default will copy all files that aren't listed in git exclusions
-# but it doesn't work for entire kernel tree very well
-# so for full kernel tree you may need to SOURCE_FULLCOPY=0
-SOURCE_FULLCOPY=${SOURCE_FULLCOPY:-1}
-
-while true; do
-	case "$1" in
-		-k|--kernel)
-			KERNELRELEASE="$2"
-			shift 2
-			;;
-		-b|--build)
-			BUILDDIR="$2"
-			shift 2
-			;;
-		-r|--rootfs)
-			ROOTFSVERSION="$2"
-			shift 2
-			;;
-		-f|--force)
-			FORCE=1
-			shift
-			;;
-		-o|--one-shot)
-			ONESHOT=1
-			FORCE=1
-			shift
-			;;
-		-s|--setup-cmd)
-			SETUPCMD="$2"
-			shift 2
-			;;
-		-I|--skip-image)
-			SKIPIMG=1
-			shift
-			;;
-		-S|--skip-source)
-			SKIPSOURCE=1
-			shift
-			;;
-		-i|--interactive)
-			APPEND=" single"
-			shift
-			;;
-		-d|--dir)
-			DIR="$2"
-			shift 2
-			;;
-		-l|--list)
-			LIST=1
-			;;
-		-h|--help)
-			usage out
-			;;
-		--)
-			shift
-			break
-			;;
-		*)
-			usage err
-			;;
-	esac
-done
-if [[ -v BUILDDIR ]]; then
-	if [[ -v KERNELRELEASE ]]; then
-		usage err
-	fi
-elif [[ ! -v KERNELRELEASE ]]; then
-	KERNELRELEASE='*'
-fi
-if [[ $SKIPIMG -ne 0 && ( -v ROOTFSVERSION || $FORCE -ne 0 ) ]]; then
-	usage err
-fi
-if (( LIST )); then
-	if [[ $# -ne 0 || -v BUILDDIR || -v ROOTFSVERSION || $FORCE -ne 0 ||
-	      $SKIPIMG -ne 0 || $SKIPSOURCE -ne 0 || -n $APPEND ]]; then
-		usage err
-	fi
-else
-	if [[ $# -ne 1 ]]; then
-		usage err
-	fi
-	IMG="${!OPTIND}"
-fi
-
-unset URLS
-cache_urls() {
-	if ! declare -p URLS &> /dev/null; then
-		# This URL contains a mapping from file names to URLs where
-		# those files can be downloaded.
-		declare -gA URLS
-		while IFS=$'\t' read -r name url; do
-			URLS["$name"]="$url"
-		done < <(cat "${VMTEST_ROOT}/configs/INDEX")
-	fi
-}
-
-matching_kernel_releases() {
-	local pattern="$1"
-	{
-	for file in "${!URLS[@]}"; do
-		if [[ $file =~ ^vmlinux-(.*).zst$ ]]; then
-			release="${BASH_REMATCH[1]}"
-			case "$release" in
-				$pattern)
-					# sort -V handles rc versions properly
-					# if we use "~" instead of "-".
-					echo "${release//-rc/~rc}"
-					;;
-			esac
-		fi
-	done
-	} | sort -rV | sed 's/~rc/-rc/g'
-}
-
-newest_rootfs_version() {
-	{
-	for file in "${!URLS[@]}"; do
-		if [[ $file =~ ^${PROJECT_NAME}-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
-			echo "${BASH_REMATCH[1]}"
-		fi
-	done
-	} | sort -rV | head -1
-}
-
-download() {
-	local file="$1"
-	cache_urls
-	if [[ ! -v URLS[$file] ]]; then
-		echo "$file not found" >&2
-		return 1
-	fi
-	echo "Downloading $file..." >&2
-	curl -Lf "${URLS[$file]}" "${@:2}"
-}
-
-set_nocow() {
-	touch "$@"
-	chattr +C "$@" >/dev/null 2>&1 || true
-}
-
-cp_img() {
-	set_nocow "$2"
-	cp --reflink=auto "$1" "$2"
-}
-
-create_rootfs_img() {
-	local path="$1"
-	set_nocow "$path"
-	truncate -s 2G "$path"
-	mkfs.ext4 -q "$path"
-}
-
-download_rootfs() {
-	local rootfsversion="$1"
-	local dir="$2"
-	download "${PROJECT_NAME}-vmtest-rootfs-$rootfsversion.tar.zst" |
-		zstd -d | sudo tar -C "$dir" -x
-}
-
-if (( LIST )); then
-	cache_urls
-	matching_kernel_releases "$KERNELRELEASE"
-	exit 0
-fi
-
-if [[ $FORCE -eq 0 && $SKIPIMG -eq 0 && -e $IMG ]]; then
-	echo "$IMG already exists; use -f to overwrite it or -I to reuse it" >&2
-	exit 1
-fi
-
-# Only go to the network if it's actually a glob pattern.
-if [[ -v BUILDDIR ]]; then
-	KERNELRELEASE="$(make -C "$BUILDDIR" -s kernelrelease)"
-elif [[ ! $KERNELRELEASE =~ ^([^\\*?[]|\\[*?[])*\\?$ ]]; then
-	# We need to cache the list of URLs outside of the command
-	# substitution, which happens in a subshell.
-	cache_urls
-	KERNELRELEASE="$(matching_kernel_releases "$KERNELRELEASE" | head -1)"
-	if [[ -z $KERNELRELEASE ]]; then
-		echo "No matching kernel release found" >&2
-		exit 1
-	fi
-fi
-if [[ $SKIPIMG -eq 0 && ! -v ROOTFSVERSION ]]; then
-	cache_urls
-	ROOTFSVERSION="$(newest_rootfs_version)"
-fi
-
-echo "Kernel release: $KERNELRELEASE" >&2
-echo
-
-travis_fold start vmlinux_setup "Preparing Linux image"
-
-if (( SKIPIMG )); then
-	echo "Not extracting root filesystem" >&2
-else
-	echo "Root filesystem version: $ROOTFSVERSION" >&2
-fi
-echo "Disk image: $IMG" >&2
-
-tmp=
-ARCH_DIR="$DIR/x86_64"
-mkdir -p "$ARCH_DIR"
-mnt="$(mktemp -d -p "$DIR" mnt.XXXXXXXXXX)"
-
-cleanup() {
-	if [[ -n $tmp ]]; then
-		rm -f "$tmp" || true
-	fi
-	if mountpoint -q "$mnt"; then
-		sudo umount "$mnt" || true
-	fi
-	if [[ -d "$mnt" ]]; then
-		rmdir "$mnt" || true
-	fi
-}
-trap cleanup EXIT
-
-if [[ -v BUILDDIR ]]; then
-	vmlinuz="$BUILDDIR/$(make -C "$BUILDDIR" -s image_name)"
-else
-	vmlinuz="${ARCH_DIR}/vmlinuz-${KERNELRELEASE}"
-	if [[ ! -e $vmlinuz ]]; then
-		tmp="$(mktemp "$vmlinuz.XXX.part")"
-		download "vmlinuz-${KERNELRELEASE}" -o "$tmp"
-		mv "$tmp" "$vmlinuz"
-		tmp=
-	fi
-fi
-
-# Mount and set up the rootfs image.
-if (( ONESHOT )); then
-	rm -f "$IMG"
-	create_rootfs_img "$IMG"
-	sudo mount -o loop "$IMG" "$mnt"
-	download_rootfs "$ROOTFSVERSION" "$mnt"
-else
-	if (( ! SKIPIMG )); then
-		rootfs_img="${ARCH_DIR}/${PROJECT_NAME}-vmtest-rootfs-${ROOTFSVERSION}.img"
-
-		if [[ ! -e $rootfs_img ]]; then
-			tmp="$(mktemp "$rootfs_img.XXX.part")"
-			set_nocow "$tmp"
-			truncate -s 2G "$tmp"
-			mkfs.ext4 -q "$tmp"
-			sudo mount -o loop "$tmp" "$mnt"
-
-			download_rootfs "$ROOTFSVERSION" "$mnt"
-
-			sudo umount "$mnt"
-			mv "$tmp" "$rootfs_img"
-			tmp=
-		fi
-
-		rm -f "$IMG"
-		cp_img "$rootfs_img" "$IMG"
-	fi
-	sudo mount -o loop "$IMG" "$mnt"
-fi
-
-# Install vmlinux.
-vmlinux="$mnt/boot/vmlinux-${KERNELRELEASE}"
-if [[ -v BUILDDIR || $ONESHOT -eq 0 ]]; then
-	if [[ -v BUILDDIR ]]; then
-		source_vmlinux="${BUILDDIR}/vmlinux"
-	else
-		source_vmlinux="${ARCH_DIR}/vmlinux-${KERNELRELEASE}"
-		if [[ ! -e $source_vmlinux ]]; then
-			tmp="$(mktemp "$source_vmlinux.XXX.part")"
-			download "vmlinux-${KERNELRELEASE}.zst" | zstd -dfo "$tmp"
-			mv "$tmp" "$source_vmlinux"
-			tmp=
-		fi
-	fi
-	echo "Copying vmlinux..." >&2
-	sudo rsync -cp --chmod 0644 "$source_vmlinux" "$vmlinux"
-else
-	# We could use "sudo zstd -o", but let's not run zstd as root with
-	# input from the internet.
-	download "vmlinux-${KERNELRELEASE}.zst" |
-		zstd -d | sudo tee "$vmlinux" > /dev/null
-	sudo chmod 644 "$vmlinux"
-fi
-
-travis_fold end vmlinux_setup
-
-REPO_PATH="${SELFTEST_REPO_PATH:-travis-ci/vmtest/bpf-next}"
-LIBBPF_PATH="${REPO_ROOT}" \
-	VMTEST_ROOT="${VMTEST_ROOT}" \
-	REPO_PATH="${REPO_PATH}" \
-	VMLINUX_BTF=${vmlinux} ${VMTEST_ROOT}/build_selftests.sh
-
-travis_fold start bpftool_checks "Running bpftool checks..."
-if [[ "${KERNEL}" = 'LATEST' ]]; then
-	"${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf/test_bpftool_synctypes.py" && \
-		echo "Consistency checks passed successfully."
-else
-	echo "Consistency checks skipped."
-fi
-travis_fold end bpftool_checks
-
-travis_fold start vm_init "Starting virtual machine..."
-
-if (( SKIPSOURCE )); then
-	echo "Not copying source files..." >&2
-else
-	echo "Copying source files..." >&2
-	# Copy the source files in.
-	sudo mkdir -p -m 0755 "$mnt/${PROJECT_NAME}"
-	if [[ "${SOURCE_FULLCOPY}" == "1" ]]; then
-		git ls-files -z | sudo rsync --files-from=- -0cpt . "$mnt/${PROJECT_NAME}"
-	else
-		sudo mkdir -p -m 0755 ${mnt}/${PROJECT_NAME}/{selftests,travis-ci}
-		tree --du -shaC "${REPO_ROOT}/selftests/bpf"
-		sudo rsync -avm "${REPO_ROOT}/selftests/bpf" "$mnt/${PROJECT_NAME}/selftests/"
-		sudo rsync -avm "${REPO_ROOT}/travis-ci/vmtest" "$mnt/${PROJECT_NAME}/travis-ci/"
-        fi
-
-fi
-
-setup_script="#!/bin/sh
-
-echo 'Skipping setup commands'
-echo 0 > /exitstatus
-chmod 644 /exitstatus"
-
-# Create the init scripts.
-if [[ ! -z SETUPCMD ]]; then
-	# Unescape whitespace characters.
-	setup_cmd=$(sed 's/\(\\\)\([[:space:]]\)/\2/g' <<< "${SETUPCMD}")
-	kernel="${KERNELRELEASE}"
-	if [[ -v BUILDDIR ]]; then kernel='latest'; fi
-	setup_envvars="export KERNEL=${kernel}"
-	setup_script=$(printf "#!/bin/sh
-set -eux
-
-echo 'Running setup commands'
-%s
-set +e; %s; exitstatus=\$?; set -e
-echo \$exitstatus > /exitstatus
-chmod 644 /exitstatus" "${setup_envvars}" "${setup_cmd}")
-fi
-
-echo "${setup_script}" | sudo tee "$mnt/etc/rcS.d/S50-run-tests" > /dev/null
-sudo chmod 755 "$mnt/etc/rcS.d/S50-run-tests"
-
-fold_shutdown="$(travis_fold start shutdown)"
-poweroff_script="#!/bin/sh
-
-echo ${fold_shutdown}
-echo -e '\033[1;33mShutdown\033[0m\n'
-
-poweroff"
-echo "${poweroff_script}" | sudo tee "$mnt/etc/rcS.d/S99-poweroff" > /dev/null
-sudo chmod 755 "$mnt/etc/rcS.d/S99-poweroff"
-
-sudo umount "$mnt"
-
-echo "Starting VM with $(nproc) CPUs..."
-
-if kvm-ok ; then
-  accel="-cpu kvm64 -enable-kvm"
-else
-  accel="-cpu qemu64 -machine accel=tcg"
-fi
-qemu-system-x86_64 -nodefaults -display none -serial mon:stdio -no-reboot \
-  ${accel} -smp "$(nproc)" -m 4G \
-  -drive file="$IMG",format=raw,index=1,media=disk,if=virtio,cache=none \
-  -kernel "$vmlinuz" -append "root=/dev/vda rw console=ttyS0,115200 kernel.panic=-1 $APPEND"
-sudo mount -o loop "$IMG" "$mnt"
-if exitstatus="$(cat "$mnt/exitstatus" 2>/dev/null)"; then
-	printf '\nTests exit status: %s\n' "$exitstatus" >&2
-else
-	printf '\nCould not read tests exit status\n' >&2
-	exitstatus=1
-fi
-sudo umount "$mnt"
-
-travis_fold end shutdown
-
-exit "$exitstatus"
--- a/travis-ci/vmtest/run_selftests.sh
+++ b/travis-ci/vmtest/run_selftests.sh
@@ -4,44 +4,56 @@ set -euo pipefail

 source $(cd $(dirname $0) && pwd)/helpers.sh

+ARCH=$(uname -m)
+
+STATUS_FILE=/exitstatus
+
+read_lists() {
+	(for path in "$@"; do
+		if [[ -s "$path" ]]; then
+			cat "$path"
+		fi;
+	done) | cut -d'#' -f1 | tr -s ' \t\n' ','
+}
+
 test_progs() {
 	if [[ "${KERNEL}" != '4.9.0' ]]; then
 		travis_fold start test_progs "Testing test_progs"
-		./test_progs ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST}
+		# "&& true" does not change the return code (it is not executed
+		# if the Python script fails), but it prevents exiting on a
+		# failure due to the "set -e".
+		./test_progs ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST} && true
+		echo "test_progs:$?" >> "${STATUS_FILE}"
 		travis_fold end test_progs
 	fi

 	travis_fold start test_progs-no_alu32 "Testing test_progs-no_alu32"
-	./test_progs-no_alu32 ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST}
+	./test_progs-no_alu32 ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST} && true
+	echo "test_progs-no_alu32:$?" >> "${STATUS_FILE}"
 	travis_fold end test_progs-no_alu32
 }

 test_maps() {
 	travis_fold start test_maps "Testing test_maps"
-	./test_maps
+	./test_maps && true
+	echo "test_maps:$?" >> "${STATUS_FILE}"
 	travis_fold end test_maps
 }

 test_verifier() {
 	travis_fold start test_verifier "Testing test_verifier"
-	./test_verifier
+	./test_verifier && true
+	echo "test_verifier:$?" >> "${STATUS_FILE}"
 	travis_fold end test_verifier
 }

 travis_fold end vm_init

-configs_path='libbpf/travis-ci/vmtest/configs'
-blacklist_path="$configs_path/blacklist/BLACKLIST-${KERNEL}"
-if [[ -s "${blacklist_path}" ]]; then
-	BLACKLIST=$(cat "${blacklist_path}" | cut -d'#' -f1 | tr -s '[:space:]' ',')
-fi
+configs_path=${PROJECT_NAME}/vmtest/configs
+BLACKLIST=$(read_lists "$configs_path/blacklist/BLACKLIST-${KERNEL}" "$configs_path/blacklist/BLACKLIST-${KERNEL}.${ARCH}")
+WHITELIST=$(read_lists "$configs_path/whitelist/WHITELIST-${KERNEL}" "$configs_path/whitelist/WHITELIST-${KERNEL}.${ARCH}")

-whitelist_path="$configs_path/whitelist/WHITELIST-${KERNEL}"
-if [[ -s "${whitelist_path}" ]]; then
-	WHITELIST=$(cat "${whitelist_path}" | cut -d'#' -f1 | tr -s '[:space:]' ',')
-fi
-
-cd libbpf/selftests/bpf
+cd ${PROJECT_NAME}/selftests/bpf

 test_progs

--- a/travis-ci/vmtest/run_vmtest.sh
+++ b/travis-ci/vmtest/run_vmtest.sh
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-source $(cd $(dirname $0) && pwd)/helpers.sh
-
-VMTEST_SETUPCMD="GITHUB_WORKFLOW=${GITHUB_WORKFLOW:-} PROJECT_NAME=${PROJECT_NAME} ./${PROJECT_NAME}/travis-ci/vmtest/run_selftests.sh"
-
-# if CHECKOUT_KERNEL is 1 code will consider that kernel code lives elsewhere
-# if 0 it will consider that REPO_ROOT is a kernel tree
-CHECKOUT_KERNEL=${CHECKOUT_KERNEL:-1}
-
-echo "KERNEL: $KERNEL"
-echo
-
-# Build latest pahole
-${VMTEST_ROOT}/build_pahole.sh travis-ci/vmtest/pahole
-
-travis_fold start install_clang "Installing Clang/LLVM"
-
-wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal main"
-sudo apt-get update
-sudo apt-get install --allow-downgrades -y libc6=2.31-0ubuntu9.2
-sudo aptitude install -y g++ libelf-dev
-sudo aptitude install -y clang-14 lld-14 llvm-14
-
-travis_fold end install_clang
-
-# Build selftests (and latest kernel, if necessary)
-
-if [[ "$CHECKOUT_KERNEL" == "1" ]]; then
-  ${VMTEST_ROOT}/prepare_selftests.sh travis-ci/vmtest/bpf-next
-else
-  ${VMTEST_ROOT}/prepare_selftests.sh
-fi
-
-# Escape whitespace characters.
-setup_cmd=$(sed 's/\([[:space:]]\)/\\\1/g' <<< "${VMTEST_SETUPCMD}")
-
-sudo adduser "${USER}" kvm
-
-if [[ "${KERNEL}" = 'LATEST' ]]; then
-  if [[ "$CHECKOUT_KERNEL" == "1" ]]; then
-    sudo -E sudo -E -u "${USER}" "${VMTEST_ROOT}/run.sh" -b travis-ci/vmtest/bpf-next -o -d ~ -s "${setup_cmd}" ~/root.img
-  else
-    sudo -E sudo -E -u "${USER}" "${VMTEST_ROOT}/run.sh" -b "${REPO_ROOT}" -o -d ~ -s "${setup_cmd}" ~/root.img
-  fi
-else
-  sudo -E sudo -E -u "${USER}" "${VMTEST_ROOT}/run.sh" -k "${KERNEL}*" -o -d ~ -s "${setup_cmd}" ~/root.img
-fi