netdata_patch_1_2_1: Add a patch to run binaries on Debian 10

Merge branch 'libbpf:master' into master
sync: latest libbpf changes from kernel
2026-03-14 21:39:07 +08:00 · 2023-07-10 22:48:42 +00:00 · 2023-07-10 22:36:12 +00:00 · 2023-07-10 14:24:42 -07:00 · 2023-07-10 14:24:42 -07:00 · 2023-07-07 18:55:44 -07:00
116 changed files with 102154 additions and 89719 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
+assets/** export-ignore
--- a/.github/actions/build-selftests/action.yml
+++ b/.github/actions/build-selftests/action.yml
@@ -18,9 +18,10 @@ runs:
  steps:
    - shell: bash
      run: |
-        echo "::group::Setup Env"
+        source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
+        foldable start "Setup Env"
        sudo apt-get install -y qemu-kvm zstd binutils-dev elfutils libcap-dev libelf-dev libdw-dev python3-docutils
-        echo "::endgroup::"
+        foldable end
    - shell: bash
      run: |
        export KERNEL=${{ inputs.kernel }}
--- a/.github/actions/build-selftests/build_selftests.sh
+++ b/.github/actions/build-selftests/build_selftests.sh
@@ -6,11 +6,28 @@ THISDIR="$(cd $(dirname $0) && pwd)"

 source ${THISDIR}/helpers.sh

-travis_fold start prepare_selftests "Building selftests"
+foldable start prepare_selftests "Building selftests"

-LLVM_VER=15
 LIBBPF_PATH="${REPO_ROOT}"

+llvm_default_version() {
+	echo "16"
+}
+
+llvm_latest_version() {
+	echo "17"
+}
+
+LLVM_VERSION=$(llvm_default_version)
+if [[ "${LLVM_VERSION}" == $(llvm_latest_version) ]]; then
+	REPO_DISTRO_SUFFIX=""
+else
+	REPO_DISTRO_SUFFIX="-${LLVM_VERSION}"
+fi
+
+echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal${REPO_DISTRO_SUFFIX} main" \
+	| sudo tee /etc/apt/sources.list.d/llvm.list
+
 PREPARE_SELFTESTS_SCRIPT=${THISDIR}/prepare_selftests-${KERNEL}.sh
 if [ -f "${PREPARE_SELFTESTS_SCRIPT}" ]; then
 	(cd "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf" && ${PREPARE_SELFTESTS_SCRIPT})
@@ -23,10 +40,11 @@ else
 fi

 cd ${REPO_ROOT}/${REPO_PATH}
+make headers
 make \
-	CLANG=clang-${LLVM_VER} \
-	LLC=llc-${LLVM_VER} \
-	LLVM_STRIP=llvm-strip-${LLVM_VER} \
+	CLANG=clang-${LLVM_VERSION} \
+	LLC=llc-${LLVM_VERSION} \
+	LLVM_STRIP=llvm-strip-${LLVM_VERSION} \
 	VMLINUX_BTF="${VMLINUX_BTF}" \
 	VMLINUX_H=${VMLINUX_H} \
 	-C "${REPO_ROOT}/${REPO_PATH}/tools/testing/selftests/bpf" \
@@ -39,4 +57,4 @@ cd ${LIBBPF_PATH}
 rm selftests/bpf/.gitignore
 git add selftests

-travis_fold end prepare_selftests
+foldable end prepare_selftests
--- a/.github/actions/build-selftests/helpers.sh
+++ b/.github/actions/build-selftests/helpers.sh
@@ -1,26 +1,20 @@
+# shellcheck shell=bash
+
 # $1 - start or end
 # $2 - fold identifier, no spaces
 # $3 - fold section description
-travis_fold() {
+foldable() {
  local YELLOW='\033[1;33m'
  local NOCOLOR='\033[0m'
-  if [ -z ${GITHUB_WORKFLOW+x} ]; then
-    echo travis_fold:$1:$2
+  if [ $1 = "start" ]; then
+    line="::group::$2"
    if [ ! -z "${3:-}" ]; then
-      echo -e "${YELLOW}$3${NOCOLOR}"
+      line="$line - ${YELLOW}$3${NOCOLOR}"
    fi
-    echo
  else
-    if [ $1 = "start" ]; then
-      line="::group::$2"
-      if [ ! -z "${3:-}" ]; then
-        line="$line - ${YELLOW}$3${NOCOLOR}"
-      fi
-    else
-      line="::endgroup::"
-    fi
-    echo -e "$line"
+    line="::endgroup::"
  fi
+  echo -e "$line"
 }

 __print() {
--- a/.github/actions/build-selftests/vmlinux.h
+++ b/.github/actions/build-selftests/vmlinux.h
--- a/.github/actions/setup/action.yml
+++ b/.github/actions/setup/action.yml
@@ -6,7 +6,7 @@ runs:
    - id: variables
      run: |
        export REPO_ROOT=$GITHUB_WORKSPACE
-        export CI_ROOT=$REPO_ROOT/travis-ci
+        export CI_ROOT=$REPO_ROOT/ci
        # this is somewhat ugly, but that is the easiest way to share this code with
        # arch specific docker
        echo 'echo ::group::Env setup' > /tmp/ci_setup
@@ -16,7 +16,7 @@ runs:
        echo export PROJECT_NAME='libbpf' >> /tmp/ci_setup
        echo export AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")" >> /tmp/ci_setup
        echo export REPO_ROOT=$GITHUB_WORKSPACE >> /tmp/ci_setup
-        echo export CI_ROOT=$REPO_ROOT/travis-ci >> /tmp/ci_setup
+        echo export CI_ROOT=$REPO_ROOT/ci >> /tmp/ci_setup
        echo export VMTEST_ROOT=$CI_ROOT/vmtest >> /tmp/ci_setup
        echo 'echo ::endgroup::' >> /tmp/ci_setup
      shell: bash
--- a/.github/actions/vmtest/action.yml
+++ b/.github/actions/vmtest/action.yml
@@ -16,9 +16,9 @@ inputs:
 runs:
  using: "composite"
  steps:
-    # setup envinronment
+    # setup environment
    - name: Setup environment
-      uses: libbpf/ci/setup-build-env@master
+      uses: libbpf/ci/setup-build-env@main
      with:
        pahole: ${{ inputs.pahole }}
    # 1. download CHECKPOINT kernel source
@@ -28,40 +28,43 @@ runs:
        cat CHECKPOINT-COMMIT
        echo "CHECKPOINT=$(cat CHECKPOINT-COMMIT)" >> $GITHUB_ENV
    - name: Get kernel source at checkpoint
-      uses: libbpf/ci/get-linux-source@master
+      uses: libbpf/ci/get-linux-source@main
      with:
        repo: 'https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git'
        rev: ${{ env.CHECKPOINT }}
        dest: '${{ github.workspace }}/.kernel'
    - name: Patch kernel source
-      uses: libbpf/ci/patch-kernel@master
+      uses: libbpf/ci/patch-kernel@main
      with:
-        patches-root: '${{ github.workspace }}/travis-ci/diffs'
+        patches-root: '${{ github.workspace }}/ci/diffs'
        repo-root: '.kernel'
    - name: Prepare to build BPF selftests
      shell: bash
      run: |
-        echo "::group::Prepare buidling selftest"
+        source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
+        foldable start "Prepare building selftest"
        cd .kernel
-        cp ${{ github.workspace }}/travis-ci/vmtest/configs/config-latest.${{ inputs.arch }} .config
+        cat tools/testing/selftests/bpf/config \
+            tools/testing/selftests/bpf/config.${{ inputs.arch }} > .config
        make olddefconfig && make prepare
        cd -
-        echo "::endgroup::"
+        foldable end
    # 2. if kernel == LATEST, build kernel image from tree
    - name: Build kernel image
      if: ${{ inputs.kernel == 'LATEST' }}
      shell: bash
      run: |
-        echo "::group::Build Kernel Image"
+        source $GITHUB_ACTION_PATH/../../../ci/vmtest/helpers.sh
+        foldable start "Build Kernel Image"
        cd .kernel
        make -j $((4*$(nproc))) all > /dev/null
        cp vmlinux ${{ github.workspace }}
        cd -
-        echo "::endgroup::"
+        foldable end
    # else, just download prebuilt kernel image
    - name: Download prebuilt kernel
      if: ${{ inputs.kernel != 'LATEST' }}
-      uses: libbpf/ci/download-vmlinux@master
+      uses: libbpf/ci/download-vmlinux@main
      with:
        kernel: ${{ inputs.kernel }}
        arch: ${{ inputs.arch }}
@@ -73,15 +76,24 @@ runs:
        kernel: ${{ inputs.kernel }}
    # 4. prepare rootfs
    - name: prepare rootfs
-      uses: libbpf/ci/prepare-rootfs@master
+      uses: libbpf/ci/prepare-rootfs@main
+      env:
+        KBUILD_OUTPUT: '.kernel'
      with:
-        kernel: ${{ inputs.kernel }}
        project-name: 'libbpf'
        arch: ${{ inputs.arch }}
+        kernel: ${{ inputs.kernel }}
+        kernel-root: '.kernel'
+        kbuild-output: ${{ env.KBUILD_OUTPUT }}
+        image-output: '/tmp/root.img'
    # 5. run selftest in QEMU
    - name: Run selftests
-      uses: libbpf/ci/run-qemu@master
+      env:
+        KERNEL: ${{ inputs.kernel }}
+        REPO_ROOT: ${{ github.workspace }}
+      uses: libbpf/ci/run-qemu@main
      with:
+        arch: ${{ inputs.arch }}
        img: '/tmp/root.img'
        vmlinuz: 'vmlinuz'
-        arch: ${{ inputs.arch }}
+        kernel-root: '.kernel'
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,16 +23,26 @@ jobs:
            target: RUN
          - name: ASan+UBSan
            target: RUN_ASAN
-          - name: clang
-            target: RUN_CLANG
          - name: clang ASan+UBSan
            target: RUN_CLANG_ASAN
-          - name: gcc-10
-            target: RUN_GCC10
          - name: gcc-10 ASan+UBSan
            target: RUN_GCC10_ASAN
+          - name: clang
+            target: RUN_CLANG
+          - name: clang-14
+            target: RUN_CLANG14
+          - name: clang-15
+            target: RUN_CLANG15
+          - name: clang-16
+            target: RUN_CLANG16
+          - name: gcc-10
+            target: RUN_GCC10
+          - name: gcc-11
+            target: RUN_GCC11
+          - name: gcc-12
+            target: RUN_GCC12
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
        name: Checkout
      - uses: ./.github/actions/setup
        name: Setup
@@ -53,7 +63,7 @@ jobs:
          - arch: s390x
          - arch: x86
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
        name: Checkout
      - uses: ./.github/actions/setup
        name: Pre-Setup
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,52 @@
+---
+# vi: ts=2 sw=2 et:
+
+name: "CodeQL"
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+permissions:
+  contents: read
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-22.04
+    concurrency:
+      group: ${{ github.workflow }}-${{ matrix.language }}-${{ github.ref }}
+      cancel-in-progress: true
+    permissions:
+      actions: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: ['cpp', 'python']
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v2
+        with:
+          languages: ${{ matrix.language }}
+          queries: +security-extended,security-and-quality
+
+      - name: Setup
+        uses: ./.github/actions/setup
+
+      - name: Build
+        run: |
+          source /tmp/ci_setup
+          make -C ./src
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v2
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -11,16 +11,17 @@ jobs:
    if: github.repository == 'libbpf/libbpf'
    name: Coverity
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
      - name: Run coverity
        run: |
-          echo ::group::Setup CI env
+          source "${GITHUB_WORKSPACE}"/ci/vmtest/helpers.sh
+          foldable start "Setup CI env"
          source /tmp/ci_setup
          export COVERITY_SCAN_NOTIFICATION_EMAIL="${AUTHOR_EMAIL}"
          export COVERITY_SCAN_BRANCH_PATTERN=${GITHUB_REF##refs/*/}
          export TRAVIS_BRANCH=${COVERITY_SCAN_BRANCH_PATTERN}
-          echo ::endgroup::
+          foldable end
          scripts/coverity.sh
        env:
          COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }}
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,19 @@
+name: "lint"
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  shellcheck:
+    name: ShellCheck
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Run ShellCheck
+        uses: ludeeus/action-shellcheck@master
+        env:
+          SHELLCHECK_OPTS: --severity=error
--- a/.github/workflows/ondemand.yml
+++ b/.github/workflows/ondemand.yml
@@ -25,7 +25,7 @@ jobs:
    runs-on: ubuntu-latest
    name: vmtest with customized pahole/Kernel
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
      - uses: ./.github/actions/vmtest
        with:
--- a/.github/workflows/pahole.yml
+++ b/.github/workflows/pahole.yml
@@ -7,12 +7,12 @@ on:

 jobs:
  vmtest:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
    name: Kernel LATEST + staging pahole
    env:
      STAGING: tmp.master
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup
      - uses: ./.github/actions/vmtest
        with:
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -19,19 +19,19 @@ jobs:
      matrix:
        include:
          - kernel: 'LATEST'
-            runs_on: ubuntu-latest
+            runs_on: ubuntu-20.04
            arch: 'x86_64'
          - kernel: '5.5.0'
-            runs_on: ubuntu-latest
+            runs_on: ubuntu-20.04
            arch: 'x86_64'
          - kernel: '4.9.0'
-            runs_on: ubuntu-latest
+            runs_on: ubuntu-20.04
            arch: 'x86_64'
          - kernel: 'LATEST'
-            runs_on: z15
+            runs_on: s390x
            arch: 's390x'
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
        name: Checkout
      - uses: ./.github/actions/setup
        name: Setup
--- a/.lgtm.yml
+++ b/.lgtm.yml
@@ -1,14 +0,0 @@
-# vi: set ts=2 sw=2:
-extraction:
-  cpp:
-    prepare:
-      packages:
-        - libelf-dev
-        - pkg-config
-    after_prepare:
-      # As the buildsystem detection by LGTM is performed _only_ during the
-      # 'configure' phase, we need to trick LGTM we use a supported build
-      # system (configure, meson, cmake, etc.). This way LGTM correctly detects
-      # that our sources are in the src/ subfolder.
-      - touch src/configure
-      - chmod +x src/configure
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -10,6 +10,11 @@ sphinx:
  builder: html
  configuration: docs/conf.py

+formats:
+  - htmlzip
+  - pdf
+  - epub
+
 # Optionally set the version of Python and requirements required to build your docs
 python:
   version: 3.7
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,130 +0,0 @@
-sudo: required
-language: bash
-dist: focal
-services:
-    - docker
-
-env:
-    global:
-        - PROJECT_NAME='libbpf'
-        - AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")"
-        - REPO_ROOT="$TRAVIS_BUILD_DIR"
-        - CI_ROOT="$REPO_ROOT/travis-ci"
-        - VMTEST_ROOT="$CI_ROOT/vmtest"
-
-addons:
-    apt:
-        packages:
-            - qemu-kvm
-            - zstd
-            - binutils-dev
-            - elfutils
-            - libcap-dev
-            - libelf-dev
-            - libdw-dev
-
-stages:
-    # Run Coverity periodically instead of for each PR for following reasons:
-    # 1) Coverity jobs are heavily rate-limited
-    # 2) Due to security restrictions of encrypted environment variables
-    #    in Travis CI, pull requests made from forks can't access encrypted
-    #    env variables, making Coverity unusable
-    #    See: https://docs.travis-ci.com/user/pull-requests#pull-requests-and-security-restrictions
-    - name: Coverity
-      if: type = cron
-
-jobs:
-    include:
-        - stage: Builds & Tests
-          name: Kernel 5.5.0 + selftests
-          language: bash
-          env: KERNEL=5.5.0
-          script:  $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
-
-        - name: Kernel LATEST + selftests
-          language: bash
-          env: KERNEL=LATEST
-          script:  $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
-
-        - name: Kernel 4.9.0 + selftests
-          language: bash
-          env: KERNEL=4.9.0
-          script:  $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
-
-        - name: Debian Build
-          language: bash
-          install:        $CI_ROOT/managers/debian.sh SETUP
-          script:         $CI_ROOT/managers/debian.sh RUN || travis_terminate 1
-          after_script:   $CI_ROOT/managers/debian.sh CLEANUP
-
-        - name: Debian Build (ASan+UBSan)
-          language: bash
-          install:        $CI_ROOT/managers/debian.sh SETUP
-          script:         $CI_ROOT/managers/debian.sh RUN_ASAN || travis_terminate 1
-          after_script:   $CI_ROOT/managers/debian.sh CLEANUP
-
-        - name: Debian Build (clang)
-          language: bash
-          install:        $CI_ROOT/managers/debian.sh SETUP
-          script:         $CI_ROOT/managers/debian.sh RUN_CLANG || travis_terminate 1
-          after_script:   $CI_ROOT/managers/debian.sh CLEANUP
-
-        - name: Debian Build (clang ASan+UBSan)
-          language: bash
-          install:        $CI_ROOT/managers/debian.sh SETUP
-          script:         $CI_ROOT/managers/debian.sh RUN_CLANG_ASAN || travis_terminate 1
-          after_script:   $CI_ROOT/managers/debian.sh CLEANUP
-
-        - name: Debian Build (gcc-10)
-          language: bash
-          install:        $CI_ROOT/managers/debian.sh SETUP
-          script:         $CI_ROOT/managers/debian.sh RUN_GCC10 || travis_terminate 1
-          after_script:   $CI_ROOT/managers/debian.sh CLEANUP
-
-        - name: Debian Build (gcc-10 ASan+UBSan)
-          language: bash
-          install:        $CI_ROOT/managers/debian.sh SETUP
-          script:         $CI_ROOT/managers/debian.sh RUN_GCC10_ASAN || travis_terminate 1
-          after_script:   $CI_ROOT/managers/debian.sh CLEANUP
-
-        - name: Ubuntu Focal Build
-          language: bash
-          script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
-
-        - name: Ubuntu Focal Build (arm)
-          arch: arm64
-          language: bash
-          script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
-
-        - name: Ubuntu Focal Build (s390x)
-          arch: s390x
-          language: bash
-          script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
-
-        - name: Ubuntu Focal Build (ppc64le)
-          arch: ppc64le
-          language: bash
-          script: sudo $CI_ROOT/managers/ubuntu.sh || travis_terminate 1
-
-        - stage: Coverity
-          language: bash
-          env:
-              # Coverity configuration
-              # COVERITY_SCAN_TOKEN=xxx
-              # Encrypted using `travis encrypt --repo libbpf/libbpf COVERITY_SCAN_TOKEN=xxx`
-              - secure: "I9OsMRHbb82IUivDp+I+w/jEQFOJgBDAqYqf1ollqCM1QhocxMcS9bwIAgfPhdXi2hohV7sRrVMZstahY67FAvJLGxNopi4tAPDIAaIFxgO0yDxMhaTMx5xDfMwlIm2FOP/9gB9BQsd6M7CmoQZgXYwBIv7xd1ooxoQrh2rOK1YrRl7UQu3+c3zPTjDfIYZzR3bFttMqZ9/c4U0v8Ry5IFXrel3hCshndHA1TtttJrUSrILlZcmVc1ch7JIy6zCbCU/2lGv0B/7rWXfF8MT7O9jPtFOhJ1DEcd2zhw2n4j9YT3a8OhtnM61LA6ask632mwCOsxpFLTun7AzuR1Cb5mdPHsxhxnCHcXXARa2mJjem0QG1NhwxwJE8sbRDapojexxCvweYlEN40ofwMDSnj/qNt95XIcrk0tiIhGFx0gVNWvAdmZwx+N4mwGPMTAN0AEOFjpgI+ZdB89m+tL/CbEgE1flc8QxUxJhcp5OhH6yR0z9qYOp0nXIbHsIaCiRvt/7LqFRQfheifztWVz4mdQlCdKS9gcOQ09oKicPevKO1L0Ue3cb7Ug7jOpMs+cdh3XokJtUeYEr1NijMHT9+CTAhhO5RToWXIZRon719z3fwoUBNDREATwVFMlVxqSO/pbYgaKminigYbl785S89YYaZ6E5UvaKRHM6KHKMDszs="
-              - COVERITY_SCAN_PROJECT_NAME="libbpf"
-              - COVERITY_SCAN_NOTIFICATION_EMAIL="${AUTHOR_EMAIL}"
-              - COVERITY_SCAN_BRANCH_PATTERN="$TRAVIS_BRANCH"
-              # Note: `make -C src/` as a BUILD_COMMAND will not work here
-              - COVERITY_SCAN_BUILD_COMMAND_PREPEND="cd src/"
-              - COVERITY_SCAN_BUILD_COMMAND="make"
-          install:
-              - sudo echo 'deb-src http://archive.ubuntu.com/ubuntu/ focal main restricted universe multiverse' >>/etc/apt/sources.list
-              - sudo apt-get update
-              - sudo apt-get -y build-dep libelf-dev
-              - sudo apt-get install -y libelf-dev pkg-config
-          script:
-              - scripts/coverity.sh || travis_terminate 1
-    allow_failures:
-        - env: KERNEL=x.x.x
--- a/2
+++ b/2
@@ -1 +1 @@
-fe68195daf34d5dddacd3f93dd3eafc4beca3a0e
+496720b7cfb6574a8f6f4d434f23e3d1e6cfaeb9
--- a/2
+++ b/2
@@ -1 +1 @@
-dc37dc617fabfb1c3a16d49f5d8cc20e9e3608ca
+c628747cc8800cf6d33d09f7f42c8b6f91e64dc7
--- a/LICENSE.BSD-2-Clause
+++ b/LICENSE.BSD-2-Clause
@@ -7,7 +7,7 @@ Usage-Guide:
    SPDX-License-Identifier: BSD-2-Clause
 License-Text:

-Copyright (c) <year> <owner> . All rights reserved.
+Copyright (c) 2015 The Libbpf Authors. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
--- a/README.md
+++ b/README.md
@@ -1,17 +1,33 @@
-This is a mirror of [bpf-next Linux source
-tree](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next)'s
-`tools/lib/bpf` directory plus its supporting header files.
+<picture>
+  <source media="(prefers-color-scheme: dark)" srcset="assets/libbpf-logo-sideways-darkbg.png" width="40%">
+  <img src="assets/libbpf-logo-sideways.png" width="40%">
+</picture>

-All the gory details of syncing can be found in `scripts/sync-kernel.sh`
-script.
+libbpf
+[![Github Actions Builds & Tests](https://github.com/libbpf/libbpf/actions/workflows/test.yml/badge.svg)](https://github.com/libbpf/libbpf/actions/workflows/test.yml)
+[![Coverity](https://img.shields.io/coverity/scan/18195.svg)](https://scan.coverity.com/projects/libbpf)
+[![CodeQL](https://github.com/libbpf/libbpf/workflows/CodeQL/badge.svg?branch=master)](https://github.com/libbpf/libbpf/actions?query=workflow%3ACodeQL+branch%3Amaster)
+[![OSS-Fuzz Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/libbpf.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#libbpf)
+[![Read the Docs](https://readthedocs.org/projects/libbpf/badge/?version=latest)](https://libbpf.readthedocs.io/en/latest/)
+======

-Some header files in this repo (`include/linux/*.h`) are reduced versions of
-their counterpart files at
-[bpf-next](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/)'s
-`tools/include/linux/*.h` to make compilation successful.
+**This is the official home of the libbpf library.**

-BPF/libbpf usage and questions
-==============================
+*Please use this Github repository for building and packaging libbpf
+and when using it in your projects through Git submodule.*
+
+Libbpf *authoritative source code* is developed as part of [bpf-next Linux source
+tree](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next) under
+`tools/lib/bpf` subdirectory and is periodically synced to Github. As such, all the
+libbpf changes should be sent to [BPF mailing list](http://vger.kernel.org/vger-lists.html#bpf),
+please don't open PRs here unless you are changing Github-specific parts of libbpf
+(e.g., Github-specific Makefile).
+
+Libbpf and general BPF usage questions
+======================================
+
+Libbpf documentation can be found [here](https://libbpf.readthedocs.io/en/latest/api.html).
+It's an ongoing effort and has ways to go, but please take a look and consider contributing as well.

 Please check out [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap)
 and [the companion blog post](https://nakryiko.com/posts/libbpf-bootstrap/) for
@@ -36,12 +52,8 @@ to help you with whatever issue you have. This repository's PRs and issues
 should be opened only for dealing with issues pertaining to specific way this
 libbpf mirror repo is set up and organized.

-Build
-[![Github Actions Builds & Tests](https://github.com/libbpf/libbpf/actions/workflows/test.yml/badge.svg)](https://github.com/libbpf/libbpf/actions/workflows/test.yml)
-[![Total alerts](https://img.shields.io/lgtm/alerts/g/libbpf/libbpf.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/libbpf/libbpf/alerts/)
-[![Coverity](https://img.shields.io/coverity/scan/18195.svg)](https://scan.coverity.com/projects/libbpf)
-[![OSS-Fuzz Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/libbpf.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#libbpf)
-=====
+Building libbpf
+===============
 libelf is an internal dependency of libbpf and thus it is required to link
 against and must be installed on the system for applications to work.
 pkg-config is used by default to find libelf, and the program called can be
@@ -73,34 +85,6 @@ $ cd src
 $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make install
 ```

-Distributions
-=============
-
-Distributions packaging libbpf from this mirror:
-  - [Fedora](https://src.fedoraproject.org/rpms/libbpf)
-  - [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf)
-  - [Debian](https://packages.debian.org/source/sid/libbpf)
-  - [Arch](https://www.archlinux.org/packages/extra/x86_64/libbpf/)
-  - [Ubuntu](https://packages.ubuntu.com/source/impish/libbpf)
-  - [Alpine](https://pkgs.alpinelinux.org/packages?name=libbpf)
-
-Benefits of packaging from the mirror over packaging from kernel sources:
-  - Consistent versioning across distributions.
-  - No ties to any specific kernel, transparent handling of older kernels.
-    Libbpf is designed to be kernel-agnostic and work across multitude of
-    kernel versions. It has built-in mechanisms to gracefully handle older
-    kernels, that are missing some of the features, by working around or
-    gracefully degrading functionality. Thus libbpf is not tied to a specific
-    kernel version and can/should be packaged and versioned independently.
-  - Continuous integration testing via
-    [TravisCI](https://travis-ci.org/libbpf/libbpf).
-  - Static code analysis via [LGTM](https://lgtm.com/projects/g/libbpf/libbpf)
-    and [Coverity](https://scan.coverity.com/projects/libbpf).
-
-Package dependencies of libbpf, package names may vary across distros:
-  - zlib
-  - libelf
-
 BPF CO-RE (Compile Once – Run Everywhere)
 =========================================

@@ -154,6 +138,48 @@ use it:
  converting some more to both contribute to the BPF community and gain some
  more experience with it.

+Distributions
+=============
+
+Distributions packaging libbpf from this mirror:
+  - [Fedora](https://src.fedoraproject.org/rpms/libbpf)
+  - [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf)
+  - [Debian](https://packages.debian.org/source/sid/libbpf)
+  - [Arch](https://archlinux.org/packages/core/x86_64/libbpf/)
+  - [Ubuntu](https://packages.ubuntu.com/source/impish/libbpf)
+  - [Alpine](https://pkgs.alpinelinux.org/packages?name=libbpf)
+
+Benefits of packaging from the mirror over packaging from kernel sources:
+  - Consistent versioning across distributions.
+  - No ties to any specific kernel, transparent handling of older kernels.
+    Libbpf is designed to be kernel-agnostic and work across multitude of
+    kernel versions. It has built-in mechanisms to gracefully handle older
+    kernels, that are missing some of the features, by working around or
+    gracefully degrading functionality. Thus libbpf is not tied to a specific
+    kernel version and can/should be packaged and versioned independently.
+  - Continuous integration testing via
+    [GitHub Actions](https://github.com/libbpf/libbpf/actions).
+  - Static code analysis via [LGTM](https://lgtm.com/projects/g/libbpf/libbpf)
+    and [Coverity](https://scan.coverity.com/projects/libbpf).
+
+Package dependencies of libbpf, package names may vary across distros:
+  - zlib
+  - libelf
+
+[![libbpf distro packaging status](https://repology.org/badge/vertical-allrepos/libbpf.svg)](https://repology.org/project/libbpf/versions)
+
+
+bpf-next to Github sync
+=======================
+
+All the gory details of syncing can be found in `scripts/sync-kernel.sh`
+script. See [SYNC.md](SYNC.md) for instruction.
+
+Some header files in this repo (`include/linux/*.h`) are reduced versions of
+their counterpart files at
+[bpf-next](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/)'s
+`tools/include/linux/*.h` to make compilation successful.
+
 License
 =======

--- a/SYNC.md
+++ b/SYNC.md
@@ -0,0 +1,281 @@
+<picture>
+  <source media="(prefers-color-scheme: dark)" srcset="assets/libbpf-logo-sideways-darkbg.png" width="40%">
+  <img src="assets/libbpf-logo-sideways.png" width="40%">
+</picture>
+
+Libbpf sync
+===========
+
+Libbpf *authoritative source code* is developed as part of [bpf-next Linux source
+tree](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next) under
+`tools/lib/bpf` subdirectory and is periodically synced to Github.
+
+Most of the mundane mechanical things like bpf and bpf-next tree merge, Git
+history transformation, cherry-picking relevant commits, re-generating
+auto-generated headers, etc. are taken care by
+[sync-kernel.sh script](https://github.com/libbpf/libbpf/blob/master/scripts/sync-kernel.sh).
+But occasionally human needs to do few extra things to make everything work
+nicely.
+
+This document goes over the process of syncing libbpf sources from Linux repo
+to this Github repository. Feel free to contribute fixes and additions if you
+run into new problems not outlined here.
+
+Setup expectations
+------------------
+
+Sync script has particular expectation of upstream Linux repo setup. It
+expects that current HEAD of that repo points to bpf-next's master branch and
+that there is a separate local branch pointing to bpf tree's master branch.
+This is important, as the script will automatically merge their histories for
+the purpose of libbpf sync.
+
+Below, we assume that Linux repo is located at `~/linux`, it's current head is
+at latest `bpf-next/master`, and libbpf's Github repo is located at
+`~/libbpf`, checked out to latest commit on `master` branch. It doesn't matter
+from where to run `sync-kernel.sh` script, but we'll be running it from inside
+`~/libbpf`.
+
+```
+$ cd ~/linux && git remote -v | grep -E '^(bpf|bpf-next)'
+bpf     https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git (fetch)
+bpf     ssh://git@gitolite.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
+(push)
+bpf-next
+https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git (fetch)
+bpf-next
+ssh://git@gitolite.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git (push)
+$ git branch -vv | grep -E '^? (master|bpf-master)'
+* bpf-master                               2d311f480b52 [bpf/master] riscv, bpf: Fix patch_text implicit declaration
+  master                                   c8ee37bde402 [bpf-next/master] libbpf: Fix bpf_xdp_query() in old kernels
+$ git checkout bpf-master && git pull && git checkout master && git pull
+...
+$ git log --oneline -n1
+c8ee37bde402 (HEAD -> master, bpf-next/master) libbpf: Fix bpf_xdp_query() in old kernels
+$ cd ~/libbpf && git checkout master && git pull
+Your branch is up to date with 'libbpf/master'.
+Already up to date.
+```
+
+Running setup script
+--------------------
+
+First step is to always run `sync-kernel.sh` script. It expects three arguments:
+
+```
+$ scripts/sync-kernel.sh <libbpf-repo> <kernel-repo> <bpf-branch>
+```
+
+Note, that we'll store script's entire output in `/tmp/libbpf-sync.txt` and
+put it into PR summary later on. **Please store scripts output and include it
+in PR summary for others to check for anything unexpected and suspicious.**
+
+```
+$ scripts/sync-kernel.sh ~/libbpf ~/linux bpf-master | tee /tmp/libbpf-sync.txt
+Dumping existing libbpf commit signatures...
+WORKDIR:          /home/andriin/libbpf
+LINUX REPO:       /home/andriin/linux
+LIBBPF REPO:      /home/andriin/libbpf
+...
+```
+
+Most of the time this will go very uneventful. One expected case when sync
+script might require user intervention is if `bpf` tree has some libbpf fixes,
+which is nowadays not a very frequent occurence. But if that happens, script
+will show you a diff between expected state as of latest bpf-next and synced
+Github repo state. And will ask if these changes look good. Please use your
+best judgement to verify that differences are indeed from expected `bpf` tree
+fixes. E.g., it might look like below:
+
+```
+Comparing list of files...
+Comparing file contents...
+--- /home/andriin/linux/include/uapi/linux/netdev.h     2023-02-27 16:54:42.270583372 -0800
+++ /home/andriin/libbpf/include/uapi/linux/netdev.h    2023-02-27 16:54:34.615530796 -0800
+@@ -19,7 +19,7 @@
+  * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP
+  *   in zero copy mode.
+  * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw
+- *   oflloading.
+ *   offloading.
+  * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear
+  *   XDP buffer support in the driver napi callback.
+  * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements
+/home/andriin/linux/include/uapi/linux/netdev.h and /home/andriin/libbpf/include/uapi/linux/netdev.h are different!
+Unfortunately, there are some inconsistencies, please double check.
+Does everything look good? [y/N]:
+```
+
+If it looks sensible and expected, type `y` and script will proceed.
+
+If sync is successful, your `~/linux` repo will be left in original state on
+the original HEAD commit. `~/libbpf` repo will now be on a new branch, named
+`libbpf-sync-<timestamp>` (e.g., `libbpf-sync-2023-02-28T00-53-40.072Z`).
+
+Push this branch into your fork of `libbpf/libbpf` Github repo and create a PR:
+
+```
+$ git push --set-upstream origin libbpf-sync-2023-02-28T00-53-40.072Z
+Enumerating objects: 130, done.
+Counting objects: 100% (115/115), done.
+Delta compression using up to 80 threads
+Compressing objects: 100% (28/28), done.
+Writing objects: 100% (32/32), 5.57 KiB | 1.86 MiB/s, done.
+Total 32 (delta 21), reused 0 (delta 0), pack-reused 0
+remote: Resolving deltas: 100% (21/21), completed with 9 local objects.
+remote:
+remote: Create a pull request for 'libbpf-sync-2023-02-28T00-53-40.072Z' on GitHub by visiting:
+remote:      https://github.com/anakryiko/libbpf/pull/new/libbpf-sync-2023-02-28T00-53-40.072Z
+remote:
+To github.com:anakryiko/libbpf.git
+ * [new branch]                libbpf-sync-2023-02-28T00-53-40.072Z -> libbpf-sync-2023-02-28T00-53-40.072Z
+Branch 'libbpf-sync-2023-02-28T00-53-40.072Z' set up to track remote branch 'libbpf-sync-2023-02-28T00-53-40.072Z' from 'origin'.
+```
+
+**Please, adjust PR name to have a properly looking timestamp. Libbpf
+maintainers will be very thankful for that!**
+
+By default Github will turn above branch name into PR with subject "Libbpf sync
+2023 02 28 t00 53 40.072 z". Please fix this into a proper timestamp, e.g.:
+"Libbpf sync 2023-02-28T00:53:40.072Z". Thank you!
+
+**Please don't forget to paste contents of /tmp/libbpf-sync.txt into PR
+summary!**
+
+Once PR is created, libbpf CI will run a bunch of tests to check that
+everything is good. In simple cases that would be all you'd need to do. In more
+complicated cases some extra adjustments might be necessary.
+
+**Please, keep naming and style consistent.** Prefix CI-related fixes with `ci: `
+prefix. If you had to modify sync script, prefix it with `sync: `. Also make
+sure that each such commit has `Signed-off-by: Your Full Name <your@email.com>`,
+just like you'd do that for Linux upstream patch. Libbpf closely follows kernel
+conventions and styling, so please help maintaining that.
+
+Including new sources
+---------------------
+
+If entirely new source files (typically `*.c`) were added to the library in the
+kernel repository, it may be necessary to add these to the build system
+manually (you may notice linker errors otherwise), because the script cannot
+handle such changes automatically. To that end, edit `src/Makefile` as
+necessary. Commit
+[c2495832ced4](https://github.com/libbpf/libbpf/commit/c2495832ced4239bcd376b9954db38a6addd89ca)
+is an example of how to go about doing that.
+
+Similarly, if new public API header files were added, the `Makefile` will need
+to be adjusted as well.
+
+Updating allow/deny lists
+-------------------------
+
+Libbpf CI intentionally runs a subset of latest BPF selftests on old kernel
+(4.9 and 5.5, currently). It happens from time to time that some tests that
+previously were successfully running on old kernels now don't, typically due to
+reliance on some freshly added kernel feature. It might look something like this in [CI logs](https://github.com/libbpf/libbpf/actions/runs/4206303272/jobs/7299609578#step:4:2733):
+
+```
+  All error logs:
+  serial_test_xdp_info:FAIL:get_xdp_none errno=2
+  #283     xdp_info:FAIL
+  Summary: 49/166 PASSED, 5 SKIPPED, 1 FAILED
+```
+
+In such case we can either work with upstream to fix test to be compatible with
+old kernels, or we'll have to add a test into a denylist (or remove it from
+allowlist, like was [done](https://github.com/libbpf/libbpf/commit/ea284299025bf85b85b4923191de6463cd43ccd6)
+for the case above).
+
+```
+$ find . -name '*LIST*'
+./ci/vmtest/configs/ALLOWLIST-4.9.0
+./ci/vmtest/configs/DENYLIST-5.5.0
+./ci/vmtest/configs/DENYLIST-latest.s390x
+./ci/vmtest/configs/DENYLIST-latest
+./ci/vmtest/configs/ALLOWLIST-5.5.0
+```
+
+Please determine which tests need to be added/removed from which list. And then
+add that as a separate commit. **Please keep using the same branch name, so
+that the same PR can be updated.** There is no need to open new PRs for each
+such fix.
+
+Regenerating vmlinux.h header
+-----------------------------
+
+To compile latest BPF selftests against old kernels, we check in pre-generated
+[vmlinux.h](https://github.com/libbpf/libbpf/blob/master/.github/actions/build-selftests/vmlinux.h)
+header file, located at `.github/actions/build-selftests/vmlinux.h`, which
+contains type definitions from latest upstream kernel. When after libbpf sync
+upstream BPF selftests require new kernel types, we'd need to regenerate
+`vmlinux.h` and check it in as well.
+
+This will looks something like this in [CI logs](https://github.com/libbpf/libbpf/actions/runs/4198939244/jobs/7283214243#step:4:1903):
+
+```
+  In file included from progs/test_spin_lock_fail.c:5:
+  /home/runner/work/libbpf/libbpf/.kernel/tools/testing/selftests/bpf/bpf_experimental.h:73:53: error: declaration of 'struct bpf_rb_root' will not be visible outside of this function [-Werror,-Wvisibility]
+  extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+                                                      ^
+  /home/runner/work/libbpf/libbpf/.kernel/tools/testing/selftests/bpf/bpf_experimental.h:81:35: error: declaration of 'struct bpf_rb_root' will not be visible outside of this function [-Werror,-Wvisibility]
+  extern void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+                                    ^
+  /home/runner/work/libbpf/libbpf/.kernel/tools/testing/selftests/bpf/bpf_experimental.h:90:52: error: declaration of 'struct bpf_rb_root' will not be visible outside of this function [-Werror,-Wvisibility]
+  extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
+                                                     ^
+  3 errors generated.
+  make: *** [Makefile:572: /home/runner/work/libbpf/libbpf/.kernel/tools/testing/selftests/bpf/test_spin_lock_fail.bpf.o] Error 1
+  make: *** Waiting for unfinished jobs....
+  Error: Process completed with exit code 2.
+```
+
+You'll need to build latest upstream kernel from `bpf-next` tree, using BPF
+selftest configs. Concat arch-agnostic and arch-specific configs, build kernel,
+then use bpftool to dump `vmlinux.h`:
+
+```
+$ cd ~/linux
+$ cat tools/testing/selftests/bpf/config \
+   tools/testing/selftests/bpf/config.x86_64 > .config
+$ make -j$(nproc) olddefconfig all
+...
+$ bpftool btf dump file ~/linux/vmlinux format c > ~/libbpf/.github/actions/build-selftests/vmlinux.h
+$ cd ~/libbpf && git add . && git commit -s
+```
+
+Check in generated `vmlinux.h`, don't forget to use `ci: ` commit prefix, add
+it on top of sync commits. Push to Github and let libbpf CI do the checking for
+you. See [this commit](https://github.com/libbpf/libbpf/commit/34212c94a64df8eeb1dd5d064630a65e1dfd4c20)
+for reference.
+
+Troubleshooting
+---------------
+
+If something goes wrong and sync script exits early or is terminated early by
+user, you might end up with `~/linux` repo on temporary sync-related branch.
+Don't worry, though, sync script never destroys repo state, it follows
+"copy-on-write" philosophy and creates new branches where necessary. So it's
+very easy to restore previous state. So if anything goes wrong, it's easy to
+start fresh:
+
+```
+$ git branch | grep -E 'libbpf-.*Z'
+  libbpf-baseline-2023-02-28T00-43-35.146Z
+  libbpf-bpf-baseline-2023-02-28T00-43-35.146Z
+  libbpf-bpf-tip-2023-02-28T00-43-35.146Z
+  libbpf-squash-base-2023-02-28T00-43-35.146Z
+* libbpf-squash-tip-2023-02-28T00-43-35.146Z
+$ git cherry-pick --abort
+$ git checkout master && git branch | grep -E 'libbpf-.*Z' | xargs git br -D
+Switched to branch 'master'
+Your branch is up to date with 'bpf-next/master'.
+Deleted branch libbpf-baseline-2023-02-28T00-43-35.146Z (was 951bce29c898).
+Deleted branch libbpf-bpf-baseline-2023-02-28T00-43-35.146Z (was 3a70e0d4c9d7).
+Deleted branch libbpf-bpf-tip-2023-02-28T00-43-35.146Z (was 2d311f480b52).
+Deleted branch libbpf-squash-base-2023-02-28T00-43-35.146Z (was 957f109ef883).
+Deleted branch libbpf-squash-tip-2023-02-28T00-43-35.146Z (was be66130d2339).
+Deleted branch libbpf-tip-2023-02-28T00-43-35.146Z (was 2d311f480b52).
+```
+
+You might need to do the same for your `~/libbpf` repo sometimes, depending at
+which stage sync script was terminated.
--- a/assets/libbpf-logo-compact-darkbg.png
+++ b/assets/libbpf-logo-compact-darkbg.png
--- a/assets/libbpf-logo-compact-mono.png
+++ b/assets/libbpf-logo-compact-mono.png
--- a/assets/libbpf-logo-compact.png
+++ b/assets/libbpf-logo-compact.png
--- a/assets/libbpf-logo-sideways-darkbg.png
+++ b/assets/libbpf-logo-sideways-darkbg.png
--- a/assets/libbpf-logo-sideways-mono.png
+++ b/assets/libbpf-logo-sideways-mono.png
--- a/assets/libbpf-logo-sideways.png
+++ b/assets/libbpf-logo-sideways.png
--- a/assets/libbpf-logo-sparse-darkbg.png
+++ b/assets/libbpf-logo-sparse-darkbg.png
--- a/assets/libbpf-logo-sparse-mono.png
+++ b/assets/libbpf-logo-sparse-mono.png
--- a/assets/libbpf-logo-sparse.png
+++ b/assets/libbpf-logo-sparse.png
--- a/travis-ci/diffs/.do_not_use_dot_patch_here
+++ b/travis-ci/diffs/.do_not_use_dot_patch_here
--- a/ci/diffs/0001-s390-define-RUNTIME_DISCARD_EXIT-to-fix-link-error-w.patch
+++ b/ci/diffs/0001-s390-define-RUNTIME_DISCARD_EXIT-to-fix-link-error-w.patch
@@ -0,0 +1,70 @@
+From 6fba14e2ed9d159f76b23fa5c16f3ea99acbc003 Mon Sep 17 00:00:00 2001
+From: Masahiro Yamada <masahiroy@kernel.org>
+Date: Thu, 5 Jan 2023 12:13:06 +0900
+Subject: [PATCH] s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU
+ ld < 2.36
+
+Nathan Chancellor reports that the s390 vmlinux fails to link with
+GNU ld < 2.36 since commit 99cb0d917ffa ("arch: fix broken BuildID
+for arm64 and riscv").
+
+It happens for defconfig, or more specifically for CONFIG_EXPOLINE=y.
+
+  $ s390x-linux-gnu-ld --version | head -n1
+  GNU ld (GNU Binutils for Debian) 2.35.2
+  $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- allnoconfig
+  $ ./scripts/config -e CONFIG_EXPOLINE
+  $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- olddefconfig
+  $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu-
+  `.exit.text' referenced in section `.s390_return_reg' of drivers/base/dd.o: defined in discarded section `.exit.text' of drivers/base/dd.o
+  make[1]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1
+  make: *** [Makefile:1252: vmlinux] Error 2
+
+arch/s390/kernel/vmlinux.lds.S wants to keep EXIT_TEXT:
+
+        .exit.text : {
+                EXIT_TEXT
+        }
+
+But, at the same time, EXIT_TEXT is thrown away by DISCARD because
+s390 does not define RUNTIME_DISCARD_EXIT.
+
+I still do not understand why the latter wins after 99cb0d917ffa,
+but defining RUNTIME_DISCARD_EXIT seems correct because the comment
+line in arch/s390/kernel/vmlinux.lds.S says:
+
+        /*
+         * .exit.text is discarded at runtime, not link time,
+         * to deal with references from __bug_table
+         */
+
+Nathan also found that binutils commit 21401fc7bf67 ("Duplicate output
+sections in scripts") cured this issue, so we cannot reproduce it with
+binutils 2.36+, but it is better to not rely on it.
+
+Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv")
+Link: https://lore.kernel.org/all/Y7Jal56f6UBh1abE@dev-arch.thelio-3990X/
+Reported-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Link: https://lore.kernel.org/r/20230105031306.1455409-1-masahiroy@kernel.org
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+---
+ arch/s390/kernel/vmlinux.lds.S | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
+index 5ea3830af0cc..6e101e6f499d 100644
+--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
+@@ -17,6 +17,8 @@
+ /* Handle ro_after_init data on our own. */
+ #define RO_AFTER_INIT_DATA
+ 
+#define RUNTIME_DISCARD_EXIT
+
+ #define EMITS_PT_NOTE
+ 
+ #include <asm-generic/vmlinux.lds.h>
+-- 
+2.30.2
+
--- a/ci/diffs/0001-selftests-bpf-Check-whether-to-run-selftest.patch
+++ b/ci/diffs/0001-selftests-bpf-Check-whether-to-run-selftest.patch
@@ -0,0 +1,37 @@
+From ff8be5401b359e23ec2b74184034082564bac7c5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Daniel=20M=C3=BCller?= <deso@posteo.net>
+Date: Thu, 25 May 2023 16:04:20 -0700
+Subject: [PATCH] selftests/bpf: Check whether to run selftest
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The sockopt test invokes test__start_subtest and then unconditionally
+asserts the success. That means that even if deny-listed, any test will
+still run and potentially fail.
+Evaluate the return value of test__start_subtest() to achieve the
+desired behavior, as other tests do.
+
+Signed-off-by: Daniel Müller <deso@posteo.net>
+---
+ tools/testing/selftests/bpf/prog_tests/sockopt.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
+index 33dd45..9e6a5e 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
+@@ -1060,7 +1060,9 @@ void test_sockopt(void)
+ 		return;
+ 
+ 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+-		test__start_subtest(tests[i].descr);
+		if (!test__start_subtest(tests[i].descr))
+			continue;
+
+ 		ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
+ 	}
+ 
+-- 
+2.34.1
+
--- a/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch
+++ b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch
@@ -0,0 +1,46 @@
+From a8dfde09c90109e3a98af54847e91bde7dc2d5c2 Mon Sep 17 00:00:00 2001
+From: Song Liu <song@kernel.org>
+Date: Tue, 13 Dec 2022 14:05:00 -0800
+Subject: [PATCH] selftests/bpf: Select CONFIG_FUNCTION_ERROR_INJECTION
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+BPF selftests require CONFIG_FUNCTION_ERROR_INJECTION to work. However,
+CONFIG_FUNCTION_ERROR_INJECTION is no longer 'y' by default after recent
+changes. As a result, we are seeing errors like the following from BPF CI:
+
+   bpf_testmod_test_read() is not modifiable
+   __x64_sys_setdomainname is not sleepable
+   __x64_sys_getpgid is not sleepable
+
+Fix this by explicitly selecting CONFIG_FUNCTION_ERROR_INJECTION in the
+selftest config.
+
+Fixes: a4412fdd49dc ("error-injection: Add prompt for function error injection")
+Reported-by: Daniel Müller <deso@posteo.net>
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Daniel Müller <deso@posteo.net>
+Link: https://lore.kernel.org/bpf/20221213220500.3427947-1-song@kernel.org
+Signed-off-by: Daniel Müller <deso@posteo.net>
+---
+ tools/testing/selftests/bpf/config | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
+index 612f69..63cd4a 100644
+--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
+@@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y
+ CONFIG_DYNAMIC_FTRACE=y
+ CONFIG_FPROBE=y
+ CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+ CONFIG_FUNCTION_TRACER=y
+ CONFIG_GENEVE=y
+ CONFIG_IKCONFIG=y
+-- 
+2.30.2
+
--- a/ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch
+++ b/ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch
@@ -0,0 +1,68 @@
+From d3484f640bc82cff459beb85a00f7ebab20f0a41 Mon Sep 17 00:00:00 2001
+From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Date: Sun, 9 Apr 2023 11:28:31 +0900
+Subject: [PATCH] tracing: fprobe: Initialize ret valiable to fix smatch error
+
+The commit 39d954200bf6 ("fprobe: Skip exit_handler if entry_handler returns
+!0") introduced a hidden dependency of 'ret' local variable in the
+fprobe_handler(), Smatch warns the `ret` can be accessed without
+initialization.
+
+	kernel/trace/fprobe.c:59 fprobe_handler()
+	error: uninitialized symbol 'ret'.
+
+kernel/trace/fprobe.c
+    49                 fpr->entry_ip = ip;
+    50                 if (fp->entry_data_size)
+    51                         entry_data = fpr->data;
+    52         }
+    53
+    54         if (fp->entry_handler)
+    55                 ret = fp->entry_handler(fp, ip, ftrace_get_regs(fregs), entry_data);
+
+ret is only initialized if there is an ->entry_handler
+
+    56
+    57         /* If entry_handler returns !0, nmissed is not counted. */
+    58         if (rh) {
+
+rh is only true if there is an ->exit_handler.  Presumably if you have
+and ->exit_handler that means you also have a ->entry_handler but Smatch
+is not smart enough to figure it out.
+
+--> 59                 if (ret)
+                           ^^^
+Warning here.
+
+    60                         rethook_recycle(rh);
+    61                 else
+    62                         rethook_hook(rh, ftrace_get_regs(fregs), true);
+    63         }
+    64 out:
+    65         ftrace_test_recursion_unlock(bit);
+    66 }
+
+Reported-by: Dan Carpenter <error27@gmail.com>
+Link: https://lore.kernel.org/all/85429a5c-a4b9-499e-b6c0-cbd313291c49@kili.mountain
+Fixes: 39d954200bf6 ("fprobe: Skip exit_handler if entry_handler returns !0")
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+---
+ kernel/trace/fprobe.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
+index 9abb3905bc8e..293184227394 100644
+--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
+@@ -27,7 +27,7 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ 	struct rethook_node *rh = NULL;
+ 	struct fprobe *fp;
+ 	void *entry_data = NULL;
+-	int bit, ret;
+	int bit, ret = 0;
+ 
+ 	fp = container_of(ops, struct fprobe, ops);
+ 	if (fprobe_disabled(fp))
+-- 
+2.34.1
+
--- a/ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch
+++ b/ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch
@@ -0,0 +1,83 @@
+From 8267fc71abb2dc47338570e56dd3473a58313fce Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Mon, 17 Apr 2023 23:53:22 +0200
+Subject: [PATCH] veth: take into account peer device for
+ NETDEV_XDP_ACT_NDO_XMIT xdp_features flag
+
+For veth pairs, NETDEV_XDP_ACT_NDO_XMIT is supported by the current
+device if the peer one is running a XDP program or if it has GRO enabled.
+Fix the xdp_features flags reporting considering peer device and not
+current one for NETDEV_XDP_ACT_NDO_XMIT.
+
+Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Link: https://lore.kernel.org/r/4f1ca6f6f6b42ae125bfdb5c7782217c83968b2e.1681767806.git.lorenzo@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+---
+ drivers/net/veth.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index e1b38fbf1dd9..4b3c6647edc6 100644
+--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
+@@ -1262,11 +1262,12 @@ static void veth_set_xdp_features(struct net_device *dev)
+ 
+ 	peer = rtnl_dereference(priv->peer);
+ 	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
+		struct veth_priv *priv_peer = netdev_priv(peer);
+ 		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
+ 				     NETDEV_XDP_ACT_REDIRECT |
+ 				     NETDEV_XDP_ACT_RX_SG;
+ 
+-		if (priv->_xdp_prog || veth_gro_requested(dev))
+		if (priv_peer->_xdp_prog || veth_gro_requested(peer))
+ 			val |= NETDEV_XDP_ACT_NDO_XMIT |
+ 			       NETDEV_XDP_ACT_NDO_XMIT_SG;
+ 		xdp_set_features_flag(dev, val);
+@@ -1504,19 +1505,23 @@ static int veth_set_features(struct net_device *dev,
+ {
+ 	netdev_features_t changed = features ^ dev->features;
+ 	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *peer;
+ 	int err;
+ 
+ 	if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
+ 		return 0;
+ 
+	peer = rtnl_dereference(priv->peer);
+ 	if (features & NETIF_F_GRO) {
+ 		err = veth_napi_enable(dev);
+ 		if (err)
+ 			return err;
+ 
+-		xdp_features_set_redirect_target(dev, true);
+		if (peer)
+			xdp_features_set_redirect_target(peer, true);
+ 	} else {
+-		xdp_features_clear_redirect_target(dev);
+		if (peer)
+			xdp_features_clear_redirect_target(peer);
+ 		veth_napi_del(dev);
+ 	}
+ 	return 0;
+@@ -1598,13 +1603,13 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ 			peer->max_mtu = max_mtu;
+ 		}
+ 
+-		xdp_features_set_redirect_target(dev, true);
+		xdp_features_set_redirect_target(peer, true);
+ 	}
+ 
+ 	if (old_prog) {
+ 		if (!prog) {
+-			if (!veth_gro_requested(dev))
+-				xdp_features_clear_redirect_target(dev);
+			if (peer && !veth_gro_requested(dev))
+				xdp_features_clear_redirect_target(peer);
+ 
+ 			if (dev->flags & IFF_UP)
+ 				veth_disable_xdp(dev);
+-- 
+2.34.1
+
--- a/travis-ci/managers/debian.sh
+++ b/travis-ci/managers/debian.sh
@@ -6,8 +6,9 @@ CONT_NAME="${CONT_NAME:-libbpf-debian-$DEBIAN_RELEASE}"
 ENV_VARS="${ENV_VARS:-}"
 DOCKER_RUN="${DOCKER_RUN:-docker run}"
 REPO_ROOT="${REPO_ROOT:-$PWD}"
-ADDITIONAL_DEPS=(clang pkg-config gcc-10)
-CFLAGS="-g -O2 -Werror -Wall"
+ADDITIONAL_DEPS=(pkgconf)
+EXTRA_CFLAGS=""
+EXTRA_LDFLAGS=""

 function info() {
    echo -e "\033[33;1m$1\033[0m"
@@ -42,30 +43,35 @@ for phase in "${PHASES[@]}"; do
            docker_exec bash -c "echo deb-src http://deb.debian.org/debian $DEBIAN_RELEASE main >>/etc/apt/sources.list"
            docker_exec apt-get -y update
            docker_exec apt-get -y install aptitude
-            docker_exec aptitude -y build-dep libelf-dev
-            docker_exec aptitude -y install libelf-dev
+            docker_exec aptitude -y install make libz-dev libelf-dev
            docker_exec aptitude -y install "${ADDITIONAL_DEPS[@]}"
            echo -e "::endgroup::"
            ;;
-        RUN|RUN_CLANG|RUN_GCC10|RUN_ASAN|RUN_CLANG_ASAN|RUN_GCC10_ASAN)
+        RUN|RUN_CLANG|RUN_CLANG14|RUN_CLANG15|RUN_CLANG16|RUN_GCC10|RUN_GCC11|RUN_GCC12|RUN_ASAN|RUN_CLANG_ASAN|RUN_GCC10_ASAN)
            CC="cc"
-            if [[ "$phase" = *"CLANG"* ]]; then
+            if [[ "$phase" =~ "RUN_CLANG(\d+)(_ASAN)?" ]]; then
+                ENV_VARS="-e CC=clang-${BASH_REMATCH[1]} -e CXX=clang++-${BASH_REMATCH[1]}"
+                CC="clang-${BASH_REMATCH[1]}"
+            elif [[ "$phase" = *"CLANG"* ]]; then
                ENV_VARS="-e CC=clang -e CXX=clang++"
                CC="clang"
-            elif [[ "$phase" = *"GCC10"* ]]; then
-                ENV_VARS="-e CC=gcc-10 -e CXX=g++-10"
-                CC="gcc-10"
-                CFLAGS="${CFLAGS} -Wno-stringop-truncation"
-            else
-                CFLAGS="${CFLAGS} -Wno-stringop-truncation"
+            elif [[ "$phase" =~ "RUN_GCC(\d+)(_ASAN)?" ]]; then
+                ENV_VARS="-e CC=gcc-${BASH_REMATCH[1]} -e CXX=g++-${BASH_REMATCH[1]}"
+                CC="gcc-${BASH_REMATCH[1]}"
            fi
            if [[ "$phase" = *"ASAN"* ]]; then
-                CFLAGS="${CFLAGS} -fsanitize=address,undefined"
+                EXTRA_CFLAGS="${EXTRA_CFLAGS} -fsanitize=address,undefined"
+                EXTRA_LDFLAGS="${EXTRA_LDFLAGS} -fsanitize=address,undefined"
+            fi
+            if [[ "$CC" != "cc" ]]; then
+                docker_exec aptitude -y install "$CC"
+            else
+                docker_exec aptitude -y install gcc
            fi
            docker_exec mkdir build install
            docker_exec ${CC} --version
            info "build"
-            docker_exec make -j$((4*$(nproc))) CFLAGS="${CFLAGS}" -C ./src -B OBJDIR=../build
+            docker_exec make -j$((4*$(nproc))) EXTRA_CFLAGS="${EXTRA_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LDFLAGS}" -C ./src -B OBJDIR=../build
            info "ldd build/libbpf.so:"
            docker_exec ldd build/libbpf.so
            if ! docker_exec ldd build/libbpf.so | grep -q libelf; then
@@ -75,7 +81,7 @@ for phase in "${PHASES[@]}"; do
            info "install"
            docker_exec make -j$((4*$(nproc))) -C src OBJDIR=../build DESTDIR=../install install
            info "link binary"
-            docker_exec bash -c "CFLAGS=\"${CFLAGS}\" ./travis-ci/managers/test_compile.sh"
+            docker_exec bash -c "EXTRA_CFLAGS=\"${EXTRA_CFLAGS}\" EXTRA_LDFLAGS=\"${EXTRA_LDFLAGS}\" ./ci/managers/test_compile.sh"
            ;;
        CLEANUP)
            info "Cleanup phase"
--- a/ci/managers/test_compile.sh
+++ b/ci/managers/test_compile.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -euox pipefail
+
+EXTRA_CFLAGS=${EXTRA_CFLAGS:-}
+EXTRA_LDFLAGS=${EXTRA_LDFLAGS:-}
+
+cat << EOF > main.c
+#include <bpf/libbpf.h>
+int main() {
+  return bpf_object__open(0) < 0;
+}
+EOF
+
+# static linking
+${CC:-cc} ${EXTRA_CFLAGS} ${EXTRA_LDFLAGS} -o main -I./include/uapi -I./install/usr/include main.c ./build/libbpf.a -lelf -lz
--- a/travis-ci/managers/travis_wait.bash
+++ b/travis-ci/managers/travis_wait.bash
--- a/travis-ci/managers/ubuntu.sh
+++ b/travis-ci/managers/ubuntu.sh
@@ -10,14 +10,15 @@ source "$(dirname $0)/travis_wait.bash"

 cd $REPO_ROOT

-CFLAGS="-g -O2 -Werror -Wall -fsanitize=address,undefined -Wno-stringop-truncation"
+EXTRA_CFLAGS="-Werror -Wall -fsanitize=address,undefined"
+EXTRA_LDFLAGS="-Werror -Wall -fsanitize=address,undefined"
 mkdir build install
 cc --version
-make -j$((4*$(nproc))) CFLAGS="${CFLAGS}" -C ./src -B OBJDIR=../build
+make -j$((4*$(nproc))) EXTRA_CFLAGS="${EXTRA_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LDFLAGS}" -C ./src -B OBJDIR=../build
 ldd build/libbpf.so
 if ! ldd build/libbpf.so | grep -q libelf; then
    echo "FAIL: No reference to libelf.so in libbpf.so!"
    exit 1
 fi
 make -j$((4*$(nproc))) -C src OBJDIR=../build DESTDIR=../install install
-CFLAGS=${CFLAGS} $(dirname $0)/test_compile.sh
+EXTRA_CFLAGS=${EXTRA_CFLAGS} EXTRA_LDFLAGS=${EXTRA_LDFLAGS} $(dirname $0)/test_compile.sh
--- a/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0
+++ b/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0
--- a/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0
+++ b/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0
@@ -1,4 +1,4 @@
-attach_probe
+# attach_probe
 autoload
 bpf_verif_scale
 cgroup_attach_autodetach
@@ -10,14 +10,12 @@ core_reloc
 core_retro
 cpu_mask
 endian
-fexit_stress
 get_branch_snapshot
 get_stackid_cannot_attach
 global_data
 global_data_init
 global_func_args
 hashmap
-l4lb_all
 legacy_printk
 linked_funcs
 linked_maps
@@ -37,20 +35,17 @@ signal_pending
 skeleton
 sockmap_ktls
 sockopt
-sockopt_inherit
-sockopt_multi
 spinlock
 stacktrace_map
 stacktrace_map_raw_tp
 static_linked
-subprogs
 task_fd_query_rawtp
 task_fd_query_tp
 tc_bpf
 tcp_estats
 tcp_rtt
 tp_attach_query
+usdt/urand_pid_attach
 xdp
-xdp_info
 xdp_noinline
 xdp_perf
--- a/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0
+++ b/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0
@@ -1,5 +1,5 @@
 # This file is not used and is there for historic purposes only.
-# See WHITELIST-5.5.0 instead.
+# See ALLOWLIST-5.5.0 instead.

 # PERMANENTLY DISABLED
 align			# verifier output format changed
@@ -71,12 +71,15 @@ sk_lookup		# v5.9+
 sk_storage_tracing	# missing bpf_sk_storage_get() helper
 skb_ctx			# ctx_{size, }_{in, out} in BPF_PROG_TEST_RUN is missing
 skb_helpers		# helpers added in 5.8+
+skeleton		# creates too big ARRAY map
 snprintf		# v5.13+
 snprintf_btf		# v5.10+
 sock_fields		# v5.10+
 socket_cookie		# v5.12+
 sockmap_basic		# uses new socket fields, 5.8+
 sockmap_listen		# no listen socket supportin SOCKMAP
+sockopt/getsockopt: ignore >PAGE_SIZE optlen
+sockopt/setsockopt: ignore >PAGE_SIZE optlen
 sockopt_sk
 sockopt_qos_to_cc	# v5.15+
 stacktrace_build_id	# v5.9+
--- a/ci/vmtest/configs/DENYLIST-latest
+++ b/ci/vmtest/configs/DENYLIST-latest
@@ -0,0 +1,4 @@
+decap_sanity  # weird failure with decap_sanity_ns netns already existing, TBD
+bpf_nf/tc-bpf-ct # test consistently failing on x86: https://github.com/libbpf/libbpf/pull/698#issuecomment-1590341200
+bpf_nf/xdp-ct   # test consistently failing on x86: https://github.com/libbpf/libbpf/pull/698#issuecomment-1590341200
+kprobe_multi_bench_attach # suspected to cause crashes in CI
--- a/ci/vmtest/configs/DENYLIST-latest.s390x
+++ b/ci/vmtest/configs/DENYLIST-latest.s390x
@@ -0,0 +1,4 @@
+# TEMPORARY
+sockmap_listen/sockhash VSOCK test_vsock_redir
+usdt/basic                               # failing verifier due to bounds check after LLVM update
+usdt/multispec                           # same as above
--- a/travis-ci/vmtest/helpers.sh
+++ b/travis-ci/vmtest/helpers.sh
@@ -1,26 +1,20 @@
+# shellcheck shell=bash
+
 # $1 - start or end
 # $2 - fold identifier, no spaces
 # $3 - fold section description
-travis_fold() {
+foldable() {
  local YELLOW='\033[1;33m'
  local NOCOLOR='\033[0m'
-  if [ -z ${GITHUB_WORKFLOW+x} ]; then
-    echo travis_fold:$1:$2
+  if [ $1 = "start" ]; then
+    line="::group::$2"
    if [ ! -z "${3:-}" ]; then
-      echo -e "${YELLOW}$3${NOCOLOR}"
+      line="$line - ${YELLOW}$3${NOCOLOR}"
    fi
-    echo
  else
-    if [ $1 = "start" ]; then
-      line="::group::$2"
-      if [ ! -z "${3:-}" ]; then
-        line="$line - ${YELLOW}$3${NOCOLOR}"
-      fi
-    else
-      line="::endgroup::"
-    fi
-    echo -e "$line"
+    line="::endgroup::"
  fi
+  echo -e "$line"
 }

 __print() {
--- a/ci/vmtest/run_selftests.sh
+++ b/ci/vmtest/run_selftests.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source $(cd $(dirname $0) && pwd)/helpers.sh
+
+ARCH=$(uname -m)
+
+STATUS_FILE=/exitstatus
+
+read_lists() {
+	(for path in "$@"; do
+		if [[ -s "$path" ]]; then
+			cat "$path"
+		fi;
+	done) | cut -d'#' -f1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | tr -s '\n' ','
+}
+
+test_progs() {
+	if [[ "${KERNEL}" != '4.9.0' ]]; then
+		foldable start test_progs "Testing test_progs"
+		# "&& true" does not change the return code (it is not executed
+		# if the Python script fails), but it prevents exiting on a
+		# failure due to the "set -e".
+		./test_progs ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} && true
+		echo "test_progs:$?" >> "${STATUS_FILE}"
+		foldable end test_progs
+	fi
+}
+
+test_progs_no_alu32() {
+	foldable start test_progs-no_alu32 "Testing test_progs-no_alu32"
+	./test_progs-no_alu32 ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} && true
+	echo "test_progs-no_alu32:$?" >> "${STATUS_FILE}"
+	foldable end test_progs-no_alu32
+}
+
+test_maps() {
+	if [[ "${KERNEL}" == 'latest' ]]; then
+		foldable start test_maps "Testing test_maps"
+		./test_maps && true
+		echo "test_maps:$?" >> "${STATUS_FILE}"
+		foldable end test_maps
+	fi
+}
+
+test_verifier() {
+	if [[ "${KERNEL}" == 'latest' ]]; then
+		foldable start test_verifier "Testing test_verifier"
+		./test_verifier && true
+		echo "test_verifier:$?" >> "${STATUS_FILE}"
+		foldable end test_verifier
+	fi
+}
+
+foldable end vm_init
+
+foldable start kernel_config "Kconfig"
+
+zcat /proc/config.gz
+
+foldable end kernel_config
+
+
+configs_path=/${PROJECT_NAME}/selftests/bpf
+local_configs_path=${PROJECT_NAME}/vmtest/configs
+DENYLIST=$(read_lists \
+	"$configs_path/DENYLIST" \
+	"$configs_path/DENYLIST.${ARCH}" \
+	"$local_configs_path/DENYLIST-${KERNEL}" \
+	"$local_configs_path/DENYLIST-${KERNEL}.${ARCH}" \
+)
+ALLOWLIST=$(read_lists \
+	"$configs_path/ALLOWLIST" \
+	"$configs_path/ALLOWLIST.${ARCH}" \
+	"$local_configs_path/ALLOWLIST-${KERNEL}" \
+	"$local_configs_path/ALLOWLIST-${KERNEL}.${ARCH}" \
+)
+
+echo "DENYLIST: ${DENYLIST}"
+echo "ALLOWLIST: ${ALLOWLIST}"
+
+cd ${PROJECT_NAME}/selftests/bpf
+
+if [ $# -eq 0 ]; then
+	test_progs
+	test_progs_no_alu32
+	# test_maps
+	test_verifier
+else
+	for test_name in "$@"; do
+		"${test_name}"
+	done
+fi
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,22 +1,33 @@
 .. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

+.. _libbpf:
+
+======
 libbpf
 ======

+If you are looking to develop BPF applications using the libbpf library, this
+directory contains important documentation that you should read.
+
+To get started, it is recommended to begin with the :doc:`libbpf Overview
+<libbpf_overview>` document, which provides a high-level understanding of the
+libbpf APIs and their usage. This will give you a solid foundation to start
+exploring and utilizing the various features of libbpf to develop your BPF
+applications.
+
 .. toctree::
   :maxdepth: 1

+   libbpf_overview
+   API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
+   program_types
   libbpf_naming_convention
   libbpf_build

-This is documentation for libbpf, a userspace library for loading and
-interacting with bpf programs.

-For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_.
-
-All general BPF questions, including kernel functionality, libbpf APIs and
-their application, should be sent to bpf@vger.kernel.org mailing list.
-You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
-mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
-Please search the archive before asking new questions. It very well might
-be that this was already addressed or answered before.
+All general BPF questions, including kernel functionality, libbpf APIs and their
+application, should be sent to bpf@vger.kernel.org mailing list.  You can
+`subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the mailing list
+search its `archive <https://lore.kernel.org/bpf/>`_.  Please search the archive
+before asking new questions. It may be that this was already addressed or
+answered before.
--- a/docs/libbpf_naming_convention.rst
+++ b/docs/libbpf_naming_convention.rst
@@ -9,8 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.

 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``btf_dump_``,
+``ring_buffer_``, ``perf_buffer_``.

 System call wrappers
 --------------------
@@ -59,15 +59,6 @@ Auxiliary functions and types that don't fit well in any of categories
 described above should have ``libbpf_`` prefix, e.g.
 ``libbpf_get_error`` or ``libbpf_prog_type_by_name``.

-AF_XDP functions
-------------------
-
-AF_XDP functions should have an ``xsk_`` prefix, e.g.
-``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
-of both low-level ring access functions and high-level configuration
-functions. These can be mixed and matched. Note that these functions
-are not reentrant for performance reasons.
-
 ABI
 ---

@@ -92,8 +83,8 @@ This prevents from accidentally exporting a symbol, that is not supposed
 to be a part of ABI what, in turn, improves both libbpf developer- and
 user-experiences.

-ABI versionning
---------------
+ABI versioning
+--------------

 To make future ABI extensions possible libbpf ABI is versioned.
 Versioning is implemented by ``libbpf.map`` version script that is
@@ -157,7 +148,7 @@ API documentation convention
 The libbpf API is documented via comments above definitions in
 header files. These comments can be rendered by doxygen and sphinx
 for well organized html output. This section describes the
-convention in which these comments should be formated.
+convention in which these comments should be formatted.

 Here is an example from btf.h:

--- a/docs/libbpf_overview.rst
+++ b/docs/libbpf_overview.rst
@@ -0,0 +1,228 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+libbpf Overview
+===============
+
+libbpf is a C-based library containing a BPF loader that takes compiled BPF
+object files and prepares and loads them into the Linux kernel. libbpf takes the
+heavy lifting of loading, verifying, and attaching BPF programs to various
+kernel hooks, allowing BPF application developers to focus only on BPF program
+correctness and performance.
+
+The following are the high-level features supported by libbpf:
+
+* Provides high-level and low-level APIs for user space programs to interact
+  with BPF programs. The low-level APIs wrap all the bpf system call
+  functionality, which is useful when users need more fine-grained control
+  over the interactions between user space and BPF programs.
+* Provides overall support for the BPF object skeleton generated by bpftool.
+  The skeleton file simplifies the process for the user space programs to access
+  global variables and work with BPF programs.
+* Provides BPF-side APIS, including BPF helper definitions, BPF maps support,
+  and tracing helpers, allowing developers to simplify BPF code writing.
+* Supports BPF CO-RE mechanism, enabling BPF developers to write portable
+  BPF programs that can be compiled once and run across different kernel
+  versions.
+
+This document will delve into the above concepts in detail, providing a deeper
+understanding of the capabilities and advantages of libbpf and how it can help
+you develop BPF applications efficiently.
+
+BPF App Lifecycle and libbpf APIs
+==================================
+
+A BPF application consists of one or more BPF programs (either cooperating or
+completely independent), BPF maps, and global variables. The global
+variables are shared between all BPF programs, which allows them to cooperate on
+a common set of data. libbpf provides APIs that user space programs can use to
+manipulate the BPF programs by triggering different phases of a BPF application
+lifecycle.
+
+The following section provides a brief overview of each phase in the BPF life
+cycle:
+
+* **Open phase**: In this phase, libbpf parses the BPF
+  object file and discovers BPF maps, BPF programs, and global variables. After
+  a BPF app is opened, user space apps can make additional adjustments
+  (setting BPF program types, if necessary; pre-setting initial values for
+  global variables, etc.) before all the entities are created and loaded.
+
+* **Load phase**: In the load phase, libbpf creates BPF
+  maps, resolves various relocations, and verifies and loads BPF programs into
+  the kernel. At this point, libbpf validates all the parts of a BPF application
+  and loads the BPF program into the kernel, but no BPF program has yet been
+  executed. After the load phase, it’s possible to set up the initial BPF map
+  state without racing with the BPF program code execution.
+
+* **Attachment phase**: In this phase, libbpf
+  attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes,
+  cgroup hooks, network packet processing pipeline, etc.). During this
+  phase, BPF programs perform useful work such as processing
+  packets, or updating BPF maps and global variables that can be read from user
+  space.
+
+* **Tear down phase**: In the tear down phase,
+  libbpf detaches BPF programs and unloads them from the kernel. BPF maps are
+  destroyed, and all the resources used by the BPF app are freed.
+
+BPF Object Skeleton File
+========================
+
+BPF skeleton is an alternative interface to libbpf APIs for working with BPF
+objects. Skeleton code abstract away generic libbpf APIs to significantly
+simplify code for manipulating BPF programs from user space. Skeleton code
+includes a bytecode representation of the BPF object file, simplifying the
+process of distributing your BPF code. With BPF bytecode embedded, there are no
+extra files to deploy along with your application binary.
+
+You can generate the skeleton header file ``(.skel.h)`` for a specific object
+file by passing the BPF object to the bpftool. The generated BPF skeleton
+provides the following custom functions that correspond to the BPF lifecycle,
+each of them prefixed with the specific object name:
+
+* ``<name>__open()`` – creates and opens BPF application (``<name>`` stands for
+  the specific bpf object name)
+* ``<name>__load()`` – instantiates, loads,and verifies BPF application parts
+* ``<name>__attach()`` – attaches all auto-attachable BPF programs (it’s
+  optional, you can have more control by using libbpf APIs directly)
+* ``<name>__destroy()`` – detaches all BPF programs and
+  frees up all used resources
+
+Using the skeleton code is the recommended way to work with bpf programs. Keep
+in mind, BPF skeleton provides access to the underlying BPF object, so whatever
+was possible to do with generic libbpf APIs is still possible even when the BPF
+skeleton is used. It's an additive convenience feature, with no syscalls, and no
+cumbersome code.
+
+Other Advantages of Using Skeleton File
+---------------------------------------
+
+* BPF skeleton provides an interface for user space programs to work with BPF
+  global variables. The skeleton code memory maps global variables as a struct
+  into user space. The struct interface allows user space programs to initialize
+  BPF programs before the BPF load phase and fetch and update data from user
+  space afterward.
+
+* The ``skel.h`` file reflects the object file structure by listing out the
+  available maps, programs, etc. BPF skeleton provides direct access to all the
+  BPF maps and BPF programs as struct fields. This eliminates the need for
+  string-based lookups with ``bpf_object_find_map_by_name()`` and
+  ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source
+  code and user-space code getting out of sync.
+
+* The embedded bytecode representation of the object file ensures that the
+  skeleton and the BPF object file are always in sync.
+
+BPF Helpers
+===========
+
+libbpf provides BPF-side APIs that BPF programs can use to interact with the
+system. The BPF helpers definition allows developers to use them in BPF code as
+any other plain C function. For example, there are helper functions to print
+debugging messages, get the time since the system was booted, interact with BPF
+maps, manipulate network packets, etc.
+
+For a complete description of what the helpers do, the arguments they take, and
+the return value, see the `bpf-helpers
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man page.
+
+BPF CO-RE (Compile Once – Run Everywhere)
+=========================================
+
+BPF programs work in the kernel space and have access to kernel memory and data
+structures. One limitation that BPF applications come across is the lack of
+portability across different kernel versions and configurations. `BCC
+<https://github.com/iovisor/bcc/>`_ is one of the solutions for BPF
+portability. However, it comes with runtime overhead and a large binary size
+from embedding the compiler with the application.
+
+libbpf steps up the BPF program portability by supporting the BPF CO-RE concept.
+BPF CO-RE brings together BTF type information, libbpf, and the compiler to
+produce a single executable binary that you can run on multiple kernel versions
+and configurations.
+
+To make BPF programs portable libbpf relies on the BTF type information of the
+running kernel. Kernel also exposes this self-describing authoritative BTF
+information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``.
+
+You can generate the BTF information for the running kernel with the following
+command:
+
+::
+
+  $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
+
+The command generates a ``vmlinux.h`` header file with all kernel types
+(:doc:`BTF types <../btf>`) that the running kernel uses. Including
+``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel
+headers.
+
+libbpf enables portability of BPF programs by looking at the BPF program’s
+recorded BTF type and relocation information and matching them to BTF
+information (vmlinux) provided by the running kernel. libbpf then resolves and
+matches all the types and fields, and updates necessary offsets and other
+relocatable data to ensure that BPF program’s logic functions correctly for a
+specific kernel on the host. BPF CO-RE concept thus eliminates overhead
+associated with BPF development and allows developers to write portable BPF
+applications without modifications and runtime source code compilation on the
+target machine.
+
+The following code snippet shows how to read the parent field of a kernel
+``task_struct`` using BPF CO-RE and libbf. The basic helper to read a field in a
+CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read
+``sz`` bytes from the field referenced by ``src`` into the memory pointed to by
+``dst``.
+
+.. code-block:: C
+   :emphasize-lines: 6
+
+    //...
+    struct task_struct *task = (void *)bpf_get_current_task();
+    struct task_struct *parent_task;
+    int err;
+
+    err = bpf_core_read(&parent_task, sizeof(void *), &task->parent);
+    if (err) {
+      /* handle error */
+    }
+
+    /* parent_task contains the value of task->parent pointer */
+
+In the code snippet, we first get a pointer to the current ``task_struct`` using
+``bpf_get_current_task()``.  We then use ``bpf_core_read()`` to read the parent
+field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is
+just like ``bpf_probe_read_kernel()`` BPF helper, except it records information
+about the field that should be relocated on the target kernel. i.e, if the
+``parent`` field gets shifted to a different offset within
+``struct task_struct`` due to some new field added in front of it, libbpf will
+automatically adjust the actual offset to the proper value.
+
+Getting Started with libbpf
+===========================
+
+Check out the `libbpf-bootstrap <https://github.com/libbpf/libbpf-bootstrap>`_
+repository with simple examples of using libbpf to build various BPF
+applications.
+
+See also `libbpf API documentation
+<https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+libbpf and Rust
+===============
+
+If you are building BPF applications in Rust, it is recommended to use the
+`Libbpf-rs <https://github.com/libbpf/libbpf-rs>`_ library instead of bindgen
+bindings directly to libbpf. Libbpf-rs wraps libbpf functionality in
+Rust-idiomatic interfaces and provides libbpf-cargo plugin to handle BPF code
+compilation and skeleton generation. Using Libbpf-rs will make building user
+space part of the BPF application easier. Note that the BPF program themselves
+must still be written in plain C.
+
+Additional Documentation
+========================
+
+* `Program types and ELF Sections <https://libbpf.readthedocs.io/en/latest/program_types.html>`_
+* `API naming convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html>`_
+* `Building libbpf <https://libbpf.readthedocs.io/en/latest/libbpf_build.html>`_
+* `API documentation Convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html#api-documentation-convention>`_
--- a/docs/program_types.rst
+++ b/docs/program_types.rst
@@ -0,0 +1,203 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+.. _program_types_and_elf:
+
+Program Types and ELF Sections
+==============================
+
+The table below lists the program types, their attach types where relevant and the ELF section
+names supported by libbpf for them. The ELF section names follow these rules:
+
+- ``type`` is an exact match, e.g. ``SEC("socket")``
+- ``type+`` means it can be either exact ``SEC("type")`` or well-formed ``SEC("type/extras")``
+  with a '``/``' separator between ``type`` and ``extras``.
+
+When ``extras`` are specified, they provide details of how to auto-attach the BPF program.  The
+format of ``extras`` depends on the program type, e.g. ``SEC("tracepoint/<category>/<name>")``
+for tracepoints or ``SEC("usdt/<path>:<provider>:<name>")`` for USDT probes. The extras are
+described in more detail in the footnotes.
+
+
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| Program Type                              | Attach Type                            | ELF Section Name                 | Sleepable |
+===========================================+========================================+==================================+===========+
+| ``BPF_PROG_TYPE_CGROUP_DEVICE``           | ``BPF_CGROUP_DEVICE``                  | ``cgroup/dev``                   |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SKB``              |                                        | ``cgroup/skb``                   |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_EGRESS``             | ``cgroup_skb/egress``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_INGRESS``            | ``cgroup_skb/ingress``           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCKOPT``          | ``BPF_CGROUP_GETSOCKOPT``              | ``cgroup/getsockopt``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_SETSOCKOPT``              | ``cgroup/setsockopt``            |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK_ADDR``        | ``BPF_CGROUP_INET4_BIND``              | ``cgroup/bind4``                 |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET4_CONNECT``           | ``cgroup/connect4``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET4_GETPEERNAME``       | ``cgroup/getpeername4``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET4_GETSOCKNAME``       | ``cgroup/getsockname4``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_BIND``              | ``cgroup/bind6``                 |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_CONNECT``           | ``cgroup/connect6``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_GETPEERNAME``       | ``cgroup/getpeername6``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_GETSOCKNAME``       | ``cgroup/getsockname6``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP4_RECVMSG``            | ``cgroup/recvmsg4``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP4_SENDMSG``            | ``cgroup/sendmsg4``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP6_RECVMSG``            | ``cgroup/recvmsg6``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP6_SENDMSG``            | ``cgroup/sendmsg6``              |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK``             | ``BPF_CGROUP_INET4_POST_BIND``         | ``cgroup/post_bind4``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_POST_BIND``         | ``cgroup/post_bind6``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_SOCK_CREATE``        | ``cgroup/sock_create``           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``cgroup/sock``                  |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_SOCK_RELEASE``       | ``cgroup/sock_release``          |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SYSCTL``           | ``BPF_CGROUP_SYSCTL``                  | ``cgroup/sysctl``                |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_EXT``                     |                                        | ``freplace+`` [#fentry]_         |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_FLOW_DISSECTOR``          | ``BPF_FLOW_DISSECTOR``                 | ``flow_dissector``               |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_KPROBE``                  |                                        | ``kprobe+`` [#kprobe]_           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``kretprobe+`` [#kprobe]_        |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``ksyscall+`` [#ksyscall]_       |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        |  ``kretsyscall+`` [#ksyscall]_   |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uprobe+`` [#uprobe]_           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uprobe.s+`` [#uprobe]_         | Yes       |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uretprobe+`` [#uprobe]_        |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uretprobe.s+`` [#uprobe]_      | Yes       |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``usdt+`` [#usdt]_               |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_KPROBE_MULTI``             | ``kprobe.multi+`` [#kpmulti]_    |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``kretprobe.multi+`` [#kpmulti]_ |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LIRC_MODE2``              | ``BPF_LIRC_MODE2``                     | ``lirc_mode2``                   |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LSM``                     | ``BPF_LSM_CGROUP``                     | ``lsm_cgroup+``                  |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_LSM_MAC``                        | ``lsm+`` [#lsm]_                 |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``lsm.s+`` [#lsm]_               | Yes       |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_IN``                  |                                        | ``lwt_in``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_OUT``                 |                                        | ``lwt_out``                      |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_SEG6LOCAL``           |                                        | ``lwt_seg6local``                |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_XMIT``                |                                        | ``lwt_xmit``                     |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_PERF_EVENT``              |                                        | ``perf_event``                   |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` |                                        | ``raw_tp.w+`` [#rawtp]_          |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``raw_tracepoint.w+``            |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT``          |                                        | ``raw_tp+`` [#rawtp]_            |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``raw_tracepoint+``              |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_ACT``               |                                        | ``action``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_CLS``               |                                        | ``classifier``                   |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``tc``                           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_LOOKUP``               | ``BPF_SK_LOOKUP``                      | ``sk_lookup``                    |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_MSG``                  | ``BPF_SK_MSG_VERDICT``                 | ``sk_msg``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_REUSEPORT``            | ``BPF_SK_REUSEPORT_SELECT_OR_MIGRATE`` | ``sk_reuseport/migrate``         |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_SK_REUSEPORT_SELECT``            | ``sk_reuseport``                 |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_SKB``                  |                                        | ``sk_skb``                       |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_SK_SKB_STREAM_PARSER``           | ``sk_skb/stream_parser``         |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_SK_SKB_STREAM_VERDICT``          | ``sk_skb/stream_verdict``        |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCKET_FILTER``           |                                        | ``socket``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCK_OPS``                | ``BPF_CGROUP_SOCK_OPS``                | ``sockops``                      |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_STRUCT_OPS``              |                                        | ``struct_ops+``                  |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SYSCALL``                 |                                        | ``syscall``                      | Yes       |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACEPOINT``              |                                        | ``tp+`` [#tp]_                   |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``tracepoint+`` [#tp]_           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACING``                 | ``BPF_MODIFY_RETURN``                  | ``fmod_ret+`` [#fentry]_         |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``fmod_ret.s+`` [#fentry]_       | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_FENTRY``                   | ``fentry+`` [#fentry]_           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``fentry.s+`` [#fentry]_         | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_FEXIT``                    | ``fexit+`` [#fentry]_            |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``fexit.s+`` [#fentry]_          | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_ITER``                     | ``iter+`` [#iter]_               |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``iter.s+`` [#iter]_             | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_RAW_TP``                   | ``tp_btf+`` [#fentry]_           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_XDP``                     | ``BPF_XDP_CPUMAP``                     | ``xdp.frags/cpumap``             |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``xdp/cpumap``                   |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_XDP_DEVMAP``                     | ``xdp.frags/devmap``             |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``xdp/devmap``                   |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_XDP``                            | ``xdp.frags``                    |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``xdp``                          |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+
+
+.. rubric:: Footnotes
+
+.. [#fentry] The ``fentry`` attach format is ``fentry[.s]/<function>``.
+.. [#kprobe] The ``kprobe`` attach format is ``kprobe/<function>[+<offset>]``. Valid
+             characters for ``function`` are ``a-zA-Z0-9_.`` and ``offset`` must be a valid
+             non-negative integer.
+.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
+.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
+.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
+.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
+              supports ``*`` and ``?`` wildcards. Valid characters for pattern are
+              ``a-zA-Z0-9_.*?``.
+.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
+.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
+.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
+.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -33,13 +33,13 @@ struct btf_type {
 	/* "info" bits arrangement
 	 * bits  0-15: vlen (e.g. # of struct's members)
 	 * bits 16-23: unused
-	 * bits 24-27: kind (e.g. int, ptr, array...etc)
-	 * bits 28-30: unused
+	 * bits 24-28: kind (e.g. int, ptr, array...etc)
+	 * bits 29-30: unused
 	 * bit     31: kind_flag, currently used by
-	 *             struct, union and fwd
+	 *             struct, union, enum, fwd and enum64
 	 */
 	__u32 info;
-	/* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
+	/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
@@ -63,7 +63,7 @@ enum {
 	BTF_KIND_ARRAY		= 3,	/* Array	*/
 	BTF_KIND_STRUCT		= 4,	/* Struct	*/
 	BTF_KIND_UNION		= 5,	/* Union	*/
-	BTF_KIND_ENUM		= 6,	/* Enumeration	*/
+	BTF_KIND_ENUM		= 6,	/* Enumeration up to 32-bit values */
 	BTF_KIND_FWD		= 7,	/* Forward	*/
 	BTF_KIND_TYPEDEF	= 8,	/* Typedef	*/
 	BTF_KIND_VOLATILE	= 9,	/* Volatile	*/
@@ -76,6 +76,7 @@ enum {
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
 	BTF_KIND_DECL_TAG	= 17,	/* Decl Tag */
 	BTF_KIND_TYPE_TAG	= 18,	/* Type Tag */
+	BTF_KIND_ENUM64		= 19,	/* Enumeration up to 64-bit values */

 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -186,4 +187,14 @@ struct btf_decl_tag {
       __s32   component_idx;
 };

+/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64".
+ * The exact number of btf_enum64 is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum64 {
+	__u32	name_off;
+	__u32	val_lo32;
+	__u32	val_hi32;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FCNTL_H
+#define _UAPI_LINUX_FCNTL_H
+
+#include <asm/fcntl.h>
+#include <linux/openat2.h>
+
+#define F_SETLEASE	(F_LINUX_SPECIFIC_BASE + 0)
+#define F_GETLEASE	(F_LINUX_SPECIFIC_BASE + 1)
+
+/*
+ * Cancel a blocking posix lock; internal use only until we expose an
+ * asynchronous lock api to userspace:
+ */
+#define F_CANCELLK	(F_LINUX_SPECIFIC_BASE + 5)
+
+/* Create a file descriptor with FD_CLOEXEC set. */
+#define F_DUPFD_CLOEXEC	(F_LINUX_SPECIFIC_BASE + 6)
+
+/*
+ * Request nofications on a directory.
+ * See below for events that may be notified.
+ */
+#define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
+
+/*
+ * Set and get of pipe page size array
+ */
+#define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
+
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+#define F_SEAL_FUTURE_WRITE	0x0010  /* prevent future writes while mapped */
+#define F_SEAL_EXEC	0x0020  /* prevent chmod modifying exec bits */
+/* (1U << 31) is reserved for signed error codes */
+
+/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWH_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NONE	1
+#define RWH_WRITE_LIFE_SHORT	2
+#define RWH_WRITE_LIFE_MEDIUM	3
+#define RWH_WRITE_LIFE_LONG	4
+#define RWH_WRITE_LIFE_EXTREME	5
+
+/*
+ * The originally introduced spelling is remained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	RWH_WRITE_LIFE_NOT_SET
+
+/*
+ * Types of directory notifications that may be requested.
+ */
+#define DN_ACCESS	0x00000001	/* File accessed */
+#define DN_MODIFY	0x00000002	/* File modified */
+#define DN_CREATE	0x00000004	/* File created */
+#define DN_DELETE	0x00000008	/* File removed */
+#define DN_RENAME	0x00000010	/* File renamed */
+#define DN_ATTRIB	0x00000020	/* File changed attibutes */
+#define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
+
+/*
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value.  AT_EACCESS is
+ * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * unlinkat.  The two functions do completely different things and therefore,
+ * the flags can be allowed to overlap.  For example, passing AT_REMOVEDIR to
+ * faccessat would be undefined behavior and thus treating it equivalent to
+ * AT_EACCESS is valid undefined behavior.
+ */
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+#define AT_EACCESS		0x200	/* Test access permitted for
+                                           effective IDs, not real IDs.  */
+#define AT_REMOVEDIR		0x200   /* Remove directory instead of
+                                           unlinking file.  */
+#define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
+#define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
+
+#define AT_STATX_SYNC_TYPE	0x6000	/* Type of synchronisation required from statx() */
+#define AT_STATX_SYNC_AS_STAT	0x0000	/* - Do whatever stat() does */
+#define AT_STATX_FORCE_SYNC	0x2000	/* - Force the attributes to be sync'd with the server */
+#define AT_STATX_DONT_SYNC	0x4000	/* - Don't sync attributes with the server */
+
+#define AT_RECURSIVE		0x8000	/* Apply to the entire subtree */
+
+#endif /* _UAPI_LINUX_FCNTL_H */
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -348,6 +348,8 @@ enum {
 	IFLA_PARENT_DEV_NAME,
 	IFLA_PARENT_DEV_BUS_NAME,
 	IFLA_GRO_MAX_SIZE,
+	IFLA_TSO_MAX_SIZE,
+	IFLA_TSO_MAX_SEGS,

 	__IFLA_MAX
 };
@@ -603,6 +605,7 @@ enum {
 	IFLA_MACVLAN_MACADDR_COUNT,
 	IFLA_MACVLAN_BC_QUEUE_LEN,
 	IFLA_MACVLAN_BC_QUEUE_LEN_USED,
+	IFLA_MACVLAN_BC_CUTOFF,
 	__IFLA_MACVLAN_MAX,
 };

@@ -671,6 +674,7 @@ enum {
 	IFLA_XFRM_UNSPEC,
 	IFLA_XFRM_LINK,
 	IFLA_XFRM_IF_ID,
+	IFLA_XFRM_COLLECT_METADATA,
 	__IFLA_XFRM_MAX
 };

@@ -860,6 +864,7 @@ enum {
 	IFLA_BOND_PEER_NOTIF_DELAY,
 	IFLA_BOND_AD_LACP_ACTIVE,
 	IFLA_BOND_MISSED_MAX,
+	IFLA_BOND_NS_IP6_TARGET,
 	__IFLA_BOND_MAX,
 };

@@ -887,6 +892,7 @@ enum {
 	IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
 	IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
 	IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+	IFLA_BOND_SLAVE_PRIO,
 	__IFLA_BOND_SLAVE_MAX,
 };

--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_NETDEV_H
+#define _UAPI_LINUX_NETDEV_H
+
+#define NETDEV_FAMILY_NAME	"netdev"
+#define NETDEV_FAMILY_VERSION	1
+
+/**
+ * enum netdev_xdp_act
+ * @NETDEV_XDP_ACT_BASIC: XDP feautues set supported by all drivers
+ *   (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
+ * @NETDEV_XDP_ACT_REDIRECT: The netdev supports XDP_REDIRECT
+ * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements
+ *   ndo_xdp_xmit callback.
+ * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP
+ *   in zero copy mode.
+ * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw
+ *   offloading.
+ * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear
+ *   XDP buffer support in the driver napi callback.
+ * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements
+ *   non-linear XDP buffer support in ndo_xdp_xmit callback.
+ */
+enum netdev_xdp_act {
+	NETDEV_XDP_ACT_BASIC = 1,
+	NETDEV_XDP_ACT_REDIRECT = 2,
+	NETDEV_XDP_ACT_NDO_XMIT = 4,
+	NETDEV_XDP_ACT_XSK_ZEROCOPY = 8,
+	NETDEV_XDP_ACT_HW_OFFLOAD = 16,
+	NETDEV_XDP_ACT_RX_SG = 32,
+	NETDEV_XDP_ACT_NDO_XMIT_SG = 64,
+
+	NETDEV_XDP_ACT_MASK = 127,
+};
+
+enum {
+	NETDEV_A_DEV_IFINDEX = 1,
+	NETDEV_A_DEV_PAD,
+	NETDEV_A_DEV_XDP_FEATURES,
+
+	__NETDEV_A_DEV_MAX,
+	NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
+};
+
+enum {
+	NETDEV_CMD_DEV_GET = 1,
+	NETDEV_CMD_DEV_ADD_NTF,
+	NETDEV_CMD_DEV_DEL_NTF,
+	NETDEV_CMD_DEV_CHANGE_NTF,
+
+	__NETDEV_CMD_MAX,
+	NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
+};
+
+#define NETDEV_MCGRP_MGMT	"mgmt"
+
+#endif /* _UAPI_LINUX_NETDEV_H */
--- a/include/uapi/linux/openat2.h
+++ b/include/uapi/linux/openat2.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_OPENAT2_H
+#define _UAPI_LINUX_OPENAT2_H
+
+#include <linux/types.h>
+
+/*
+ * Arguments for how openat2(2) should open the target path. If only @flags and
+ * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
+ *
+ * However, unlike openat(2), unknown or invalid bits in @flags result in
+ * -EINVAL rather than being silently ignored. @mode must be zero unless one of
+ * {O_CREAT, O_TMPFILE} are set.
+ *
+ * @flags: O_* flags.
+ * @mode: O_CREAT/O_TMPFILE file mode.
+ * @resolve: RESOLVE_* flags.
+ */
+struct open_how {
+	__u64 flags;
+	__u64 mode;
+	__u64 resolve;
+};
+
+/* how->resolve flags for openat2(2). */
+#define RESOLVE_NO_XDEV		0x01 /* Block mount-point crossings
+					(includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS	0x02 /* Block traversal through procfs-style
+					"magic-links". */
+#define RESOLVE_NO_SYMLINKS	0x04 /* Block traversal through all symlinks
+					(implies OEXT_NO_MAGICLINKS) */
+#define RESOLVE_BENEATH		0x08 /* Block "lexical" trickery like
+					"..", symlinks, and absolute
+					paths which escape the dirfd. */
+#define RESOLVE_IN_ROOT		0x10 /* Make all jumps to "/" and ".."
+					be scoped inside the dirfd
+					(similar to chroot(2)). */
+#define RESOLVE_CACHED		0x20 /* Only complete if resolution can be
+					completed through cached lookup. May
+					return -EAGAIN if that's not
+					possible. */
+
+#endif /* _UAPI_LINUX_OPENAT2_H */
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -164,8 +164,6 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_WEIGHT_STRUCT		= 1U << 24,

 	PERF_SAMPLE_MAX = 1U << 25,		/* non-ABI */
-
-	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };

 #define PERF_SAMPLE_WEIGHT_TYPE	(PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
@@ -204,6 +202,8 @@ enum perf_branch_sample_type_shift {

 	PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT	= 17, /* save low level index of raw branch records */

+	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };

@@ -233,6 +233,8 @@ enum perf_branch_sample_type {

 	PERF_SAMPLE_BRANCH_HW_INDEX	= 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,

+	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };

@@ -251,9 +253,50 @@ enum {
 	PERF_BR_SYSRET		= 8,	/* syscall return */
 	PERF_BR_COND_CALL	= 9,	/* conditional function call */
 	PERF_BR_COND_RET	= 10,	/* conditional function return */
+	PERF_BR_ERET		= 11,	/* exception return */
+	PERF_BR_IRQ		= 12,	/* irq */
+	PERF_BR_SERROR		= 13,	/* system error */
+	PERF_BR_NO_TX		= 14,	/* not in transaction */
+	PERF_BR_EXTEND_ABI	= 15,	/* extend ABI */
 	PERF_BR_MAX,
 };

+/*
+ * Common branch speculation outcome classification
+ */
+enum {
+	PERF_BR_SPEC_NA			= 0,	/* Not available */
+	PERF_BR_SPEC_WRONG_PATH		= 1,	/* Speculative but on wrong path */
+	PERF_BR_NON_SPEC_CORRECT_PATH	= 2,	/* Non-speculative but on correct path */
+	PERF_BR_SPEC_CORRECT_PATH	= 3,	/* Speculative and on correct path */
+	PERF_BR_SPEC_MAX,
+};
+
+enum {
+	PERF_BR_NEW_FAULT_ALGN		= 0,    /* Alignment fault */
+	PERF_BR_NEW_FAULT_DATA		= 1,    /* Data fault */
+	PERF_BR_NEW_FAULT_INST		= 2,    /* Inst fault */
+	PERF_BR_NEW_ARCH_1		= 3,    /* Architecture specific */
+	PERF_BR_NEW_ARCH_2		= 4,    /* Architecture specific */
+	PERF_BR_NEW_ARCH_3		= 5,    /* Architecture specific */
+	PERF_BR_NEW_ARCH_4		= 6,    /* Architecture specific */
+	PERF_BR_NEW_ARCH_5		= 7,    /* Architecture specific */
+	PERF_BR_NEW_MAX,
+};
+
+enum {
+	PERF_BR_PRIV_UNKNOWN	= 0,
+	PERF_BR_PRIV_USER	= 1,
+	PERF_BR_PRIV_KERNEL	= 2,
+	PERF_BR_PRIV_HV		= 3,
+};
+
+#define PERF_BR_ARM64_FIQ		PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT	PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT	PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST	PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA	PERF_BR_NEW_ARCH_5
+
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
 	(PERF_SAMPLE_BRANCH_USER|\
 	 PERF_SAMPLE_BRANCH_KERNEL|\
@@ -299,6 +342,7 @@ enum {
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		id;           } && PERF_FORMAT_ID
+ *	  { u64		lost;         } && PERF_FORMAT_LOST
 *	} && !PERF_FORMAT_GROUP
 *
 *	{ u64		nr;
@@ -306,6 +350,7 @@ enum {
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		value;
 *	    { u64	id;           } && PERF_FORMAT_ID
+ *	    { u64	lost;         } && PERF_FORMAT_LOST
 *	  }		cntr[nr];
 *	} && PERF_FORMAT_GROUP
 * };
@@ -315,8 +360,9 @@ enum perf_event_read_format {
 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 	PERF_FORMAT_ID				= 1U << 2,
 	PERF_FORMAT_GROUP			= 1U << 3,
+	PERF_FORMAT_LOST			= 1U << 4,

-	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
+	PERF_FORMAT_MAX = 1U << 5,		/* non-ABI */
 };

 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
@@ -328,6 +374,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
 #define PERF_ATTR_SIZE_VER6	120	/* add: aux_sample_size */
 #define PERF_ATTR_SIZE_VER7	128	/* add: sig_data */
+#define PERF_ATTR_SIZE_VER8	136	/* add: config3 */

 /*
 * Hardware event_id to monitor via a performance monitoring event:
@@ -465,8 +512,12 @@ struct perf_event_attr {
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
 	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
+	 * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be
+	 * truncated accordingly on 32 bit architectures.
 	 */
 	__u64	sig_data;
+
+	__u64	config3; /* extension of config2 */
 };

 /*
@@ -487,7 +538,7 @@ struct perf_event_query_bpf {
 	/*
 	 * User provided buffer to store program ids
 	 */
-	__u32	ids[0];
+	__u32	ids[];
 };

 /*
@@ -1288,7 +1339,9 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0xa available */
+/* 5-0x8 available */
+#define PERF_MEM_LVLNUM_CXL	0x09 /* CXL */
+#define PERF_MEM_LVLNUM_IO	0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
 #define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
@@ -1306,7 +1359,7 @@ union perf_mem_data_src {
 #define PERF_MEM_SNOOP_SHIFT	19

 #define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
-/* 1 free */
+#define PERF_MEM_SNOOPX_PEER	0x02 /* xfer from peer */
 #define PERF_MEM_SNOOPX_SHIFT  38

 /* locked instruction */
@@ -1356,6 +1409,7 @@ union perf_mem_data_src {
 *     abort: aborting a hardware transaction
 *    cycles: cycles from last branch (or 0 if not supported)
 *      type: branch type
+ *      spec: branch speculation info (or 0 if not supported)
 */
 struct perf_branch_entry {
 	__u64	from;
@@ -1366,7 +1420,10 @@ struct perf_branch_entry {
 		abort:1,    /* transaction abort */
 		cycles:16,  /* cycle count to last branch */
 		type:4,     /* branch type */
-		reserved:40;
+		spec:2,     /* branch speculation info */
+		new_type:4, /* additional branch type */
+		priv:3,     /* privilege level */
+		reserved:31;
 };

 union perf_sample_weight {
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -180,7 +180,7 @@ struct tc_u32_sel {

 	short			hoff;
 	__be32			hmask;
-	struct tc_u32_key	keys[0];
+	struct tc_u32_key	keys[];
 };

 struct tc_u32_mark {
@@ -192,7 +192,7 @@ struct tc_u32_mark {
 struct tc_u32_pcnt {
 	__u64 rcnt;
 	__u64 rhit;
-	__u64 kcnts[0];
+	__u64 kcnts[];
 };

 /* Flags */
--- a/scripts/build-fuzzers.sh
+++ b/scripts/build-fuzzers.sh
@@ -17,6 +17,24 @@ mkdir -p "$OUT"

 export LIB_FUZZING_ENGINE=${LIB_FUZZING_ENGINE:--fsanitize=fuzzer}

+# libelf is compiled with _FORTIFY_SOURCE by default and it
+# isn't compatible with MSan. It was borrowed
+# from https://github.com/google/oss-fuzz/pull/7422
+if [[ "$SANITIZER" == memory ]]; then
+    CFLAGS+=" -U_FORTIFY_SOURCE"
+    CXXFLAGS+=" -U_FORTIFY_SOURCE"
+fi
+
+# The alignment check is turned off by default on OSS-Fuzz/CFLite so it should be
+# turned on explicitly there. It was borrowed from
+# https://github.com/google/oss-fuzz/pull/7092
+if [[ "$SANITIZER" == undefined ]]; then
+    additional_ubsan_checks=alignment
+    UBSAN_FLAGS="-fsanitize=$additional_ubsan_checks -fno-sanitize-recover=$additional_ubsan_checks"
+    CFLAGS+=" $UBSAN_FLAGS"
+    CXXFLAGS+=" $UBSAN_FLAGS"
+fi
+
 # Ideally libbelf should be built using release tarballs available
 # at https://sourceware.org/elfutils/ftp/. Unfortunately sometimes they
 # fail to compile (for example, elfutils-0.185 fails to compile with LDFLAGS enabled
@@ -26,19 +44,25 @@ rm -rf elfutils
 git clone git://sourceware.org/git/elfutils.git
 (
 cd elfutils
-git checkout 983e86fd89e8bf02f2d27ba5dce5bf078af4ceda
+git checkout 67a187d4c1790058fc7fd218317851cb68bb087c
 git log --oneline -1

 # ASan isn't compatible with -Wl,--no-undefined: https://github.com/google/sanitizers/issues/380
-find -name Makefile.am | xargs sed -i 's/,--no-undefined//'
+sed -i 's/^\(NO_UNDEFINED=\).*/\1/' configure.ac

 # ASan isn't compatible with -Wl,-z,defs either:
 # https://clang.llvm.org/docs/AddressSanitizer.html#usage
 sed -i 's/^\(ZDEFS_LDFLAGS=\).*/\1/' configure.ac

+if [[ "$SANITIZER" == undefined ]]; then
+    # That's basicaly what --enable-sanitize-undefined does to turn off unaligned access
+    # elfutils heavily relies on on i386/x86_64 but without changing compiler flags along the way
+    sed -i 's/\(check_undefined_val\)=[0-9]/\1=1/' configure.ac
+fi

 autoreconf -i -f
 if ! ./configure --enable-maintainer-mode --disable-debuginfod --disable-libdebuginfod \
+            --disable-demangler --without-bzlib --without-lzma --without-zstd \
 	    CC="$CC" CFLAGS="-Wno-error $CFLAGS" CXX="$CXX" CXXFLAGS="-Wno-error $CXXFLAGS" LDFLAGS="$CFLAGS"; then
    cat config.log
    exit 1
--- a/scripts/sync-kernel.sh
+++ b/scripts/sync-kernel.sh
@@ -42,8 +42,11 @@ PATH_MAP=(									\
 	[tools/include/uapi/linux/bpf_common.h]=include/uapi/linux/bpf_common.h	\
 	[tools/include/uapi/linux/bpf.h]=include/uapi/linux/bpf.h		\
 	[tools/include/uapi/linux/btf.h]=include/uapi/linux/btf.h		\
+	[tools/include/uapi/linux/fcntl.h]=include/uapi/linux/fcntl.h		\
+	[tools/include/uapi/linux/openat2.h]=include/uapi/linux/openat2.h	\
 	[tools/include/uapi/linux/if_link.h]=include/uapi/linux/if_link.h	\
 	[tools/include/uapi/linux/if_xdp.h]=include/uapi/linux/if_xdp.h		\
+	[tools/include/uapi/linux/netdev.h]=include/uapi/linux/netdev.h		\
 	[tools/include/uapi/linux/netlink.h]=include/uapi/linux/netlink.h	\
 	[tools/include/uapi/linux/pkt_cls.h]=include/uapi/linux/pkt_cls.h	\
 	[tools/include/uapi/linux/pkt_sched.h]=include/uapi/linux/pkt_sched.h	\
@@ -51,8 +54,8 @@ PATH_MAP=(									\
 	[Documentation/bpf/libbpf]=docs						\
 )

-LIBBPF_PATHS="${!PATH_MAP[@]} :^tools/lib/bpf/Makefile :^tools/lib/bpf/Build :^tools/lib/bpf/.gitignore :^tools/include/tools/libc_compat.h"
-LIBBPF_VIEW_PATHS="${PATH_MAP[@]}"
+LIBBPF_PATHS=("${!PATH_MAP[@]}" ":^tools/lib/bpf/Makefile" ":^tools/lib/bpf/Build" ":^tools/lib/bpf/.gitignore" ":^tools/include/tools/libc_compat.h")
+LIBBPF_VIEW_PATHS=("${PATH_MAP[@]}")
 LIBBPF_VIEW_EXCLUDE_REGEX='^src/(Makefile|Build|test_libbpf\.c|bpf_helper_defs\.h|\.gitignore)$|^docs/(\.gitignore|api\.rst|conf\.py)$|^docs/sphinx/.*'
 LINUX_VIEW_EXCLUDE_REGEX='^include/tools/libc_compat.h$'

@@ -85,7 +88,9 @@ commit_desc()
 # $2 - paths filter
 commit_signature()
 {
-	git show --pretty='("%s")|%aI|%b' --shortstat $1 -- ${2-.} | tr '\n' '|'
+	local ref=$1
+	shift
+	git show --pretty='("%s")|%aI|%b' --shortstat $ref -- "${@-.}" | tr '\n' '|'
 }

 # Cherry-pick commits touching libbpf-related files
@@ -104,7 +109,7 @@ cherry_pick_commits()
 	local libbpf_conflict_cnt
 	local desc

-	new_commits=$(git rev-list --no-merges --topo-order --reverse ${baseline_tag}..${tip_tag} ${LIBBPF_PATHS[@]})
+	new_commits=$(git rev-list --no-merges --topo-order --reverse ${baseline_tag}..${tip_tag} -- "${LIBBPF_PATHS[@]}")
 	for new_commit in ${new_commits}; do
 		desc="$(commit_desc ${new_commit})"
 		signature="$(commit_signature ${new_commit} "${LIBBPF_PATHS[@]}")"
@@ -138,7 +143,7 @@ cherry_pick_commits()
 		echo "Picking '${desc}'..."
 		if ! git cherry-pick ${new_commit} &>/dev/null; then
 			echo "Warning! Cherry-picking '${desc} failed, checking if it's non-libbpf files causing problems..."
-			libbpf_conflict_cnt=$(git diff --name-only --diff-filter=U -- ${LIBBPF_PATHS[@]} | wc -l)
+			libbpf_conflict_cnt=$(git diff --name-only --diff-filter=U -- "${LIBBPF_PATHS[@]}" | wc -l)
 			conflict_cnt=$(git diff --name-only | wc -l)
 			prompt_resolution=1

@@ -257,7 +262,7 @@ if ((${COMMIT_CNT} <= 0)); then
 fi

 # Exclude baseline commit and generate nice cover letter with summary
-git format-patch ${SQUASH_BASE_TAG}..${SQUASH_TIP_TAG} --cover-letter -o ${TMP_DIR}/patches
+git format-patch --no-signature ${SQUASH_BASE_TAG}..${SQUASH_TIP_TAG} --cover-letter -o ${TMP_DIR}/patches

 # Now is time to re-apply libbpf-related linux patches to libbpf repo
 cd_to ${LIBBPF_REPO}
@@ -284,7 +289,7 @@ cd_to ${LIBBPF_REPO}
 helpers_changes=$(git status --porcelain src/bpf_helper_defs.h | wc -l)
 if ((${helpers_changes} == 1)); then
 	git add src/bpf_helper_defs.h
-	git commit -m "sync: auto-generate latest BPF helpers
+	git commit -s -m "sync: auto-generate latest BPF helpers

 Latest changes to BPF helper definitions.
 " -- src/bpf_helper_defs.h
@@ -306,7 +311,7 @@ Baseline bpf-next commit:   ${BASELINE_COMMIT}\n\
 Checkpoint bpf-next commit: ${TIP_COMMIT}\n\
 Baseline bpf commit:        ${BPF_BASELINE_COMMIT}\n\
 Checkpoint bpf commit:      ${BPF_TIP_COMMIT}/" |				      \
-git commit --file=-
+git commit -s --file=-

 echo "SUCCESS! ${COMMIT_CNT} commits synced."

@@ -316,10 +321,10 @@ cd_to ${LINUX_REPO}
 git checkout -b ${VIEW_TAG} ${TIP_COMMIT}
 FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --tree-filter "${LIBBPF_TREE_FILTER}" ${VIEW_TAG}^..${VIEW_TAG}
 FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --subdirectory-filter __libbpf ${VIEW_TAG}^..${VIEW_TAG}
-git ls-files -- ${LIBBPF_VIEW_PATHS[@]} | grep -v -E "${LINUX_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/linux-view.ls
+git ls-files -- "${LIBBPF_VIEW_PATHS[@]}" | grep -v -E "${LINUX_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/linux-view.ls

 cd_to ${LIBBPF_REPO}
-git ls-files -- ${LIBBPF_VIEW_PATHS[@]} | grep -v -E "${LIBBPF_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/github-view.ls
+git ls-files -- "${LIBBPF_VIEW_PATHS[@]}" | grep -v -E "${LIBBPF_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/github-view.ls

 echo "Comparing list of files..."
 diff -u ${TMP_DIR}/linux-view.ls ${TMP_DIR}/github-view.ls
--- a/src/Makefile
+++ b/src/Makefile
@@ -8,10 +8,24 @@ else
 	msg = @printf '  %-8s %s%s\n' "$(1)" "$(2)" "$(if $(3), $(3))";
 endif

-LIBBPF_VERSION := $(shell \
-	grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
-	sort -rV | head -n1 | cut -d'_' -f2)
-LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
+LIBBPF_MAJOR_VERSION := 1
+LIBBPF_MINOR_VERSION := 3
+LIBBPF_PATCH_VERSION := 0
+LIBBPF_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).$(LIBBPF_PATCH_VERSION)
+LIBBPF_MAJMIN_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).0
+LIBBPF_MAP_VERSION := $(shell grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | sort -rV | head -n1 | cut -d'_' -f2)
+ifneq ($(LIBBPF_MAJMIN_VERSION), $(LIBBPF_MAP_VERSION))
+$(error Libbpf release ($(LIBBPF_VERSION)) and map ($(LIBBPF_MAP_VERSION)) versions are out of sync!)
+endif
+
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+$(call allow-override,CC,$(CROSS_COMPILE)cc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)

 TOPDIR = ..

@@ -21,8 +35,9 @@ ALL_CFLAGS := $(INCLUDES)
 SHARED_CFLAGS += -fPIC -fvisibility=hidden -DSHARED

 CFLAGS ?= -g -O2 -Werror -Wall -std=gnu89
-ALL_CFLAGS += $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-ALL_LDFLAGS += $(LDFLAGS)
+ALL_CFLAGS += $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 $(EXTRA_CFLAGS)
+ALL_LDFLAGS += $(LDFLAGS) $(EXTRA_LDFLAGS)
+
 ifdef NO_PKG_CONFIG
 	ALL_LDFLAGS += -lelf -lz
 else
@@ -35,9 +50,9 @@ OBJDIR ?= .
 SHARED_OBJDIR := $(OBJDIR)/sharedobjs
 STATIC_OBJDIR := $(OBJDIR)/staticobjs
 OBJS := bpf.o btf.o libbpf.o libbpf_errno.o netlink.o \
-	nlattr.o str_error.o libbpf_probes.o bpf_prog_linfo.o xsk.o \
+	nlattr.o str_error.o libbpf_probes.o bpf_prog_linfo.o \
 	btf_dump.o hashmap.o ringbuf.o strset.o linker.o gen_loader.o \
-	relo_core.o
+	relo_core.o usdt.o zip.o
 SHARED_OBJS := $(addprefix $(SHARED_OBJDIR)/,$(OBJS))
 STATIC_OBJS := $(addprefix $(STATIC_OBJDIR)/,$(OBJS))

@@ -49,9 +64,10 @@ ifndef BUILD_STATIC_ONLY
 	VERSION_SCRIPT := libbpf.map
 endif

-HEADERS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+HEADERS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \
 	   bpf_helpers.h bpf_helper_defs.h bpf_tracing.h \
-	   bpf_endian.h bpf_core_read.h skel_internal.h libbpf_version.h
+	   bpf_endian.h bpf_core_read.h skel_internal.h libbpf_version.h \
+	   usdt.bpf.h
 UAPI_HEADERS := $(addprefix $(TOPDIR)/include/uapi/linux/,\
 			    bpf.h bpf_common.h btf.h)

@@ -61,7 +77,8 @@ INSTALL = install

 DESTDIR ?=

-ifeq ($(filter-out %64 %64be %64eb %64le %64el s390x, $(shell uname -m)),)
+HOSTARCH = $(firstword $(subst -, ,$(shell $(CC) -dumpmachine)))
+ifeq ($(filter-out %64 %64be %64eb %64le %64el s390x, $(HOSTARCH)),)
 	LIBSUBDIR := lib64
 else
 	LIBSUBDIR := lib
@@ -99,7 +116,7 @@ $(OBJDIR)/libbpf.so.$(LIBBPF_VERSION): $(SHARED_OBJS)
 		  -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
 		  $^ $(ALL_LDFLAGS) -o $@

-$(OBJDIR)/libbpf.pc:
+$(OBJDIR)/libbpf.pc: force
 	$(Q)sed -e "s|@PREFIX@|$(PREFIX)|" \
 		-e "s|@LIBDIR@|$(LIBDIR_PC)|" \
 		-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
@@ -152,7 +169,7 @@ clean:
 	$(call msg,CLEAN)
 	$(Q)rm -rf *.o *.a *.so *.so.* *.pc $(SHARED_OBJDIR) $(STATIC_OBJDIR)

-.PHONY: cscope tags
+.PHONY: cscope tags force
 cscope:
 	$(call msg,CSCOPE)
 	$(Q)ls *.c *.h > cscope.files
@@ -162,3 +179,5 @@ tags:
 	$(call msg,CTAGS)
 	$(Q)rm -f TAGS tags
 	$(Q)ls *.c *.h | xargs $(TAGS_PROG) -a
+
+force:
--- a/src/bpf.c
+++ b/src/bpf.c
--- a/src/bpf.h
+++ b/src/bpf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */

 /*
- * common eBPF ELF operations.
+ * Common BPF ELF operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
@@ -61,48 +61,6 @@ LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      __u32 max_entries,
 			      const struct bpf_map_create_opts *opts);

-struct bpf_create_map_attr {
-	const char *name;
-	enum bpf_map_type map_type;
-	__u32 map_flags;
-	__u32 key_size;
-	__u32 value_size;
-	__u32 max_entries;
-	__u32 numa_node;
-	__u32 btf_fd;
-	__u32 btf_key_type_id;
-	__u32 btf_value_type_id;
-	__u32 map_ifindex;
-	union {
-		__u32 inner_map_fd;
-		__u32 btf_vmlinux_value_type_id;
-	};
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-				   int key_size, int value_size,
-				   int max_entries, __u32 map_flags, int node);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
-				   int key_size, int value_size,
-				   int max_entries, __u32 map_flags);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
-			      int value_size, int max_entries, __u32 map_flags);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
-					  const char *name, int key_size,
-					  int inner_map_fd, int max_entries,
-					  __u32 map_flags, int node);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
-				     const char *name, int key_size,
-				     int inner_map_fd, int max_entries,
-				     __u32 map_flags);
-
 struct bpf_prog_load_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */

@@ -138,61 +96,20 @@ struct bpf_prog_load_opts {
 	__u32 log_level;
 	__u32 log_size;
 	char *log_buf;
+	/* output: actual total log contents size (including termintaing zero).
+	 * It could be both larger than original log_size (if log was
+	 * truncated), or smaller (if log buffer wasn't filled completely).
+	 * If kernel doesn't support this feature, log_size is left unchanged.
+	 */
+	__u32 log_true_size;
+	size_t :0;
 };
-#define bpf_prog_load_opts__last_field log_buf
+#define bpf_prog_load_opts__last_field log_true_size

 LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
 			     const char *prog_name, const char *license,
 			     const struct bpf_insn *insns, size_t insn_cnt,
-			     const struct bpf_prog_load_opts *opts);
-/* this "specialization" should go away in libbpf 1.0 */
-LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
-				    const char *prog_name, const char *license,
-				    const struct bpf_insn *insns, size_t insn_cnt,
-				    const struct bpf_prog_load_opts *opts);
-
-/* This is an elaborate way to not conflict with deprecated bpf_prog_load()
- * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone.
- * With this approach, if someone is calling bpf_prog_load() with
- * 4 arguments, they will use the deprecated API, which keeps backwards
- * compatibility (both source code and binary). If bpf_prog_load() is called
- * with 6 arguments, though, it gets redirected to __bpf_prog_load.
- * So looking forward to libbpf 1.0 when this hack will be gone and
- * __bpf_prog_load() will be called just bpf_prog_load().
- */
-#ifndef bpf_prog_load
-#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__)
-#define ___bpf_prog_load4(file, type, pobj, prog_fd) \
-	bpf_prog_load_deprecated(file, type, pobj, prog_fd)
-#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \
-	bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts)
-#endif /* bpf_prog_load */
-
-struct bpf_load_program_attr {
-	enum bpf_prog_type prog_type;
-	enum bpf_attach_type expected_attach_type;
-	const char *name;
-	const struct bpf_insn *insns;
-	size_t insns_cnt;
-	const char *license;
-	union {
-		__u32 kern_version;
-		__u32 attach_prog_fd;
-	};
-	union {
-		__u32 prog_ifindex;
-		__u32 attach_btf_id;
-	};
-	__u32 prog_btf_fd;
-	__u32 func_info_rec_size;
-	const void *func_info;
-	__u32 func_info_cnt;
-	__u32 line_info_rec_size;
-	const void *line_info;
-	__u32 line_info_cnt;
-	__u32 log_level;
-	__u32 prog_flags;
-};
+			     struct bpf_prog_load_opts *opts);

 /* Flags to direct loading requirements */
 #define MAPS_RELAX_COMPAT	0x01
@@ -200,22 +117,6 @@ struct bpf_load_program_attr {
 /* Recommended log buffer size */
 #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */

-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-				      char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
-				const struct bpf_insn *insns, size_t insns_cnt,
-				const char *license, __u32 kern_version,
-				char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
-				  const struct bpf_insn *insns,
-				  size_t insns_cnt, __u32 prog_flags,
-				  const char *license, __u32 kern_version,
-				  char *log_buf, size_t log_buf_sz,
-				  int log_level);
-
 struct bpf_btf_load_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */

@@ -223,15 +124,18 @@ struct bpf_btf_load_opts {
 	char *log_buf;
 	__u32 log_level;
 	__u32 log_size;
+	/* output: actual total log contents size (including termintaing zero).
+	 * It could be both larger than original log_size (if log was
+	 * truncated), or smaller (if log buffer wasn't filled completely).
+	 * If kernel doesn't support this feature, log_size is left unchanged.
+	 */
+	__u32 log_true_size;
+	size_t :0;
 };
-#define bpf_btf_load_opts__last_field log_size
+#define bpf_btf_load_opts__last_field log_true_size

 LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
-			    const struct bpf_btf_load_opts *opts);
-
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead")
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
-			    __u32 log_buf_size, bool do_log);
+			    struct bpf_btf_load_opts *opts);

 LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
 				   __u64 flags);
@@ -244,6 +148,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
 						    void *value, __u64 flags);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);

@@ -379,8 +284,33 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values
 				    __u32 *count,
 				    const struct bpf_map_batch_opts *opts);

+struct bpf_obj_pin_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	__u32 file_flags;
+	int path_fd;
+
+	size_t :0;
+};
+#define bpf_obj_pin_opts__last_field path_fd
+
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname,
+				const struct bpf_obj_pin_opts *opts);
+
+struct bpf_obj_get_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	__u32 file_flags;
+	int path_fd;
+
+	size_t :0;
+};
+#define bpf_obj_get_opts__last_field path_fd
+
 LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_obj_get_opts(const char *pathname,
+				const struct bpf_obj_get_opts *opts);

 struct bpf_prog_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
@@ -394,10 +324,6 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
 				     enum bpf_attach_type type,
 				     const struct bpf_prog_attach_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
-LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
-				     enum bpf_attach_type type,
-				     const struct bpf_prog_attach_opts *opts);
 LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 				enum bpf_attach_type type);
@@ -413,10 +339,26 @@ struct bpf_link_create_opts {
 		struct {
 			__u64 bpf_cookie;
 		} perf_event;
+		struct {
+			__u32 flags;
+			__u32 cnt;
+			const char **syms;
+			const unsigned long *addrs;
+			const __u64 *cookies;
+		} kprobe_multi;
+		struct {
+			__u64 cookie;
+		} tracing;
+		struct {
+			__u32 pf;
+			__u32 hooknum;
+			__s32 priority;
+			__u32 flags;
+		} netfilter;
 	};
 	size_t :0;
 };
-#define bpf_link_create_opts__last_field perf_event
+#define bpf_link_create_opts__last_field kprobe_multi.cookies

 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
@@ -428,8 +370,9 @@ struct bpf_link_update_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 flags;	   /* extra flags */
 	__u32 old_prog_fd; /* expected old program FD */
+	__u32 old_map_fd;  /* expected old map FD */
 };
-#define bpf_link_update_opts__last_field old_prog_fd
+#define bpf_link_update_opts__last_field old_map_fd

 LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
 			       const struct bpf_link_update_opts *opts);
@@ -453,36 +396,131 @@ struct bpf_prog_test_run_attr {
 			     * out: length of cxt_out */
 };

-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
-
-/*
- * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_opts instead.
- */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
-				 __u32 size, void *data_out, __u32 *size_out,
-				 __u32 *retval, __u32 *duration);
 LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
+
+struct bpf_get_fd_by_id_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 open_flags; /* permissions requested for the operation on fd */
+	size_t :0;
+};
+#define bpf_get_fd_by_id_opts__last_field open_flags
+
 LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
+				const struct bpf_get_fd_by_id_opts *opts);
 LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id,
+				const struct bpf_get_fd_by_id_opts *opts);
 LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id,
+				const struct bpf_get_fd_by_id_opts *opts);
 LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id,
+				const struct bpf_get_fd_by_id_opts *opts);
 LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_prog_get_info_by_fd()** obtains information about the BPF
+ * program corresponding to *prog_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param info pointer to **struct bpf_prog_info** that will be populated with
+ * BPF program information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_map_get_info_by_fd()** obtains information about the BPF
+ * map corresponding to *map_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param map_fd BPF map file descriptor
+ * @param info pointer to **struct bpf_map_info** that will be populated with
+ * BPF map information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the 
+ * BTF object corresponding to *btf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param btf_fd BTF object file descriptor
+ * @param info pointer to **struct bpf_btf_info** that will be populated with
+ * BTF object information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the BPF
+ * link corresponding to *link_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param link_fd BPF link file descriptor
+ * @param info pointer to **struct bpf_link_info** that will be populated with
+ * BPF link information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len);
+
+struct bpf_prog_query_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 query_flags;
+	__u32 attach_flags; /* output argument */
+	__u32 *prog_ids;
+	__u32 prog_cnt; /* input+output argument */
+	__u32 *prog_attach_flags;
+};
+#define bpf_prog_query_opts__last_field prog_attach_flags
+
+LIBBPF_API int bpf_prog_query_opts(int target_fd,
+				   enum bpf_attach_type type,
+				   struct bpf_prog_query_opts *opts);
 LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
 			      __u32 query_flags, __u32 *attach_flags,
 			      __u32 *prog_ids, __u32 *prog_cnt);
+
 LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);

+#ifdef __cplusplus
+/* forward-declaring enums in C++ isn't compatible with pure C enums, so
+ * instead define bpf_enable_stats() as accepting int as an input
+ */
+LIBBPF_API int bpf_enable_stats(int type);
+#else
 enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
 LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+#endif

 struct bpf_prog_bind_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
@@ -512,8 +550,9 @@ struct bpf_test_run_opts {
 	__u32 duration;      /* out: average per repetition in ns */
 	__u32 flags;
 	__u32 cpu;
+	__u32 batch_size;
 };
-#define bpf_test_run_opts__last_field cpu
+#define bpf_test_run_opts__last_field batch_size

 LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
 				      struct bpf_test_run_opts *opts);
--- a/src/bpf_core_read.h
+++ b/src/bpf_core_read.h
@@ -29,6 +29,7 @@ enum bpf_type_id_kind {
 enum bpf_type_info_kind {
 	BPF_TYPE_EXISTS = 0,		/* type existence in target kernel */
 	BPF_TYPE_SIZE = 1,		/* type size in target kernel */
+	BPF_TYPE_MATCHES = 2,		/* type match in target kernel */
 };

 /* second argument to __builtin_preserve_enum_value() built-in */
@@ -110,21 +111,50 @@ enum bpf_enum_value_kind {
 	val;								      \
 })

+#define ___bpf_field_ref1(field)	(field)
+#define ___bpf_field_ref2(type, field)	(((typeof(type) *)0)->field)
+#define ___bpf_field_ref(args...)					    \
+	___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args)
+
 /*
 * Convenience macro to check that field actually exists in target kernel's.
 * Returns:
 *    1, if matching field is present in target kernel;
 *    0, if no matching field found.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *     bpf_core_field_exists(p->my_field);
+ *   - field reference through type and field names:
+ *     bpf_core_field_exists(struct my_type, my_field).
 */
-#define bpf_core_field_exists(field)					    \
-	__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+#define bpf_core_field_exists(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS)

 /*
 * Convenience macro to get the byte size of a field. Works for integers,
 * struct/unions, pointers, arrays, and enums.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *     bpf_core_field_size(p->my_field);
+ *   - field reference through type and field names:
+ *     bpf_core_field_size(struct my_type, my_field).
 */
-#define bpf_core_field_size(field)					    \
-	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+#define bpf_core_field_size(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE)
+
+/*
+ * Convenience macro to get field's byte offset.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *     bpf_core_field_offset(p->my_field);
+ *   - field reference through type and field names:
+ *     bpf_core_field_offset(struct my_type, my_field).
+ */
+#define bpf_core_field_offset(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET)

 /*
 * Convenience macro to get BTF type ID of a specified type, using a local BTF
@@ -154,6 +184,16 @@ enum bpf_enum_value_kind {
 #define bpf_core_type_exists(type)					    \
 	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)

+/*
+ * Convenience macro to check that provided named type
+ * (struct/union/enum/typedef) "matches" that in a target kernel.
+ * Returns:
+ *    1, if the type matches in the target kernel's BTF;
+ *    0, if the type does not match any in the target kernel
+ */
+#define bpf_core_type_matches(type)					    \
+	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES)
+
 /*
 * Convenience macro to get the byte size of a provided named type
 * (struct/union/enum/typedef) in a target kernel.
@@ -324,7 +364,7 @@ enum bpf_enum_value_kind {

 /* Non-CO-RE variant of BPF_CORE_READ_INTO() */
 #define BPF_PROBE_READ_INTO(dst, src, a, ...) ({			    \
-	___core_read(bpf_probe_read, bpf_probe_read,			    \
+	___core_read(bpf_probe_read_kernel, bpf_probe_read_kernel,	    \
 		     dst, (src), a, ##__VA_ARGS__)			    \
 })

@@ -360,7 +400,7 @@ enum bpf_enum_value_kind {

 /* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */
 #define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({			    \
-	___core_read(bpf_probe_read_str, bpf_probe_read,		    \
+	___core_read(bpf_probe_read_kernel_str, bpf_probe_read_kernel,	    \
 		     dst, (src), a, ##__VA_ARGS__)			    \
 })

--- a/src/bpf_gen_internal.h
+++ b/src/bpf_gen_internal.h
@@ -11,6 +11,7 @@ struct ksym_relo_desc {
 	int insn_idx;
 	bool is_weak;
 	bool is_typeless;
+	bool is_ld64;
 };

 struct ksym_desc {
@@ -24,6 +25,7 @@ struct ksym_desc {
 		bool typeless;
 	};
 	int insn;
+	bool is_ld64;
 };

 struct bpf_gen {
@@ -65,7 +67,7 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u
 void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
 void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
 void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
-			    bool is_typeless, int kind, int insn_idx);
+			    bool is_typeless, bool is_ld64, int kind, int insn_idx);
 void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo);
 void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx);

--- a/src/bpf_helper_defs.h
+++ b/src/bpf_helper_defs.h
@@ -29,6 +29,7 @@ struct tcp_request_sock;
 struct udp6_sock;
 struct unix_sock;
 struct task_struct;
+struct cgroup;
 struct __sk_buff;
 struct sk_msg_md;
 struct xdp_md;
@@ -38,6 +39,10 @@ struct inode;
 struct socket;
 struct file;
 struct bpf_timer;
+struct mptcp_sock;
+struct bpf_dynptr;
+struct iphdr;
+struct ipv6hdr;

 /*
 * bpf_map_lookup_elem
@@ -113,17 +118,17 @@ static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5;
 *
 * 	This helper is a "printk()-like" facility for debugging. It
 * 	prints a message defined by format *fmt* (of size *fmt_size*)
- * 	to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * 	to file *\/sys/kernel/tracing/trace* from TraceFS, if
 * 	available. It can take up to three additional **u64**
 * 	arguments (as an eBPF helpers, the total number of arguments is
 * 	limited to five).
 *
 * 	Each time the helper is called, it appends a line to the trace.
- * 	Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
- * 	open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * 	Lines are discarded while *\/sys/kernel/tracing/trace* is
+ * 	open, use *\/sys/kernel/tracing/trace_pipe* to avoid this.
 * 	The format of the trace is customizable, and the exact output
 * 	one will get depends on the options set in
- * 	*\/sys/kernel/debug/tracing/trace_options* (see also the
+ * 	*\/sys/kernel/tracing/trace_options* (see also the
 * 	*README* file under the same directory). However, it usually
 * 	defaults to something like:
 *
@@ -532,6 +537,9 @@ static long (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_k
 * 		sending the packet. This flag was added for GRE
 * 		encapsulation, but might be used with other protocols
 * 		as well in the future.
+ * 	**BPF_F_NO_TUNNEL_KEY**
+ * 		Add a flag to tunnel metadata indicating that no tunnel
+ * 		key should be set in the resulting tunnel header.
 *
 * 	Here is a typical usage on the transmit path:
 *
@@ -961,8 +969,8 @@ static long (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (vo
 * Returns
 * 	The return value depends on the result of the test, and can be:
 *
- * 	* 0, if current task belongs to the cgroup2.
- * 	* 1, if current task does not belong to the cgroup2.
+ * 	* 1, if current task belongs to the cgroup2.
+ * 	* 0, if current task does not belong to the cgroup2.
 * 	* A negative error code, if an error occurred.
 */
 static long (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37;
@@ -1001,7 +1009,8 @@ static long (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags
 * 	Pull in non-linear data in case the *skb* is non-linear and not
 * 	all of *len* are part of the linear section. Make *len* bytes
 * 	from *skb* readable and writable. If a zero value is passed for
- * 	*len*, then the whole length of the *skb* is pulled.
+ * 	*len*, then all bytes in the linear part of *skb* will be made
+ * 	readable and writable.
 *
 * 	This helper is only needed for reading and writing with direct
 * 	packet access.
@@ -1204,14 +1213,19 @@ static long (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
 * 	* **SOL_SOCKET**, which supports the following *optname*\ s:
 * 	  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
 * 	  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
- * 	  **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
+ * 	  **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**,
+ * 	  **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**.
 * 	* **IPPROTO_TCP**, which supports the following *optname*\ s:
 * 	  **TCP_CONGESTION**, **TCP_BPF_IW**,
 * 	  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
 * 	  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- * 	  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
+ * 	  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
+ * 	  **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
+ * 	  **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
+ * 	  **TCP_BPF_RTO_MIN**.
 * 	* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * 	* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	* **IPPROTO_IPV6**, which supports the following *optname*\ s:
+ * 	  **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
 *
 * Returns
 * 	0 on success, or a negative error in case of failure.
@@ -1234,10 +1248,12 @@ static long (*bpf_setsockopt)(void *bpf_socket, int level, int optname, void *op
 * 	There are two supported modes at this time:
 *
 * 	* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- * 	  (room space is added or removed below the layer 2 header).
+ * 	  (room space is added or removed between the layer 2 and
+ * 	  layer 3 headers).
 *
 * 	* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * 	  (room space is added or removed below the layer 3 header).
+ * 	  (room space is added or removed between the layer 3 and
+ * 	  layer 4 headers).
 *
 * 	The following flags are supported at this time:
 *
@@ -1261,6 +1277,11 @@ static long (*bpf_setsockopt)(void *bpf_socket, int level, int optname, void *op
 * 	  Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
 * 	  L2 type as Ethernet.
 *
+ * 	* **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ * 	  **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ * 	  Indicate the new IP header version after decapsulating the outer
+ * 	  IP header. Used when the inner and outer IP versions are different.
+ *
 * 	A call to this helper is susceptible to change the underlying
 * 	packet buffer. Therefore, at load time, all checks on pointers
 * 	previously done by the verifier are invalidated and must be
@@ -1299,7 +1320,7 @@ static long (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32
 * 	**XDP_REDIRECT** on success, or the value of the two lower bits
 * 	of the *flags* argument on error.
 */
-static long (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51;
+static long (*bpf_redirect_map)(void *map, __u64 key, __u64 flags) = (void *) 51;

 /*
 * bpf_sk_redirect_map
@@ -1429,7 +1450,7 @@ static long (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf
 /*
 * bpf_perf_prog_read_value
 *
- * 	For en eBPF program attached to a perf event, retrieve the
+ * 	For an eBPF program attached to a perf event, retrieve the
 * 	value of the event counter associated to *ctx* and store it in
 * 	the structure pointed by *buf* and of size *buf_size*. Enabled
 * 	and running times are also stored in the structure (see
@@ -1458,12 +1479,10 @@ static long (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct
 * 	  and **BPF_CGROUP_INET6_CONNECT**.
 *
 * 	This helper actually implements a subset of **getsockopt()**.
- * 	It supports the following *level*\ s:
- *
- * 	* **IPPROTO_TCP**, which supports *optname*
- * 	  **TCP_CONGESTION**.
- * 	* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
- * 	* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	It supports the same set of *optname*\ s that is supported by
+ * 	the **bpf_setsockopt**\ () helper.  The exceptions are
+ * 	**TCP_BPF_*** is **bpf_setsockopt**\ () only and
+ * 	**TCP_SAVED_SYN** is **bpf_getsockopt**\ () only.
 *
 * Returns
 * 	0 on success, or a negative error in case of failure.
@@ -1737,8 +1756,18 @@ static long (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct
 * 	**BPF_F_USER_STACK**
 * 		Collect a user space stack instead of a kernel stack.
 * 	**BPF_F_USER_BUILD_ID**
- * 		Collect buildid+offset instead of ips for user stack,
- * 		only valid if **BPF_F_USER_STACK** is also specified.
+ * 		Collect (build_id, file_offset) instead of ips for user
+ * 		stack, only valid if **BPF_F_USER_STACK** is also
+ * 		specified.
+ *
+ * 		*file_offset* is an offset relative to the beginning
+ * 		of the executable or shared object file backing the vma
+ * 		which the *ip* falls in. It is *not* an offset relative
+ * 		to that object's base address. Accordingly, it must be
+ * 		adjusted by adding (sh_addr - sh_offset), where
+ * 		sh_{addr,offset} correspond to the executable section
+ * 		containing *file_offset* in the object, for comparisons
+ * 		to symbols' st_value to be valid.
 *
 * 	**bpf_get_stack**\ () can collect up to
 * 	**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
@@ -1752,8 +1781,8 @@ static long (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct
 * 		# sysctl kernel.perf_event_max_stack=<new value>
 *
 * Returns
- * 	A non-negative value equal to or less than *size* on success,
- * 	or a negative error in case of failure.
+ * 	The non-negative copied *buf* length equal to or less than
+ * 	*size* on success, or a negative error in case of failure.
 */
 static long (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67;

@@ -1803,9 +1832,18 @@ static long (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *
 * 	**BPF_FIB_LOOKUP_DIRECT**
 * 		Do a direct table lookup vs full lookup using FIB
 * 		rules.
+ * 	**BPF_FIB_LOOKUP_TBID**
+ * 		Used with BPF_FIB_LOOKUP_DIRECT.
+ * 		Use the routing table ID present in *params*->tbid
+ * 		for the fib lookup.
 * 	**BPF_FIB_LOOKUP_OUTPUT**
 * 		Perform lookup from an egress perspective (default is
 * 		ingress).
+ * 	**BPF_FIB_LOOKUP_SKIP_NEIGH**
+ * 		Skip the neighbour table lookup. *params*->dmac
+ * 		and *params*->smac will not be set as output. A common
+ * 		use case is to call **bpf_redirect_neigh**\ () after
+ * 		doing **bpf_fib_lookup**\ ().
 *
 * 	*ctx* is either **struct xdp_md** for XDP programs or
 * 	**struct sk_buff** tc cls_act programs.
@@ -2461,10 +2499,11 @@ static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *
 *
 * 	*iph* points to the start of the IPv4 or IPv6 header, while
 * 	*iph_len* contains **sizeof**\ (**struct iphdr**) or
- * 	**sizeof**\ (**struct ip6hdr**).
+ * 	**sizeof**\ (**struct ipv6hdr**).
 *
 * 	*th* points to the start of the TCP header, while *th_len*
- * 	contains **sizeof**\ (**struct tcphdr**).
+ * 	contains the length of the TCP header (at least
+ * 	**sizeof**\ (**struct tcphdr**)).
 *
 * Returns
 * 	0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
@@ -2687,10 +2726,11 @@ static long (*bpf_send_signal)(__u32 sig) = (void *) 109;
 *
 * 	*iph* points to the start of the IPv4 or IPv6 header, while
 * 	*iph_len* contains **sizeof**\ (**struct iphdr**) or
- * 	**sizeof**\ (**struct ip6hdr**).
+ * 	**sizeof**\ (**struct ipv6hdr**).
 *
 * 	*th* points to the start of the TCP header, while *th_len*
- * 	contains the length of the TCP header.
+ * 	contains the length of the TCP header with options (at least
+ * 	**sizeof**\ (**struct tcphdr**)).
 *
 * Returns
 * 	On success, lower 32 bits hold the generated SYN cookie in
@@ -3305,8 +3345,8 @@ static struct udp6_sock *(*bpf_skc_to_udp6_sock)(void *sk) = (void *) 140;
 * 		# sysctl kernel.perf_event_max_stack=<new value>
 *
 * Returns
- * 	A non-negative value equal to or less than *size* on success,
- * 	or a negative error in case of failure.
+ * 	The non-negative copied *buf* length equal to or less than
+ * 	*size* on success, or a negative error in case of failure.
 */
 static long (*bpf_get_task_stack)(struct task_struct *task, void *buf, __u32 size, __u64 flags) = (void *) 141;

@@ -3407,7 +3447,7 @@ static long (*bpf_load_hdr_opt)(struct bpf_sock_ops *skops, void *searchby_res,
 *
 * 	**-EEXIST** if the option already exists.
 *
- * 	**-EFAULT** on failrue to parse the existing header options.
+ * 	**-EFAULT** on failure to parse the existing header options.
 *
 * 	**-EPERM** if the helper cannot be used under the current
 * 	*skops*\ **->op**.
@@ -3667,7 +3707,7 @@ static long (*bpf_redirect_peer)(__u32 ifindex, __u64 flags) = (void *) 155;
 * 	a *map* with *task* as the **key**.  From this
 * 	perspective,  the usage is not much different from
 * 	**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
- * 	helper enforces the key must be an task_struct and the map must also
+ * 	helper enforces the key must be a task_struct and the map must also
 * 	be a **BPF_MAP_TYPE_TASK_STORAGE**.
 *
 * 	Underneath, the value is stored locally at *task* instead of
@@ -3745,7 +3785,7 @@ static __u64 (*bpf_ktime_get_coarse_ns)(void) = (void *) 160;
 /*
 * bpf_ima_inode_hash
 *
- * 	Returns the stored IMA hash of the *inode* (if it's avaialable).
+ * 	Returns the stored IMA hash of the *inode* (if it's available).
 * 	If the hash is larger than *size*, then only *size*
 * 	bytes will be copied to *dst*
 *
@@ -3777,12 +3817,12 @@ static struct socket *(*bpf_sock_from_file)(struct file *file) = (void *) 162;
 *
 * 	The argument *len_diff* can be used for querying with a planned
 * 	size change. This allows to check MTU prior to changing packet
- * 	ctx. Providing an *len_diff* adjustment that is larger than the
+ * 	ctx. Providing a *len_diff* adjustment that is larger than the
 * 	actual packet size (resulting in negative packet size) will in
- * 	principle not exceed the MTU, why it is not considered a
- * 	failure.  Other BPF-helpers are needed for performing the
- * 	planned size change, why the responsability for catch a negative
- * 	packet size belong in those helpers.
+ * 	principle not exceed the MTU, which is why it is not considered
+ * 	a failure.  Other BPF helpers are needed for performing the
+ * 	planned size change; therefore the responsibility for catching
+ * 	a negative packet size belongs in those helpers.
 *
 * 	Specifying *ifindex* zero means the MTU check is performed
 * 	against the current net device.  This is practical if this isn't
@@ -3992,6 +4032,12 @@ static long (*bpf_timer_set_callback)(struct bpf_timer *timer, void *callback_fn
 * 	different maps if key/value layout matches across maps.
 * 	Every bpf_timer_set_callback() can have different callback_fn.
 *
+ * 	*flags* can be one of:
+ *
+ * 	**BPF_F_TIMER_ABS**
+ * 		Start the timer in absolute expire value instead of the
+ * 		default relative one.
+ *
 *
 * Returns
 * 	0 on success.
@@ -4021,6 +4067,7 @@ static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *) 172;
 *
 * Returns
 * 	Address of the traced function.
+ * 	0 for kprobes placed within the function (not at the entry).
 */
 static __u64 (*bpf_get_func_ip)(void *ctx) = (void *) 173;

@@ -4189,13 +4236,13 @@ static long (*bpf_strncmp)(const char *s1, __u32 s1_sz, const char *s2) = (void
 /*
 * bpf_get_func_arg
 *
- * 	Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ * 	Get **n**-th argument register (zero based) of the traced function (for tracing programs)
 * 	returned in **value**.
 *
 *
 * Returns
 * 	0 on success.
- * 	**-EINVAL** if n >= arguments count of traced function.
+ * 	**-EINVAL** if n >= argument register count of traced function.
 */
 static long (*bpf_get_func_arg)(void *ctx, __u32 n, __u64 *value) = (void *) 183;

@@ -4215,32 +4262,45 @@ static long (*bpf_get_func_ret)(void *ctx, __u64 *value) = (void *) 184;
 /*
 * bpf_get_func_arg_cnt
 *
- * 	Get number of arguments of the traced function (for tracing programs).
+ * 	Get number of registers of the traced function (for tracing programs) where
+ * 	function arguments are stored in these registers.
 *
 *
 * Returns
- * 	The number of arguments of the traced function.
+ * 	The number of argument registers of the traced function.
 */
 static long (*bpf_get_func_arg_cnt)(void *ctx) = (void *) 185;

 /*
 * bpf_get_retval
 *
- * 	Get the syscall's return value that will be returned to userspace.
+ * 	Get the BPF program's return value that will be returned to the upper layers.
 *
- * 	This helper is currently supported by cgroup programs only.
+ * 	This helper is currently supported by cgroup programs and only by the hooks
+ * 	where BPF program's return value is returned to the userspace via errno.
 *
 * Returns
- * 	The syscall's return value.
+ * 	The BPF program's return value.
 */
 static int (*bpf_get_retval)(void) = (void *) 186;

 /*
 * bpf_set_retval
 *
- * 	Set the syscall's return value that will be returned to userspace.
+ * 	Set the BPF program's return value that will be returned to the upper layers.
+ *
+ * 	This helper is currently supported by cgroup programs and only by the hooks
+ * 	where BPF program's return value is returned to the userspace via errno.
+ *
+ * 	Note that there is the following corner case where the program exports an error
+ * 	via bpf_set_retval but signals success via 'return 1':
+ *
+ * 		bpf_set_retval(-EPERM);
+ * 		return 1;
+ *
+ * 	In this case, the BPF program's return value will use helper's -EPERM. This
+ * 	still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
 *
- * 	This helper is currently supported by cgroup programs only.
 *
 * Returns
 * 	0 on success, or a negative error in case of failure.
@@ -4295,4 +4355,398 @@ static long (*bpf_xdp_store_bytes)(struct xdp_md *xdp_md, __u32 offset, void *bu
 */
 static long (*bpf_copy_from_user_task)(void *dst, __u32 size, const void *user_ptr, struct task_struct *tsk, __u64 flags) = (void *) 191;

+/*
+ * bpf_skb_set_tstamp
+ *
+ * 	Change the __sk_buff->tstamp_type to *tstamp_type*
+ * 	and set *tstamp* to the __sk_buff->tstamp together.
+ *
+ * 	If there is no need to change the __sk_buff->tstamp_type,
+ * 	the tstamp value can be directly written to __sk_buff->tstamp
+ * 	instead.
+ *
+ * 	BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ * 	will be kept during bpf_redirect_*().  A non zero
+ * 	*tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ * 	*tstamp_type*.
+ *
+ * 	A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ * 	with a zero *tstamp*.
+ *
+ * 	Only IPv4 and IPv6 skb->protocol are supported.
+ *
+ * 	This function is most useful when it needs to set a
+ * 	mono delivery time to __sk_buff->tstamp and then
+ * 	bpf_redirect_*() to the egress of an iface.  For example,
+ * 	changing the (rcv) timestamp in __sk_buff->tstamp at
+ * 	ingress to a mono delivery time and then bpf_redirect_*()
+ * 	to sch_fq@phy-dev.
+ *
+ * Returns
+ * 	0 on success.
+ * 	**-EINVAL** for invalid input
+ * 	**-EOPNOTSUPP** for unsupported protocol
+ */
+static long (*bpf_skb_set_tstamp)(struct __sk_buff *skb, __u64 tstamp, __u32 tstamp_type) = (void *) 192;
+
+/*
+ * bpf_ima_file_hash
+ *
+ * 	Returns a calculated IMA hash of the *file*.
+ * 	If the hash is larger than *size*, then only *size*
+ * 	bytes will be copied to *dst*
+ *
+ * Returns
+ * 	The **hash_algo** is returned on success,
+ * 	**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ * 	invalid arguments are passed.
+ */
+static long (*bpf_ima_file_hash)(struct file *file, void *dst, __u32 size) = (void *) 193;
+
+/*
+ * bpf_kptr_xchg
+ *
+ * 	Exchange kptr at pointer *map_value* with *ptr*, and return the
+ * 	old value. *ptr* can be NULL, otherwise it must be a referenced
+ * 	pointer which will be released when this helper is called.
+ *
+ * Returns
+ * 	The old value of kptr (which can be NULL). The returned pointer
+ * 	if not NULL, is a reference which must be released using its
+ * 	corresponding release function, or moved into a BPF map before
+ * 	program exit.
+ */
+static void *(*bpf_kptr_xchg)(void *map_value, void *ptr) = (void *) 194;
+
+/*
+ * bpf_map_lookup_percpu_elem
+ *
+ * 	Perform a lookup in *percpu map* for an entry associated to
+ * 	*key* on *cpu*.
+ *
+ * Returns
+ * 	Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * 	was found or *cpu* is invalid.
+ */
+static void *(*bpf_map_lookup_percpu_elem)(void *map, const void *key, __u32 cpu) = (void *) 195;
+
+/*
+ * bpf_skc_to_mptcp_sock
+ *
+ * 	Dynamically cast a *sk* pointer to a *mptcp_sock* pointer.
+ *
+ * Returns
+ * 	*sk* if casting is valid, or **NULL** otherwise.
+ */
+static struct mptcp_sock *(*bpf_skc_to_mptcp_sock)(void *sk) = (void *) 196;
+
+/*
+ * bpf_dynptr_from_mem
+ *
+ * 	Get a dynptr to local memory *data*.
+ *
+ * 	*data* must be a ptr to a map value.
+ * 	The maximum *size* supported is DYNPTR_MAX_SIZE.
+ * 	*flags* is currently unused.
+ *
+ * Returns
+ * 	0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE,
+ * 	-EINVAL if flags is not 0.
+ */
+static long (*bpf_dynptr_from_mem)(void *data, __u32 size, __u64 flags, struct bpf_dynptr *ptr) = (void *) 197;
+
+/*
+ * bpf_ringbuf_reserve_dynptr
+ *
+ * 	Reserve *size* bytes of payload in a ring buffer *ringbuf*
+ * 	through the dynptr interface. *flags* must be 0.
+ *
+ * 	Please note that a corresponding bpf_ringbuf_submit_dynptr or
+ * 	bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the
+ * 	reservation fails. This is enforced by the verifier.
+ *
+ * Returns
+ * 	0 on success, or a negative error in case of failure.
+ */
+static long (*bpf_ringbuf_reserve_dynptr)(void *ringbuf, __u32 size, __u64 flags, struct bpf_dynptr *ptr) = (void *) 198;
+
+/*
+ * bpf_ringbuf_submit_dynptr
+ *
+ * 	Submit reserved ring buffer sample, pointed to by *data*,
+ * 	through the dynptr interface. This is a no-op if the dynptr is
+ * 	invalid/null.
+ *
+ * 	For more information on *flags*, please see
+ * 	'bpf_ringbuf_submit'.
+ *
+ * Returns
+ * 	Nothing. Always succeeds.
+ */
+static void (*bpf_ringbuf_submit_dynptr)(struct bpf_dynptr *ptr, __u64 flags) = (void *) 199;
+
+/*
+ * bpf_ringbuf_discard_dynptr
+ *
+ * 	Discard reserved ring buffer sample through the dynptr
+ * 	interface. This is a no-op if the dynptr is invalid/null.
+ *
+ * 	For more information on *flags*, please see
+ * 	'bpf_ringbuf_discard'.
+ *
+ * Returns
+ * 	Nothing. Always succeeds.
+ */
+static void (*bpf_ringbuf_discard_dynptr)(struct bpf_dynptr *ptr, __u64 flags) = (void *) 200;
+
+/*
+ * bpf_dynptr_read
+ *
+ * 	Read *len* bytes from *src* into *dst*, starting from *offset*
+ * 	into *src*.
+ * 	*flags* is currently unused.
+ *
+ * Returns
+ * 	0 on success, -E2BIG if *offset* + *len* exceeds the length
+ * 	of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
+ * 	*flags* is not 0.
+ */
+static long (*bpf_dynptr_read)(void *dst, __u32 len, const struct bpf_dynptr *src, __u32 offset, __u64 flags) = (void *) 201;
+
+/*
+ * bpf_dynptr_write
+ *
+ * 	Write *len* bytes from *src* into *dst*, starting from *offset*
+ * 	into *dst*.
+ *
+ * 	*flags* must be 0 except for skb-type dynptrs.
+ *
+ * 	For skb-type dynptrs:
+ * 	    *  All data slices of the dynptr are automatically
+ * 	       invalidated after **bpf_dynptr_write**\ (). This is
+ * 	       because writing may pull the skb and change the
+ * 	       underlying packet buffer.
+ *
+ * 	    *  For *flags*, please see the flags accepted by
+ * 	       **bpf_skb_store_bytes**\ ().
+ *
+ * Returns
+ * 	0 on success, -E2BIG if *offset* + *len* exceeds the length
+ * 	of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
+ * 	is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * 	other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
+ */
+static long (*bpf_dynptr_write)(const struct bpf_dynptr *dst, __u32 offset, void *src, __u32 len, __u64 flags) = (void *) 202;
+
+/*
+ * bpf_dynptr_data
+ *
+ * 	Get a pointer to the underlying dynptr data.
+ *
+ * 	*len* must be a statically known value. The returned data slice
+ * 	is invalidated whenever the dynptr is invalidated.
+ *
+ * 	skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * 	instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
+ *
+ * Returns
+ * 	Pointer to the underlying dynptr data, NULL if the dynptr is
+ * 	read-only, if the dynptr is invalid, or if the offset and length
+ * 	is out of bounds.
+ */
+static void *(*bpf_dynptr_data)(const struct bpf_dynptr *ptr, __u32 offset, __u32 len) = (void *) 203;
+
+/*
+ * bpf_tcp_raw_gen_syncookie_ipv4
+ *
+ * 	Try to issue a SYN cookie for the packet with corresponding
+ * 	IPv4/TCP headers, *iph* and *th*, without depending on a
+ * 	listening socket.
+ *
+ * 	*iph* points to the IPv4 header.
+ *
+ * 	*th* points to the start of the TCP header, while *th_len*
+ * 	contains the length of the TCP header (at least
+ * 	**sizeof**\ (**struct tcphdr**)).
+ *
+ * Returns
+ * 	On success, lower 32 bits hold the generated SYN cookie in
+ * 	followed by 16 bits which hold the MSS value for that cookie,
+ * 	and the top 16 bits are unused.
+ *
+ * 	On failure, the returned value is one of the following:
+ *
+ * 	**-EINVAL** if *th_len* is invalid.
+ */
+static __s64 (*bpf_tcp_raw_gen_syncookie_ipv4)(struct iphdr *iph, struct tcphdr *th, __u32 th_len) = (void *) 204;
+
+/*
+ * bpf_tcp_raw_gen_syncookie_ipv6
+ *
+ * 	Try to issue a SYN cookie for the packet with corresponding
+ * 	IPv6/TCP headers, *iph* and *th*, without depending on a
+ * 	listening socket.
+ *
+ * 	*iph* points to the IPv6 header.
+ *
+ * 	*th* points to the start of the TCP header, while *th_len*
+ * 	contains the length of the TCP header (at least
+ * 	**sizeof**\ (**struct tcphdr**)).
+ *
+ * Returns
+ * 	On success, lower 32 bits hold the generated SYN cookie in
+ * 	followed by 16 bits which hold the MSS value for that cookie,
+ * 	and the top 16 bits are unused.
+ *
+ * 	On failure, the returned value is one of the following:
+ *
+ * 	**-EINVAL** if *th_len* is invalid.
+ *
+ * 	**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ */
+static __s64 (*bpf_tcp_raw_gen_syncookie_ipv6)(struct ipv6hdr *iph, struct tcphdr *th, __u32 th_len) = (void *) 205;
+
+/*
+ * bpf_tcp_raw_check_syncookie_ipv4
+ *
+ * 	Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * 	without depending on a listening socket.
+ *
+ * 	*iph* points to the IPv4 header.
+ *
+ * 	*th* points to the TCP header.
+ *
+ * Returns
+ * 	0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * 	On failure, the returned value is one of the following:
+ *
+ * 	**-EACCES** if the SYN cookie is not valid.
+ */
+static long (*bpf_tcp_raw_check_syncookie_ipv4)(struct iphdr *iph, struct tcphdr *th) = (void *) 206;
+
+/*
+ * bpf_tcp_raw_check_syncookie_ipv6
+ *
+ * 	Check whether *iph* and *th* contain a valid SYN cookie ACK
+ * 	without depending on a listening socket.
+ *
+ * 	*iph* points to the IPv6 header.
+ *
+ * 	*th* points to the TCP header.
+ *
+ * Returns
+ * 	0 if *iph* and *th* are a valid SYN cookie ACK.
+ *
+ * 	On failure, the returned value is one of the following:
+ *
+ * 	**-EACCES** if the SYN cookie is not valid.
+ *
+ * 	**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ */
+static long (*bpf_tcp_raw_check_syncookie_ipv6)(struct ipv6hdr *iph, struct tcphdr *th) = (void *) 207;
+
+/*
+ * bpf_ktime_get_tai_ns
+ *
+ * 	A nonsettable system-wide clock derived from wall-clock time but
+ * 	ignoring leap seconds.  This clock does not experience
+ * 	discontinuities and backwards jumps caused by NTP inserting leap
+ * 	seconds as CLOCK_REALTIME does.
+ *
+ * 	See: **clock_gettime**\ (**CLOCK_TAI**)
+ *
+ * Returns
+ * 	Current *ktime*.
+ */
+static __u64 (*bpf_ktime_get_tai_ns)(void) = (void *) 208;
+
+/*
+ * bpf_user_ringbuf_drain
+ *
+ * 	Drain samples from the specified user ring buffer, and invoke
+ * 	the provided callback for each such sample:
+ *
+ * 	long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
+ *
+ * 	If **callback_fn** returns 0, the helper will continue to try
+ * 	and drain the next sample, up to a maximum of
+ * 	BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+ * 	the helper will skip the rest of the samples and return. Other
+ * 	return values are not used now, and will be rejected by the
+ * 	verifier.
+ *
+ * Returns
+ * 	The number of drained samples if no error was encountered while
+ * 	draining samples, or 0 if no samples were present in the ring
+ * 	buffer. If a user-space producer was epoll-waiting on this map,
+ * 	and at least one sample was drained, they will receive an event
+ * 	notification notifying them of available space in the ring
+ * 	buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+ * 	function, no wakeup notification will be sent. If the
+ * 	BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+ * 	be sent even if no sample was drained.
+ *
+ * 	On failure, the returned value is one of the following:
+ *
+ * 	**-EBUSY** if the ring buffer is contended, and another calling
+ * 	context was concurrently draining the ring buffer.
+ *
+ * 	**-EINVAL** if user-space is not properly tracking the ring
+ * 	buffer due to the producer position not being aligned to 8
+ * 	bytes, a sample not being aligned to 8 bytes, or the producer
+ * 	position not matching the advertised length of a sample.
+ *
+ * 	**-E2BIG** if user-space has tried to publish a sample which is
+ * 	larger than the size of the ring buffer, or which cannot fit
+ * 	within a struct bpf_dynptr.
+ */
+static long (*bpf_user_ringbuf_drain)(void *map, void *callback_fn, void *ctx, __u64 flags) = (void *) 209;
+
+/*
+ * bpf_cgrp_storage_get
+ *
+ * 	Get a bpf_local_storage from the *cgroup*.
+ *
+ * 	Logically, it could be thought of as getting the value from
+ * 	a *map* with *cgroup* as the **key**.  From this
+ * 	perspective,  the usage is not much different from
+ * 	**bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this
+ * 	helper enforces the key must be a cgroup struct and the map must also
+ * 	be a **BPF_MAP_TYPE_CGRP_STORAGE**.
+ *
+ * 	In reality, the local-storage value is embedded directly inside of the
+ * 	*cgroup* object itself, rather than being located in the
+ * 	**BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is
+ * 	queried for some *map* on a *cgroup* object, the kernel will perform an
+ * 	O(n) iteration over all of the live local-storage values for that
+ * 	*cgroup* object until the local-storage value for the *map* is found.
+ *
+ * 	An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ * 	used such that a new bpf_local_storage will be
+ * 	created if one does not exist.  *value* can be used
+ * 	together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ * 	the initial value of a bpf_local_storage.  If *value* is
+ * 	**NULL**, the new bpf_local_storage will be zero initialized.
+ *
+ * Returns
+ * 	A bpf_local_storage pointer is returned on success.
+ *
+ * 	**NULL** if not found or there was an error in adding
+ * 	a new bpf_local_storage.
+ */
+static void *(*bpf_cgrp_storage_get)(void *map, struct cgroup *cgroup, void *value, __u64 flags) = (void *) 210;
+
+/*
+ * bpf_cgrp_storage_delete
+ *
+ * 	Delete a bpf_local_storage from a *cgroup*.
+ *
+ * Returns
+ * 	0 on success.
+ *
+ * 	**-ENOENT** if the bpf_local_storage cannot be found.
+ */
+static long (*bpf_cgrp_storage_delete)(void *map, struct cgroup *cgroup) = (void *) 211;
+

--- a/src/bpf_helpers.h
+++ b/src/bpf_helpers.h
@@ -22,12 +22,25 @@
 * To allow use of SEC() with externs (e.g., for extern .maps declarations),
 * make sure __attribute__((unused)) doesn't trigger compilation warning.
 */
+#if __GNUC__ && !__clang__
+
+/*
+ * Pragma macros are broken on GCC
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
+ */
+#define SEC(name) __attribute__((section(name), used))
+
+#else
+
 #define SEC(name) \
 	_Pragma("GCC diagnostic push")					    \
 	_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"")	    \
 	__attribute__((section(name), used))				    \
 	_Pragma("GCC diagnostic pop")					    \

+#endif
+
 /* Avoid 'linux/stddef.h' definition of '__always_inline'. */
 #undef __always_inline
 #define __always_inline inline __attribute__((always_inline))
@@ -64,15 +77,44 @@
 /*
 * Helper macros to manipulate data structures
 */
-#ifndef offsetof
-#define offsetof(TYPE, MEMBER)	((unsigned long)&((TYPE *)0)->MEMBER)
-#endif
-#ifndef container_of
+
+/* offsetof() definition that uses __builtin_offset() might not preserve field
+ * offset CO-RE relocation properly, so force-redefine offsetof() using
+ * old-school approach which works with CO-RE correctly
+ */
+#undef offsetof
+#define offsetof(type, member)	((unsigned long)&((type *)0)->member)
+
+/* redefined container_of() to ensure we use the above offsetof() macro */
+#undef container_of
 #define container_of(ptr, type, member)				\
 	({							\
 		void *__mptr = (void *)(ptr);			\
 		((type *)(__mptr - offsetof(type, member)));	\
 	})
+
+/*
+ * Compiler (optimization) barrier.
+ */
+#ifndef barrier
+#define barrier() asm volatile("" ::: "memory")
+#endif
+
+/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
+ * compiler believe that there is some black box modification of a given
+ * variable and thus prevents compiler from making extra assumption about its
+ * value and potential simplifications and optimizations on this variable.
+ *
+ * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
+ * a variable, making some code patterns unverifiable. Putting barrier_var()
+ * in place will ensure that cast is performed before the barrier_var()
+ * invocation, because compiler has to pessimistically assume that embedded
+ * asm section might perform some extra operations on that variable.
+ *
+ * This is a variable-specific variant of more global barrier().
+ */
+#ifndef barrier_var
+#define barrier_var(var) asm volatile("" : "+r"(var))
 #endif

 /*
@@ -123,18 +165,6 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 }
 #endif

-/*
- * Helper structure used by eBPF C program
- * to describe BPF map attributes to libbpf loader
- */
-struct bpf_map_def {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-	unsigned int map_flags;
-} __attribute__((deprecated("use BTF-defined maps in .maps section")));
-
 enum libbpf_pin_type {
 	LIBBPF_PIN_NONE,
 	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
@@ -149,6 +179,13 @@ enum libbpf_tristate {

 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
+#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
+#define __kptr __attribute__((btf_type_tag("kptr")))
+
+#define bpf_ksym_exists(sym) ({									\
+	_Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak");	\
+	!!sym;											\
+})

 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
@@ -259,4 +296,107 @@ enum libbpf_tristate {
 /* Helper macro to print out debug messages */
 #define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args)

+struct bpf_iter_num;
+
+extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak __ksym;
+extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym;
+extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;
+
+#ifndef bpf_for_each
+/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
+ * using BPF open-coded iterators without having to write mundane explicit
+ * low-level loop logic. Instead, it provides for()-like generic construct
+ * that can be used pretty naturally. E.g., for some hypothetical cgroup
+ * iterator, you'd write:
+ *
+ * struct cgroup *cg, *parent_cg = <...>;
+ *
+ * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {
+ *     bpf_printk("Child cgroup id = %d", cg->cgroup_id);
+ *     if (cg->cgroup_id == 123)
+ *         break;
+ * }
+ *
+ * I.e., it looks almost like high-level for each loop in other languages,
+ * supports continue/break, and is verifiable by BPF verifier.
+ *
+ * For iterating integers, the difference betwen bpf_for_each(num, i, N, M)
+ * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to
+ * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int
+ * *`, not just `int`. So for integers bpf_for() is more convenient.
+ *
+ * Note: this macro relies on C99 feature of allowing to declare variables
+ * inside for() loop, bound to for() loop lifetime. It also utilizes GCC
+ * extension: __attribute__((cleanup(<func>))), supported by both GCC and
+ * Clang.
+ */
+#define bpf_for_each(type, cur, args...) for (							\
+	/* initialize and define destructor */							\
+	struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */,	\
+						    cleanup(bpf_iter_##type##_destroy))),	\
+	/* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */		\
+			       *___p __attribute__((unused)) = (				\
+					bpf_iter_##type##_new(&___it, ##args),			\
+	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
+	/* for bpf_iter_##type##_destroy() when used from cleanup() attribute */		\
+					(void)bpf_iter_##type##_destroy, (void *)0);		\
+	/* iteration and termination check */							\
+	(((cur) = bpf_iter_##type##_next(&___it)));						\
+)
+#endif /* bpf_for_each */
+
+#ifndef bpf_for
+/* bpf_for(i, start, end) implements a for()-like looping construct that sets
+ * provided integer variable *i* to values starting from *start* through,
+ * but not including, *end*. It also proves to BPF verifier that *i* belongs
+ * to range [start, end), so this can be used for accessing arrays without
+ * extra checks.
+ *
+ * Note: *start* and *end* are assumed to be expressions with no side effects
+ * and whose values do not change throughout bpf_for() loop execution. They do
+ * not have to be statically known or constant, though.
+ *
+ * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
+ * loop bound variables and cleanup attribute, supported by GCC and Clang.
+ */
+#define bpf_for(i, start, end) for (								\
+	/* initialize and define destructor */							\
+	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
+						 cleanup(bpf_iter_num_destroy))),		\
+	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */		\
+			    *___p __attribute__((unused)) = (					\
+				bpf_iter_num_new(&___it, (start), (end)),			\
+	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
+	/* for bpf_iter_num_destroy() when used from cleanup() attribute */			\
+				(void)bpf_iter_num_destroy, (void *)0);				\
+	({											\
+		/* iteration step */								\
+		int *___t = bpf_iter_num_next(&___it);						\
+		/* termination and bounds check */						\
+		(___t && ((i) = *___t, (i) >= (start) && (i) < (end)));				\
+	});											\
+)
+#endif /* bpf_for */
+
+#ifndef bpf_repeat
+/* bpf_repeat(N) performs N iterations without exposing iteration number
+ *
+ * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
+ * loop bound variables and cleanup attribute, supported by GCC and Clang.
+ */
+#define bpf_repeat(N) for (									\
+	/* initialize and define destructor */							\
+	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
+						 cleanup(bpf_iter_num_destroy))),		\
+	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */		\
+			    *___p __attribute__((unused)) = (					\
+				bpf_iter_num_new(&___it, 0, (N)),				\
+	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
+	/* for bpf_iter_num_destroy() when used from cleanup() attribute */			\
+				(void)bpf_iter_num_destroy, (void *)0);				\
+	bpf_iter_num_next(&___it);								\
+	/* nothing here  */									\
+)
+#endif /* bpf_repeat */
+
 #endif
--- a/src/bpf_tracing.h
+++ b/src/bpf_tracing.h
@@ -2,6 +2,8 @@
 #ifndef __BPF_TRACING_H__
 #define __BPF_TRACING_H__

+#include <bpf/bpf_helpers.h>
+
 /* Scan the ARCH passed in from ARCH env variable (see Makefile) */
 #if defined(__TARGET_ARCH_x86)
 	#define bpf_target_x86
@@ -27,6 +29,12 @@
 #elif defined(__TARGET_ARCH_riscv)
 	#define bpf_target_riscv
 	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arc)
+	#define bpf_target_arc
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_loongarch)
+	#define bpf_target_loongarch
+	#define bpf_target_defined
 #else

 /* Fall back to what the compiler says */
@@ -54,6 +62,12 @@
 #elif defined(__riscv) && __riscv_xlen == 64
 	#define bpf_target_riscv
 	#define bpf_target_defined
+#elif defined(__arc__)
+	#define bpf_target_arc
+	#define bpf_target_defined
+#elif defined(__loongarch__)
+	#define bpf_target_loongarch
+	#define bpf_target_defined
 #endif /* no compiler target */

 #endif
@@ -64,6 +78,10 @@

 #if defined(bpf_target_x86)

+/*
+ * https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI
+ */
+
 #if defined(__KERNEL__) || defined(__VMLINUX_H__)

 #define __PT_PARM1_REG di
@@ -71,25 +89,40 @@
 #define __PT_PARM3_REG dx
 #define __PT_PARM4_REG cx
 #define __PT_PARM5_REG r8
+#define __PT_PARM6_REG r9
+/*
+ * Syscall uses r10 for PARM4. See arch/x86/entry/entry_64.S:entry_SYSCALL_64
+ * comments in Linux sources. And refer to syscall(2) manpage.
+ */
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG r10
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
 #define __PT_RET_REG sp
 #define __PT_FP_REG bp
 #define __PT_RC_REG ax
 #define __PT_SP_REG sp
 #define __PT_IP_REG ip
-/* syscall uses r10 for PARM4 */
-#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
-#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)

 #else

 #ifdef __i386__

+/* i386 kernel is built with -mregparm=3 */
 #define __PT_PARM1_REG eax
 #define __PT_PARM2_REG edx
 #define __PT_PARM3_REG ecx
-/* i386 kernel is built with -mregparm=3 */
-#define __PT_PARM4_REG __unsupported__
-#define __PT_PARM5_REG __unsupported__
+/* i386 syscall ABI is very different, refer to syscall(2) manpage */
+#define __PT_PARM1_SYSCALL_REG ebx
+#define __PT_PARM2_SYSCALL_REG ecx
+#define __PT_PARM3_SYSCALL_REG edx
+#define __PT_PARM4_SYSCALL_REG esi
+#define __PT_PARM5_SYSCALL_REG edi
+#define __PT_PARM6_SYSCALL_REG ebp
+
 #define __PT_RET_REG esp
 #define __PT_FP_REG ebp
 #define __PT_RC_REG eax
@@ -103,14 +136,20 @@
 #define __PT_PARM3_REG rdx
 #define __PT_PARM4_REG rcx
 #define __PT_PARM5_REG r8
+#define __PT_PARM6_REG r9
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG r10
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
 #define __PT_RET_REG rsp
 #define __PT_FP_REG rbp
 #define __PT_RC_REG rax
 #define __PT_SP_REG rsp
 #define __PT_IP_REG rip
-/* syscall uses r10 for PARM4 */
-#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
-#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)

 #endif /* __i386__ */

@@ -118,6 +157,10 @@

 #elif defined(bpf_target_s390)

+/*
+ * https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf
+ */
+
 struct pt_regs___s390 {
 	unsigned long orig_gpr2;
 };
@@ -129,21 +172,42 @@ struct pt_regs___s390 {
 #define __PT_PARM3_REG gprs[4]
 #define __PT_PARM4_REG gprs[5]
 #define __PT_PARM5_REG gprs[6]
-#define __PT_RET_REG grps[14]
+
+#define __PT_PARM1_SYSCALL_REG orig_gpr2
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG gprs[7]
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
+#define PT_REGS_PARM1_CORE_SYSCALL(x) \
+	BPF_CORE_READ((const struct pt_regs___s390 *)(x), __PT_PARM1_SYSCALL_REG)
+
+#define __PT_RET_REG gprs[14]
 #define __PT_FP_REG gprs[11]	/* Works only with CONFIG_FRAME_POINTER */
 #define __PT_RC_REG gprs[2]
 #define __PT_SP_REG gprs[15]
 #define __PT_IP_REG psw.addr
-#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
-#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)

 #elif defined(bpf_target_arm)

+/*
+ * https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#machine-registers
+ */
+
 #define __PT_PARM1_REG uregs[0]
 #define __PT_PARM2_REG uregs[1]
 #define __PT_PARM3_REG uregs[2]
 #define __PT_PARM4_REG uregs[3]
-#define __PT_PARM5_REG uregs[4]
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG uregs[4]
+#define __PT_PARM6_SYSCALL_REG uregs[5]
+#define __PT_PARM7_SYSCALL_REG uregs[6]
+
 #define __PT_RET_REG uregs[14]
 #define __PT_FP_REG uregs[11]	/* Works only with CONFIG_FRAME_POINTER */
 #define __PT_RC_REG uregs[0]
@@ -152,6 +216,10 @@ struct pt_regs___s390 {

 #elif defined(bpf_target_arm64)

+/*
+ * https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#machine-registers
+ */
+
 struct pt_regs___arm64 {
 	unsigned long orig_x0;
 };
@@ -163,21 +231,49 @@ struct pt_regs___arm64 {
 #define __PT_PARM3_REG regs[2]
 #define __PT_PARM4_REG regs[3]
 #define __PT_PARM5_REG regs[4]
+#define __PT_PARM6_REG regs[5]
+#define __PT_PARM7_REG regs[6]
+#define __PT_PARM8_REG regs[7]
+
+#define __PT_PARM1_SYSCALL_REG orig_x0
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
+#define PT_REGS_PARM1_CORE_SYSCALL(x) \
+	BPF_CORE_READ((const struct pt_regs___arm64 *)(x), __PT_PARM1_SYSCALL_REG)
+
 #define __PT_RET_REG regs[30]
 #define __PT_FP_REG regs[29]	/* Works only with CONFIG_FRAME_POINTER */
 #define __PT_RC_REG regs[0]
 #define __PT_SP_REG sp
 #define __PT_IP_REG pc
-#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
-#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)

 #elif defined(bpf_target_mips)

+/*
+ * N64 ABI is assumed right now.
+ * https://en.wikipedia.org/wiki/MIPS_architecture#Calling_conventions
+ */
+
 #define __PT_PARM1_REG regs[4]
 #define __PT_PARM2_REG regs[5]
 #define __PT_PARM3_REG regs[6]
 #define __PT_PARM4_REG regs[7]
 #define __PT_PARM5_REG regs[8]
+#define __PT_PARM6_REG regs[9]
+#define __PT_PARM7_REG regs[10]
+#define __PT_PARM8_REG regs[11]
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG /* only N32/N64 */
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG /* only N32/N64 */
+
 #define __PT_RET_REG regs[31]
 #define __PT_FP_REG regs[30]	/* Works only with CONFIG_FRAME_POINTER */
 #define __PT_RC_REG regs[2]
@@ -186,26 +282,58 @@ struct pt_regs___arm64 {

 #elif defined(bpf_target_powerpc)

+/*
+ * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf (page 3-14,
+ * section "Function Calling Sequence")
+ */
+
 #define __PT_PARM1_REG gpr[3]
 #define __PT_PARM2_REG gpr[4]
 #define __PT_PARM3_REG gpr[5]
 #define __PT_PARM4_REG gpr[6]
 #define __PT_PARM5_REG gpr[7]
+#define __PT_PARM6_REG gpr[8]
+#define __PT_PARM7_REG gpr[9]
+#define __PT_PARM8_REG gpr[10]
+
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG orig_gpr3
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+#if !defined(__arch64__)
+#define __PT_PARM7_SYSCALL_REG __PT_PARM7_REG /* only powerpc (not powerpc64) */
+#endif
+
 #define __PT_RET_REG regs[31]
 #define __PT_FP_REG __unsupported__
 #define __PT_RC_REG gpr[3]
 #define __PT_SP_REG sp
 #define __PT_IP_REG nip
-/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
-#define PT_REGS_SYSCALL_REGS(ctx) ctx

 #elif defined(bpf_target_sparc)

+/*
+ * https://en.wikipedia.org/wiki/Calling_convention#SPARC
+ */
+
 #define __PT_PARM1_REG u_regs[UREG_I0]
 #define __PT_PARM2_REG u_regs[UREG_I1]
 #define __PT_PARM3_REG u_regs[UREG_I2]
 #define __PT_PARM4_REG u_regs[UREG_I3]
 #define __PT_PARM5_REG u_regs[UREG_I4]
+#define __PT_PARM6_REG u_regs[UREG_I5]
+
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
 #define __PT_RET_REG u_regs[UREG_I7]
 #define __PT_FP_REG __unsupported__
 #define __PT_RC_REG u_regs[UREG_I0]
@@ -219,19 +347,101 @@ struct pt_regs___arm64 {

 #elif defined(bpf_target_riscv)

+/*
+ * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions
+ */
+
+/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */
 #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
 #define __PT_PARM1_REG a0
 #define __PT_PARM2_REG a1
 #define __PT_PARM3_REG a2
 #define __PT_PARM4_REG a3
 #define __PT_PARM5_REG a4
-#define __PT_RET_REG ra
-#define __PT_FP_REG s0
-#define __PT_RC_REG a5
-#define __PT_SP_REG sp
-#define __PT_IP_REG pc
+#define __PT_PARM6_REG a5
+#define __PT_PARM7_REG a6
+#define __PT_PARM8_REG a7
+
 /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
 #define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG ra
+#define __PT_FP_REG s0
+#define __PT_RC_REG a0
+#define __PT_SP_REG sp
+#define __PT_IP_REG pc
+
+#elif defined(bpf_target_arc)
+
+/*
+ * Section "Function Calling Sequence" (page 24):
+ * https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf
+ */
+
+/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG scratch.r0
+#define __PT_PARM2_REG scratch.r1
+#define __PT_PARM3_REG scratch.r2
+#define __PT_PARM4_REG scratch.r3
+#define __PT_PARM5_REG scratch.r4
+#define __PT_PARM6_REG scratch.r5
+#define __PT_PARM7_REG scratch.r6
+#define __PT_PARM8_REG scratch.r7
+
+/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG scratch.blink
+#define __PT_FP_REG scratch.fp
+#define __PT_RC_REG scratch.r0
+#define __PT_SP_REG scratch.sp
+#define __PT_IP_REG scratch.ret
+
+#elif defined(bpf_target_loongarch)
+
+/*
+ * https://docs.kernel.org/loongarch/introduction.html
+ * https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
+ */
+
+/* loongarch provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
+#define __PT_PARM1_REG regs[4]
+#define __PT_PARM2_REG regs[5]
+#define __PT_PARM3_REG regs[6]
+#define __PT_PARM4_REG regs[7]
+#define __PT_PARM5_REG regs[8]
+#define __PT_PARM6_REG regs[9]
+#define __PT_PARM7_REG regs[10]
+#define __PT_PARM8_REG regs[11]
+
+/* loongarch does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
+#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
+#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
+#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
+#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
+
+#define __PT_RET_REG regs[1]
+#define __PT_FP_REG regs[22]
+#define __PT_RC_REG regs[4]
+#define __PT_SP_REG regs[3]
+#define __PT_IP_REG csr_era

 #endif

@@ -239,16 +449,49 @@ struct pt_regs___arm64 {

 struct pt_regs;

-/* allow some architecutres to override `struct pt_regs` */
+/* allow some architectures to override `struct pt_regs` */
 #ifndef __PT_REGS_CAST
 #define __PT_REGS_CAST(x) (x)
 #endif

+/*
+ * Different architectures support different number of arguments passed
+ * through registers. i386 supports just 3, some arches support up to 8.
+ */
+#ifndef __PT_PARM4_REG
+#define __PT_PARM4_REG __unsupported__
+#endif
+#ifndef __PT_PARM5_REG
+#define __PT_PARM5_REG __unsupported__
+#endif
+#ifndef __PT_PARM6_REG
+#define __PT_PARM6_REG __unsupported__
+#endif
+#ifndef __PT_PARM7_REG
+#define __PT_PARM7_REG __unsupported__
+#endif
+#ifndef __PT_PARM8_REG
+#define __PT_PARM8_REG __unsupported__
+#endif
+/*
+ * Similarly, syscall-specific conventions might differ between function call
+ * conventions within each architecutre. All supported architectures pass
+ * either 6 or 7 syscall arguments in registers.
+ *
+ * See syscall(2) manpage for succinct table with information on each arch.
+ */
+#ifndef __PT_PARM7_SYSCALL_REG
+#define __PT_PARM7_SYSCALL_REG __unsupported__
+#endif
+
 #define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG)
 #define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG)
 #define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG)
 #define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG)
 #define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG)
+#define PT_REGS_PARM6(x) (__PT_REGS_CAST(x)->__PT_PARM6_REG)
+#define PT_REGS_PARM7(x) (__PT_REGS_CAST(x)->__PT_PARM7_REG)
+#define PT_REGS_PARM8(x) (__PT_REGS_CAST(x)->__PT_PARM8_REG)
 #define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG)
 #define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG)
 #define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG)
@@ -260,6 +503,9 @@ struct pt_regs;
 #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG)
 #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG)
 #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG)
+#define PT_REGS_PARM6_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_REG)
+#define PT_REGS_PARM7_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_REG)
+#define PT_REGS_PARM8_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM8_REG)
 #define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG)
 #define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG)
 #define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG)
@@ -286,24 +532,33 @@ struct pt_regs;
 #endif

 #ifndef PT_REGS_PARM1_SYSCALL
-#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x)
+#define PT_REGS_PARM1_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM1_SYSCALL_REG)
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM2_SYSCALL
+#define PT_REGS_PARM2_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM2_SYSCALL_REG)
+#define PT_REGS_PARM2_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM3_SYSCALL
+#define PT_REGS_PARM3_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM3_SYSCALL_REG)
+#define PT_REGS_PARM3_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_SYSCALL_REG)
 #endif
-#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x)
-#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x)
 #ifndef PT_REGS_PARM4_SYSCALL
-#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x)
+#define PT_REGS_PARM4_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM4_SYSCALL_REG)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_SYSCALL_REG)
 #endif
-#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x)
-
-#ifndef PT_REGS_PARM1_CORE_SYSCALL
-#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x)
+#ifndef PT_REGS_PARM5_SYSCALL
+#define PT_REGS_PARM5_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM5_SYSCALL_REG)
+#define PT_REGS_PARM5_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_SYSCALL_REG)
 #endif
-#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x)
-#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x)
-#ifndef PT_REGS_PARM4_CORE_SYSCALL
-#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x)
+#ifndef PT_REGS_PARM6_SYSCALL
+#define PT_REGS_PARM6_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM6_SYSCALL_REG)
+#define PT_REGS_PARM6_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_SYSCALL_REG)
+#endif
+#ifndef PT_REGS_PARM7_SYSCALL
+#define PT_REGS_PARM7_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM7_SYSCALL_REG)
+#define PT_REGS_PARM7_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_SYSCALL_REG)
 #endif
-#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x)

 #else /* defined(bpf_target_defined) */

@@ -312,6 +567,9 @@ struct pt_regs;
 #define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM8(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
@@ -323,6 +581,9 @@ struct pt_regs;
 #define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM8_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
@@ -337,12 +598,16 @@ struct pt_regs;
 #define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })

 #define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM6_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM7_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })

 #endif /* defined(bpf_target_defined) */

@@ -401,7 +666,7 @@ struct pt_regs;
 */
 #define BPF_PROG(name, args...)						    \
 name(unsigned long long *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(unsigned long long *ctx, ##args);				    \
 typeof(name(0)) name(unsigned long long *ctx)				    \
 {									    \
@@ -410,9 +675,116 @@ typeof(name(0)) name(unsigned long long *ctx)				    \
 	return ____##name(___bpf_ctx_cast(args));			    \
 	_Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(unsigned long long *ctx, ##args)

+#ifndef ___bpf_nth2
+#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13,	\
+		    _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
+#endif
+#ifndef ___bpf_narg2
+#define ___bpf_narg2(...)	\
+	___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7,	\
+		    6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
+#endif
+
+#define ___bpf_treg_cnt(t) \
+	__builtin_choose_expr(sizeof(t) == 1, 1,	\
+	__builtin_choose_expr(sizeof(t) == 2, 1,	\
+	__builtin_choose_expr(sizeof(t) == 4, 1,	\
+	__builtin_choose_expr(sizeof(t) == 8, 1,	\
+	__builtin_choose_expr(sizeof(t) == 16, 2,	\
+			      (void)0)))))
+
+#define ___bpf_reg_cnt0()		(0)
+#define ___bpf_reg_cnt1(t, x)		(___bpf_reg_cnt0() + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt2(t, x, args...)	(___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt3(t, x, args...)	(___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt4(t, x, args...)	(___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt5(t, x, args...)	(___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt6(t, x, args...)	(___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt7(t, x, args...)	(___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt8(t, x, args...)	(___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt9(t, x, args...)	(___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt10(t, x, args...)	(___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt11(t, x, args...)	(___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt12(t, x, args...)	(___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt(args...)	 ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args)
+
+#define ___bpf_union_arg(t, x, n) \
+	__builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \
+			      (void)0)))))
+
+#define ___bpf_ctx_arg0(n, args...)
+#define ___bpf_ctx_arg1(n, t, x)		, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x))
+#define ___bpf_ctx_arg2(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args)
+#define ___bpf_ctx_arg3(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args)
+#define ___bpf_ctx_arg4(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args)
+#define ___bpf_ctx_arg5(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args)
+#define ___bpf_ctx_arg6(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args)
+#define ___bpf_ctx_arg7(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args)
+#define ___bpf_ctx_arg8(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args)
+#define ___bpf_ctx_arg9(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args)
+#define ___bpf_ctx_arg10(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args)
+#define ___bpf_ctx_arg11(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args)
+#define ___bpf_ctx_arg12(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args)
+#define ___bpf_ctx_arg(args...)	___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args)
+
+#define ___bpf_ctx_decl0()
+#define ___bpf_ctx_decl1(t, x)			, t x
+#define ___bpf_ctx_decl2(t, x, args...)		, t x ___bpf_ctx_decl1(args)
+#define ___bpf_ctx_decl3(t, x, args...)		, t x ___bpf_ctx_decl2(args)
+#define ___bpf_ctx_decl4(t, x, args...)		, t x ___bpf_ctx_decl3(args)
+#define ___bpf_ctx_decl5(t, x, args...)		, t x ___bpf_ctx_decl4(args)
+#define ___bpf_ctx_decl6(t, x, args...)		, t x ___bpf_ctx_decl5(args)
+#define ___bpf_ctx_decl7(t, x, args...)		, t x ___bpf_ctx_decl6(args)
+#define ___bpf_ctx_decl8(t, x, args...)		, t x ___bpf_ctx_decl7(args)
+#define ___bpf_ctx_decl9(t, x, args...)		, t x ___bpf_ctx_decl8(args)
+#define ___bpf_ctx_decl10(t, x, args...)	, t x ___bpf_ctx_decl9(args)
+#define ___bpf_ctx_decl11(t, x, args...)	, t x ___bpf_ctx_decl10(args)
+#define ___bpf_ctx_decl12(t, x, args...)	, t x ___bpf_ctx_decl11(args)
+#define ___bpf_ctx_decl(args...)	___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args)
+
+/*
+ * BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct
+ * arguments. Since each struct argument might take one or two u64 values
+ * in the trampoline stack, argument type size is needed to place proper number
+ * of u64 values for each argument. Therefore, BPF_PROG2 has different
+ * syntax from BPF_PROG. For example, for the following BPF_PROG syntax:
+ *
+ *   int BPF_PROG(test2, int a, int b) { ... }
+ *
+ * the corresponding BPF_PROG2 syntax is:
+ *
+ *   int BPF_PROG2(test2, int, a, int, b) { ... }
+ *
+ * where type and the corresponding argument name are separated by comma.
+ *
+ * Use BPF_PROG2 macro if one of the arguments might be a struct/union larger
+ * than 8 bytes:
+ *
+ *   int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b,
+ *		   int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+ *   {
+ *        // access a, b, c, d, e, and ret directly
+ *        ...
+ *   }
+ */
+#define BPF_PROG2(name, args...)						\
+name(unsigned long long *ctx);							\
+static __always_inline typeof(name(0))						\
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args));			\
+typeof(name(0)) name(unsigned long long *ctx)					\
+{										\
+	return ____##name(ctx ___bpf_ctx_arg(args));				\
+}										\
+static __always_inline typeof(name(0))						\
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args))
+
 struct pt_regs;

 #define ___bpf_kprobe_args0()           ctx
@@ -421,6 +793,9 @@ struct pt_regs;
 #define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
 #define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
 #define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args6(x, args...) ___bpf_kprobe_args5(args), (void *)PT_REGS_PARM6(ctx)
+#define ___bpf_kprobe_args7(x, args...) ___bpf_kprobe_args6(args), (void *)PT_REGS_PARM7(ctx)
+#define ___bpf_kprobe_args8(x, args...) ___bpf_kprobe_args7(args), (void *)PT_REGS_PARM8(ctx)
 #define ___bpf_kprobe_args(args...)     ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)

 /*
@@ -435,7 +810,7 @@ struct pt_regs;
 */
 #define BPF_KPROBE(name, args...)					    \
 name(struct pt_regs *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
@@ -444,7 +819,7 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 	return ____##name(___bpf_kprobe_args(args));			    \
 	_Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args)

 #define ___bpf_kretprobe_args0()       ctx
@@ -459,7 +834,7 @@ ____##name(struct pt_regs *ctx, ##args)
 */
 #define BPF_KRETPROBE(name, args...)					    \
 name(struct pt_regs *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
@@ -470,39 +845,80 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)

+/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */
 #define ___bpf_syscall_args0()           ctx
-#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
-#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs)
+#define ___bpf_syscall_args6(x, args...) ___bpf_syscall_args5(args), (void *)PT_REGS_PARM6_SYSCALL(regs)
+#define ___bpf_syscall_args7(x, args...) ___bpf_syscall_args6(args), (void *)PT_REGS_PARM7_SYSCALL(regs)
 #define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)

+/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */
+#define ___bpf_syswrap_args0()           ctx
+#define ___bpf_syswrap_args1(x)          ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args6(x, args...) ___bpf_syswrap_args5(args), (void *)PT_REGS_PARM6_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args7(x, args...) ___bpf_syswrap_args6(args), (void *)PT_REGS_PARM7_CORE_SYSCALL(regs)
+#define ___bpf_syswrap_args(args...)     ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args)
+
 /*
- * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
+ * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for
 * tracing syscall functions, like __x64_sys_close. It hides the underlying
 * platform-specific low-level way of getting syscall input arguments from
 * struct pt_regs, and provides a familiar typed and named function arguments
 * syntax and semantics of accessing syscall input parameters.
 *
- * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * Original struct pt_regs * context is preserved as 'ctx' argument. This might
 * be necessary when using BPF helpers like bpf_perf_event_output().
 *
- * This macro relies on BPF CO-RE support.
+ * At the moment BPF_KSYSCALL does not transparently handle all the calling
+ * convention quirks for the following syscalls:
+ *
+ * - mmap(): __ARCH_WANT_SYS_OLD_MMAP.
+ * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and
+ *            CONFIG_CLONE_BACKWARDS3.
+ * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL.
+ * - compat syscalls.
+ *
+ * This may or may not change in the future. User needs to take extra measures
+ * to handle such quirks explicitly, if necessary.
+ *
+ * This macro relies on BPF CO-RE support and virtual __kconfig externs.
 */
-#define BPF_KPROBE_SYSCALL(name, args...)				    \
+#define BPF_KSYSCALL(name, args...)					    \
 name(struct pt_regs *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig;			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
-	struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);		    \
+	struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER		    \
+			       ? (struct pt_regs *)PT_REGS_PARM1(ctx)	    \
+			       : ctx;					    \
 	_Pragma("GCC diagnostic push")					    \
 	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
-	return ____##name(___bpf_syscall_args(args));			    \
+	if (LINUX_HAS_SYSCALL_WRAPPER)					    \
+		return ____##name(___bpf_syswrap_args(args));		    \
+	else								    \
+		return ____##name(___bpf_syscall_args(args));		    \
 	_Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args)

+#define BPF_KPROBE_SYSCALL BPF_KSYSCALL
+
+/* BPF_UPROBE and BPF_URETPROBE are identical to BPF_KPROBE and BPF_KRETPROBE,
+ * but are named way less confusingly for SEC("uprobe") and SEC("uretprobe")
+ * use cases.
+ */
+#define BPF_UPROBE(name, args...)  BPF_KPROBE(name, ##args)
+#define BPF_URETPROBE(name, args...)  BPF_KRETPROBE(name, ##args)
+
 #endif
--- a/src/btf.c
+++ b/src/btf.c
--- a/src/btf.h
+++ b/src/btf.h
@@ -116,24 +116,15 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b

 LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
 LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);

 LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
 LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead")
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);

-LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only")
-LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead")
-LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 				   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
 					const char *type_name, __u32 kind);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1")
-LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
 LIBBPF_API __u32 btf__type_cnt(const struct btf *btf);
 LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
@@ -150,29 +141,10 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used")
-LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
-				    __u32 expected_key_size,
-				    __u32 expected_value_size,
-				    __u32 *key_type_id, __u32 *value_type_id);

 LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
 LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_func_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **func_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_line_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **line_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size")
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size")
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);

 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
@@ -215,6 +187,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_
 /* enum construction APIs */
 LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
 LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed);
+LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value);

 enum btf_fwd_kind {
 	BTF_FWD_STRUCT = 0,
@@ -257,22 +231,12 @@ struct btf_dedup_opts {

 LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);

-LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead")
-LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts);
-#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__)
-#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts)
-#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)
-
 struct btf_dump;

 struct btf_dump_opts {
-	union {
-		size_t sz;
-		void *ctx; /* DEPRECATED: will be gone in v1.0 */
-	};
+	size_t sz;
 };
+#define btf_dump_opts__last_field sz

 typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);

@@ -281,51 +245,6 @@ LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
 					  void *ctx,
 					  const struct btf_dump_opts *opts);

-LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
-						 btf_dump_printf_fn_t printf_fn,
-						 void *ctx,
-						 const struct btf_dump_opts *opts);
-
-LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
-						     const struct btf_ext *btf_ext,
-						     const struct btf_dump_opts *opts,
-						     btf_dump_printf_fn_t printf_fn);
-
-/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the
- * type of 4th argument. If it's btf_dump's print callback, use deprecated
- * API; otherwise, choose the new btf_dump__new(). ___libbpf_override()
- * doesn't work here because both variants have 4 input arguments.
- *
- * (void *) casts are necessary to avoid compilation warnings about type
- * mismatches, because even though __builtin_choose_expr() only ever evaluates
- * one side the other side still has to satisfy type constraints (this is
- * compiler implementation limitation which might be lifted eventually,
- * according to the documentation). So passing struct btf_ext in place of
- * btf_dump_printf_fn_t would be generating compilation warning.  Casting to
- * void * avoids this issue.
- *
- * Also, two type compatibility checks for a function and function pointer are
- * required because passing function reference into btf_dump__new() as
- * btf_dump__new(..., my_callback, ...) and as btf_dump__new(...,
- * &my_callback, ...) (not explicit ampersand in the latter case) actually
- * differs as far as __builtin_types_compatible_p() is concerned. Thus two
- * checks are combined to detect callback argument.
- *
- * The rest works just like in case of ___libbpf_override() usage with symbol
- * versioning.
- *
- * C++ compilers don't support __builtin_types_compatible_p(), so at least
- * don't screw up compilation for them and let C++ users pick btf_dump__new
- * vs btf_dump__new_deprecated explicitly.
- */
-#ifndef __cplusplus
-#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr(				\
-	__builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) ||		\
-	__builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)),	\
-	btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4),	\
-	btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4))
-#endif
-
 LIBBPF_API void btf_dump__free(struct btf_dump *d);

 LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
@@ -393,9 +312,10 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 #ifndef BTF_KIND_FLOAT
 #define BTF_KIND_FLOAT		16	/* Floating point	*/
 #endif
-/* The kernel header switched to enums, so these two were never #defined */
+/* The kernel header switched to enums, so the following were never #defined */
 #define BTF_KIND_DECL_TAG	17	/* Decl Tag */
 #define BTF_KIND_TYPE_TAG	18	/* Type Tag */
+#define BTF_KIND_ENUM64		19	/* Enum for up-to 64bit values */

 static inline __u16 btf_kind(const struct btf_type *t)
 {
@@ -454,6 +374,11 @@ static inline bool btf_is_enum(const struct btf_type *t)
 	return btf_kind(t) == BTF_KIND_ENUM;
 }

+static inline bool btf_is_enum64(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_ENUM64;
+}
+
 static inline bool btf_is_fwd(const struct btf_type *t)
 {
 	return btf_kind(t) == BTF_KIND_FWD;
@@ -524,6 +449,18 @@ static inline bool btf_is_type_tag(const struct btf_type *t)
 	return btf_kind(t) == BTF_KIND_TYPE_TAG;
 }

+static inline bool btf_is_any_enum(const struct btf_type *t)
+{
+	return btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static inline bool btf_kind_core_compat(const struct btf_type *t1,
+					const struct btf_type *t2)
+{
+	return btf_kind(t1) == btf_kind(t2) ||
+	       (btf_is_any_enum(t1) && btf_is_any_enum(t2));
+}
+
 static inline __u8 btf_int_encoding(const struct btf_type *t)
 {
 	return BTF_INT_ENCODING(*(__u32 *)(t + 1));
@@ -549,6 +486,39 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
 	return (struct btf_enum *)(t + 1);
 }

+struct btf_enum64;
+
+static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
+{
+	return (struct btf_enum64 *)(t + 1);
+}
+
+static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
+{
+	/* struct btf_enum64 is introduced in Linux 6.0, which is very
+	 * bleeding-edge. Here we are avoiding relying on struct btf_enum64
+	 * definition coming from kernel UAPI headers to support wider range
+	 * of system-wide kernel headers.
+	 *
+	 * Given this header can be also included from C++ applications, that
+	 * further restricts C tricks we can use (like using compatible
+	 * anonymous struct). So just treat struct btf_enum64 as
+	 * a three-element array of u32 and access second (lo32) and third
+	 * (hi32) elements directly.
+	 *
+	 * For reference, here is a struct btf_enum64 definition:
+	 *
+	 * const struct btf_enum64 {
+	 *	__u32	name_off;
+	 *	__u32	val_lo32;
+	 *	__u32	val_hi32;
+	 * };
+	 */
+	const __u32 *e64 = (const __u32 *)e;
+
+	return ((__u64)e64[2] << 32) | e64[1];
+}
+
 static inline struct btf_member *btf_members(const struct btf_type *t)
 {
 	return (struct btf_member *)(t + 1);
--- a/src/btf_dump.c
+++ b/src/btf_dump.c
@@ -13,6 +13,7 @@
 #include <ctype.h>
 #include <endian.h>
 #include <errno.h>
+#include <limits.h>
 #include <linux/err.h>
 #include <linux/btf.h>
 #include <linux/kernel.h>
@@ -117,14 +118,14 @@ struct btf_dump {
 	struct btf_dump_data *typed_dump;
 };

-static size_t str_hash_fn(const void *key, void *ctx)
+static size_t str_hash_fn(long key, void *ctx)
 {
-	return str_hash(key);
+	return str_hash((void *)key);
 }

-static bool str_equal_fn(const void *a, const void *b, void *ctx)
+static bool str_equal_fn(long a, long b, void *ctx)
 {
-	return strcmp(a, b) == 0;
+	return strcmp((void *)a, (void *)b) == 0;
 }

 static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
@@ -144,15 +145,17 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
 static int btf_dump_mark_referenced(struct btf_dump *d);
 static int btf_dump_resize(struct btf_dump *d);

-DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0)
-struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
-				      btf_dump_printf_fn_t printf_fn,
-				      void *ctx,
-				      const struct btf_dump_opts *opts)
+struct btf_dump *btf_dump__new(const struct btf *btf,
+			       btf_dump_printf_fn_t printf_fn,
+			       void *ctx,
+			       const struct btf_dump_opts *opts)
 {
 	struct btf_dump *d;
 	int err;

+	if (!OPTS_VALID(opts, btf_dump_opts))
+		return libbpf_err_ptr(-EINVAL);
+
 	if (!printf_fn)
 		return libbpf_err_ptr(-EINVAL);

@@ -188,17 +191,6 @@ err:
 	return libbpf_err_ptr(err);
 }

-COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4)
-struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
-					  const struct btf_ext *btf_ext,
-					  const struct btf_dump_opts *opts,
-					  btf_dump_printf_fn_t printf_fn)
-{
-	if (!printf_fn)
-		return libbpf_err_ptr(-EINVAL);
-	return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts);
-}
-
 static int btf_dump_resize(struct btf_dump *d)
 {
 	int err, last_id = btf__type_cnt(d->btf) - 1;
@@ -228,6 +220,17 @@ static int btf_dump_resize(struct btf_dump *d)
 	return 0;
 }

+static void btf_dump_free_names(struct hashmap *map)
+{
+	size_t bkt;
+	struct hashmap_entry *cur;
+
+	hashmap__for_each_entry(map, cur, bkt)
+		free((void *)cur->pkey);
+
+	hashmap__free(map);
+}
+
 void btf_dump__free(struct btf_dump *d)
 {
 	int i;
@@ -246,8 +249,8 @@ void btf_dump__free(struct btf_dump *d)
 	free(d->cached_names);
 	free(d->emit_queue);
 	free(d->decl_stack);
-	hashmap__free(d->type_names);
-	hashmap__free(d->ident_names);
+	btf_dump_free_names(d->type_names);
+	btf_dump_free_names(d->ident_names);

 	free(d);
 }
@@ -318,6 +321,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
 		switch (btf_kind(t)) {
 		case BTF_KIND_INT:
 		case BTF_KIND_ENUM:
+		case BTF_KIND_ENUM64:
 		case BTF_KIND_FWD:
 		case BTF_KIND_FLOAT:
 			break;
@@ -538,6 +542,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 		return 1;
 	}
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 	case BTF_KIND_FWD:
 		/*
 		 * non-anonymous or non-referenced enums are top-level
@@ -739,6 +744,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 		tstate->emit_state = EMITTED;
 		break;
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 		if (top_level_def) {
 			btf_dump_emit_enum_def(d, id, t, 0);
 			btf_dump_printf(d, ";\n\n");
@@ -828,14 +834,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 				 const struct btf_type *t)
 {
 	const struct btf_member *m;
-	int align, i, bit_sz;
+	int max_align = 1, align, i, bit_sz;
 	__u16 vlen;

-	align = btf__align_of(btf, id);
-	/* size of a non-packed struct has to be a multiple of its alignment*/
-	if (align && t->size % align)
-		return true;
-
 	m = btf_members(t);
 	vlen = btf_vlen(t);
 	/* all non-bitfield fields have to be naturally aligned */
@@ -844,8 +845,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 		bit_sz = btf_member_bitfield_size(t, i);
 		if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
 			return true;
+		max_align = max(align, max_align);
 	}
-
+	/* size of a non-packed struct has to be a multiple of its alignment */
+	if (t->size % max_align != 0)
+		return true;
 	/*
 	 * if original struct was marked as packed, but its layout is
 	 * naturally aligned, we'll detect that it's not packed
@@ -853,44 +857,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 	return false;
 }

-static int chip_away_bits(int total, int at_most)
-{
-	return total % at_most ? : at_most;
-}
-
 static void btf_dump_emit_bit_padding(const struct btf_dump *d,
-				      int cur_off, int m_off, int m_bit_sz,
-				      int align, int lvl)
+				      int cur_off, int next_off, int next_align,
+				      bool in_bitfield, int lvl)
 {
-	int off_diff = m_off - cur_off;
-	int ptr_bits = d->ptr_sz * 8;
+	const struct {
+		const char *name;
+		int bits;
+	} pads[] = {
+		{"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
+	};
+	int new_off, pad_bits, bits, i;
+	const char *pad_type;

-	if (off_diff <= 0)
-		/* no gap */
-		return;
-	if (m_bit_sz == 0 && off_diff < align * 8)
-		/* natural padding will take care of a gap */
-		return;
+	if (cur_off >= next_off)
+		return; /* no gap */

-	while (off_diff > 0) {
-		const char *pad_type;
-		int pad_bits;
+	/* For filling out padding we want to take advantage of
+	 * natural alignment rules to minimize unnecessary explicit
+	 * padding. First, we find the largest type (among long, int,
+	 * short, or char) that can be used to force naturally aligned
+	 * boundary. Once determined, we'll use such type to fill in
+	 * the remaining padding gap. In some cases we can rely on
+	 * compiler filling some gaps, but sometimes we need to force
+	 * alignment to close natural alignment with markers like
+	 * `long: 0` (this is always the case for bitfields).  Note
+	 * that even if struct itself has, let's say 4-byte alignment
+	 * (i.e., it only uses up to int-aligned types), using `long:
+	 * X;` explicit padding doesn't actually change struct's
+	 * overall alignment requirements, but compiler does take into
+	 * account that type's (long, in this example) natural
+	 * alignment requirements when adding implicit padding. We use
+	 * this fact heavily and don't worry about ruining correct
+	 * struct alignment requirement.
+	 */
+	for (i = 0; i < ARRAY_SIZE(pads); i++) {
+		pad_bits = pads[i].bits;
+		pad_type = pads[i].name;

-		if (ptr_bits > 32 && off_diff > 32) {
-			pad_type = "long";
-			pad_bits = chip_away_bits(off_diff, ptr_bits);
-		} else if (off_diff > 16) {
-			pad_type = "int";
-			pad_bits = chip_away_bits(off_diff, 32);
-		} else if (off_diff > 8) {
-			pad_type = "short";
-			pad_bits = chip_away_bits(off_diff, 16);
-		} else {
-			pad_type = "char";
-			pad_bits = chip_away_bits(off_diff, 8);
+		new_off = roundup(cur_off, pad_bits);
+		if (new_off <= next_off)
+			break;
+	}
+
+	if (new_off > cur_off && new_off <= next_off) {
+		/* We need explicit `<type>: 0` aligning mark if next
+		 * field is right on alignment offset and its
+		 * alignment requirement is less strict than <type>'s
+		 * alignment (so compiler won't naturally align to the
+		 * offset we expect), or if subsequent `<type>: X`,
+		 * will actually completely fit in the remaining hole,
+		 * making compiler basically ignore `<type>: X`
+		 * completely.
+		 */
+		if (in_bitfield ||
+		    (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
+		    (new_off != next_off && next_off - new_off <= new_off - cur_off))
+			/* but for bitfields we'll emit explicit bit count */
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
+					in_bitfield ? new_off - cur_off : 0);
+		cur_off = new_off;
+	}
+
+	/* Now we know we start at naturally aligned offset for a chosen
+	 * padding type (long, int, short, or char), and so the rest is just
+	 * a straightforward filling of remaining padding gap with full
+	 * `<type>: sizeof(<type>);` markers, except for the last one, which
+	 * might need smaller than sizeof(<type>) padding.
+	 */
+	while (cur_off != next_off) {
+		bits = min(next_off - cur_off, pad_bits);
+		if (bits == pad_bits) {
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+			cur_off += bits;
+			continue;
+		}
+		/* For the remainder padding that doesn't cover entire
+		 * pad_type bit length, we pick the smallest necessary type.
+		 * This is pure aesthetics, we could have just used `long`,
+		 * but having smallest necessary one communicates better the
+		 * scale of the padding gap.
+		 */
+		for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
+			pad_type = pads[i].name;
+			pad_bits = pads[i].bits;
+			if (pad_bits < bits)
+				continue;
+
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
+			cur_off += bits;
+			break;
 		}
-		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
-		off_diff -= pad_bits;
 	}
 }

@@ -910,9 +967,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
 {
 	const struct btf_member *m = btf_members(t);
 	bool is_struct = btf_is_struct(t);
-	int align, i, packed, off = 0;
+	bool packed, prev_bitfield = false;
+	int align, i, off = 0;
 	__u16 vlen = btf_vlen(t);

+	align = btf__align_of(d->btf, id);
 	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;

 	btf_dump_printf(d, "%s%s%s {",
@@ -922,37 +981,47 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,

 	for (i = 0; i < vlen; i++, m++) {
 		const char *fname;
-		int m_off, m_sz;
+		int m_off, m_sz, m_align;
+		bool in_bitfield;

 		fname = btf_name_of(d, m->name_off);
 		m_sz = btf_member_bitfield_size(t, i);
 		m_off = btf_member_bit_offset(t, i);
-		align = packed ? 1 : btf__align_of(d->btf, m->type);
+		m_align = packed ? 1 : btf__align_of(d->btf, m->type);

-		btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+		in_bitfield = prev_bitfield && m_sz != 0;
+
+		btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
 		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
 		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);

 		if (m_sz) {
 			btf_dump_printf(d, ": %d", m_sz);
 			off = m_off + m_sz;
+			prev_bitfield = true;
 		} else {
 			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
 			off = m_off + m_sz * 8;
+			prev_bitfield = false;
 		}
+
 		btf_dump_printf(d, ";");
 	}

 	/* pad at the end, if necessary */
-	if (is_struct) {
-		align = packed ? 1 : btf__align_of(d->btf, id);
-		btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
-					  lvl + 1);
-	}
+	if (is_struct)
+		btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);

-	if (vlen)
+	/*
+	 * Keep `struct empty {}` on a single line,
+	 * only print newline when there are regular or padding fields.
+	 */
+	if (vlen || t->size) {
 		btf_dump_printf(d, "\n");
-	btf_dump_printf(d, "%s}", pfx(lvl));
+		btf_dump_printf(d, "%s}", pfx(lvl));
+	} else {
+		btf_dump_printf(d, "}");
+	}
 	if (packed)
 		btf_dump_printf(d, " __attribute__((packed))");
 }
@@ -989,38 +1058,118 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
 	btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
 }

+static void btf_dump_emit_enum32_val(struct btf_dump *d,
+				     const struct btf_type *t,
+				     int lvl, __u16 vlen)
+{
+	const struct btf_enum *v = btf_enum(t);
+	bool is_signed = btf_kflag(t);
+	const char *fmt_str;
+	const char *name;
+	size_t dup_cnt;
+	int i;
+
+	for (i = 0; i < vlen; i++, v++) {
+		name = btf_name_of(d, v->name_off);
+		/* enumerators share namespace with typedef idents */
+		dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+		if (dup_cnt > 1) {
+			fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,";
+			btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val);
+		} else {
+			fmt_str = is_signed ? "\n%s%s = %d," : "\n%s%s = %u,";
+			btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val);
+		}
+	}
+}
+
+static void btf_dump_emit_enum64_val(struct btf_dump *d,
+				     const struct btf_type *t,
+				     int lvl, __u16 vlen)
+{
+	const struct btf_enum64 *v = btf_enum64(t);
+	bool is_signed = btf_kflag(t);
+	const char *fmt_str;
+	const char *name;
+	size_t dup_cnt;
+	__u64 val;
+	int i;
+
+	for (i = 0; i < vlen; i++, v++) {
+		name = btf_name_of(d, v->name_off);
+		dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+		val = btf_enum64_value(v);
+		if (dup_cnt > 1) {
+			fmt_str = is_signed ? "\n%s%s___%zd = %lldLL,"
+					    : "\n%s%s___%zd = %lluULL,";
+			btf_dump_printf(d, fmt_str,
+					pfx(lvl + 1), name, dup_cnt,
+					(unsigned long long)val);
+		} else {
+			fmt_str = is_signed ? "\n%s%s = %lldLL,"
+					    : "\n%s%s = %lluULL,";
+			btf_dump_printf(d, fmt_str,
+					pfx(lvl + 1), name,
+					(unsigned long long)val);
+		}
+	}
+}
 static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
 				   const struct btf_type *t,
 				   int lvl)
 {
-	const struct btf_enum *v = btf_enum(t);
 	__u16 vlen = btf_vlen(t);
-	const char *name;
-	size_t dup_cnt;
-	int i;

 	btf_dump_printf(d, "enum%s%s",
 			t->name_off ? " " : "",
 			btf_dump_type_name(d, id));

-	if (vlen) {
-		btf_dump_printf(d, " {");
-		for (i = 0; i < vlen; i++, v++) {
-			name = btf_name_of(d, v->name_off);
-			/* enumerators share namespace with typedef idents */
-			dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
-			if (dup_cnt > 1) {
-				btf_dump_printf(d, "\n%s%s___%zu = %u,",
-						pfx(lvl + 1), name, dup_cnt,
-						(__u32)v->val);
-			} else {
-				btf_dump_printf(d, "\n%s%s = %u,",
-						pfx(lvl + 1), name,
-						(__u32)v->val);
+	if (!vlen)
+		return;
+
+	btf_dump_printf(d, " {");
+	if (btf_is_enum(t))
+		btf_dump_emit_enum32_val(d, t, lvl, vlen);
+	else
+		btf_dump_emit_enum64_val(d, t, lvl, vlen);
+	btf_dump_printf(d, "\n%s}", pfx(lvl));
+
+	/* special case enums with special sizes */
+	if (t->size == 1) {
+		/* one-byte enums can be forced with mode(byte) attribute */
+		btf_dump_printf(d, " __attribute__((mode(byte)))");
+	} else if (t->size == 8 && d->ptr_sz == 8) {
+		/* enum can be 8-byte sized if one of the enumerator values
+		 * doesn't fit in 32-bit integer, or by adding mode(word)
+		 * attribute (but probably only on 64-bit architectures); do
+		 * our best here to try to satisfy the contract without adding
+		 * unnecessary attributes
+		 */
+		bool needs_word_mode;
+
+		if (btf_is_enum(t)) {
+			/* enum can't represent 64-bit values, so we need word mode */
+			needs_word_mode = true;
+		} else {
+			/* enum64 needs mode(word) if none of its values has
+			 * non-zero upper 32-bits (which means that all values
+			 * fit in 32-bit integers and won't cause compiler to
+			 * bump enum to be 64-bit naturally
+			 */
+			int i;
+
+			needs_word_mode = true;
+			for (i = 0; i < vlen; i++) {
+				if (btf_enum64(t)[i].val_hi32 != 0) {
+					needs_word_mode = false;
+					break;
+				}
 			}
 		}
-		btf_dump_printf(d, "\n%s}", pfx(lvl));
+		if (needs_word_mode)
+			btf_dump_printf(d, " __attribute__((mode(word)))");
 	}
+
 }

 static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
@@ -1178,6 +1327,7 @@ skip_mod:
 			break;
 		case BTF_KIND_INT:
 		case BTF_KIND_ENUM:
+		case BTF_KIND_ENUM64:
 		case BTF_KIND_FWD:
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
@@ -1312,6 +1462,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
 				btf_dump_emit_struct_fwd(d, id, t);
 			break;
 		case BTF_KIND_ENUM:
+		case BTF_KIND_ENUM64:
 			btf_dump_emit_mods(d, decls);
 			/* inline anonymous enum */
 			if (t->name_off == 0 && !d->skip_anon_defs)
@@ -1481,11 +1632,22 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
 				 const char *orig_name)
 {
+	char *old_name, *new_name;
 	size_t dup_cnt = 0;
+	int err;

-	hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+	new_name = strdup(orig_name);
+	if (!new_name)
+		return 1;
+
+	(void)hashmap__find(name_map, orig_name, &dup_cnt);
 	dup_cnt++;
-	hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
+
+	err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL);
+	if (err)
+		free(new_name);
+
+	free(old_name);

 	return dup_cnt;
 }
@@ -1505,6 +1667,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
 	if (s->name_resolved)
 		return *cached_name ? *cached_name : orig_name;

+	if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
+		s->name_resolved = 1;
+		return orig_name;
+	}
+
 	dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
 	if (dup_cnt > 1) {
 		const size_t max_len = 256;
@@ -1919,7 +2086,7 @@ static int btf_dump_struct_data(struct btf_dump *d,
 {
 	const struct btf_member *m = btf_members(t);
 	__u16 n = btf_vlen(t);
-	int i, err;
+	int i, err = 0;

 	/* note that we increment depth before calling btf_dump_print() below;
 	 * this is intentional.  btf_dump_data_newline() will not print a
@@ -1983,7 +2150,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
 				   __u32 id,
 				   __s64 *value)
 {
-	/* handle unaligned enum value */
+	bool is_signed = btf_kflag(t);
+
 	if (!ptr_is_aligned(d->btf, id, data)) {
 		__u64 val;
 		int err;
@@ -2000,13 +2168,13 @@ static int btf_dump_get_enum_value(struct btf_dump *d,
 		*value = *(__s64 *)data;
 		return 0;
 	case 4:
-		*value = *(__s32 *)data;
+		*value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data;
 		return 0;
 	case 2:
-		*value = *(__s16 *)data;
+		*value = is_signed ? *(__s16 *)data : *(__u16 *)data;
 		return 0;
 	case 1:
-		*value = *(__s8 *)data;
+		*value = is_signed ? *(__s8 *)data : *(__u8 *)data;
 		return 0;
 	default:
 		pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
@@ -2019,7 +2187,7 @@ static int btf_dump_enum_data(struct btf_dump *d,
 			      __u32 id,
 			      const void *data)
 {
-	const struct btf_enum *e;
+	bool is_signed;
 	__s64 value;
 	int i, err;

@@ -2027,14 +2195,31 @@ static int btf_dump_enum_data(struct btf_dump *d,
 	if (err)
 		return err;

-	for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
-		if (value != e->val)
-			continue;
-		btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
-		return 0;
-	}
+	is_signed = btf_kflag(t);
+	if (btf_is_enum(t)) {
+		const struct btf_enum *e;

-	btf_dump_type_values(d, "%d", value);
+		for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+			if (value != e->val)
+				continue;
+			btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+			return 0;
+		}
+
+		btf_dump_type_values(d, is_signed ? "%d" : "%u", value);
+	} else {
+		const struct btf_enum64 *e;
+
+		for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) {
+			if (value != btf_enum64_value(e))
+				continue;
+			btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+			return 0;
+		}
+
+		btf_dump_type_values(d, is_signed ? "%lldLL" : "%lluULL",
+				     (unsigned long long)value);
+	}
 	return 0;
 }

@@ -2065,9 +2250,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
 					     const struct btf_type *t,
 					     __u32 id,
 					     const void *data,
-					     __u8 bits_offset)
+					     __u8 bits_offset,
+					     __u8 bit_sz)
 {
-	__s64 size = btf__resolve_size(d->btf, id);
+	__s64 size;
+
+	if (bit_sz) {
+		/* bits_offset is at most 7. bit_sz is at most 128. */
+		__u8 nr_bytes = (bits_offset + bit_sz + 7) / 8;
+
+		/* When bit_sz is non zero, it is called from
+		 * btf_dump_struct_data() where it only cares about
+		 * negative error value.
+		 * Return nr_bytes in success case to make it
+		 * consistent as the regular integer case below.
+		 */
+		return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes;
+	}
+
+	size = btf__resolve_size(d->btf, id);

 	if (size < 0 || size >= INT_MAX) {
 		pr_warn("unexpected size [%zu] for id [%u]\n",
@@ -2094,6 +2295,7 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
 	case BTF_KIND_FLOAT:
 	case BTF_KIND_PTR:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 		if (data + bits_offset / 8 + size > d->typed_dump->data_end)
 			return -E2BIG;
 		break;
@@ -2198,6 +2400,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d,
 		return -ENODATA;
 	}
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 		err = btf_dump_get_enum_value(d, t, data, id, &value);
 		if (err)
 			return err;
@@ -2220,7 +2423,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
 {
 	int size, err = 0;

-	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz);
 	if (size < 0)
 		return size;
 	err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
@@ -2270,6 +2473,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
 		err = btf_dump_struct_data(d, t, id, data);
 		break;
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 		/* handle bitfield and int enum values */
 		if (bit_sz) {
 			__u64 print_num;
@@ -2320,7 +2524,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 	d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);

 	/* default indent string is a tab */
-	if (!opts->indent_str)
+	if (!OPTS_GET(opts, indent_str, NULL))
 		d->typed_dump->indent_str[0] = '\t';
 	else
 		libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
--- a/src/gen_loader.c
+++ b/src/gen_loader.c
@@ -533,7 +533,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
 	gen->attach_kind = kind;
 	ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
 		       prefix, attach_name);
-	if (ret == sizeof(gen->attach_target))
+	if (ret >= sizeof(gen->attach_target))
 		gen->error = -ENOSPC;
 }

@@ -560,7 +560,7 @@ static void emit_find_attach_target(struct bpf_gen *gen)
 }

 void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
-			    bool is_typeless, int kind, int insn_idx)
+			    bool is_typeless, bool is_ld64, int kind, int insn_idx)
 {
 	struct ksym_relo_desc *relo;

@@ -574,6 +574,7 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
 	relo->name = name;
 	relo->is_weak = is_weak;
 	relo->is_typeless = is_typeless;
+	relo->is_ld64 = is_ld64;
 	relo->kind = kind;
 	relo->insn_idx = insn_idx;
 	gen->relo_cnt++;
@@ -586,9 +587,11 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des
 	int i;

 	for (i = 0; i < gen->nr_ksyms; i++) {
-		if (!strcmp(gen->ksyms[i].name, relo->name)) {
-			gen->ksyms[i].ref++;
-			return &gen->ksyms[i];
+		kdesc = &gen->ksyms[i];
+		if (kdesc->kind == relo->kind && kdesc->is_ld64 == relo->is_ld64 &&
+		    !strcmp(kdesc->name, relo->name)) {
+			kdesc->ref++;
+			return kdesc;
 		}
 	}
 	kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc));
@@ -603,6 +606,7 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des
 	kdesc->ref = 1;
 	kdesc->off = 0;
 	kdesc->insn = 0;
+	kdesc->is_ld64 = relo->is_ld64;
 	return kdesc;
 }

@@ -699,17 +703,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo
 	/* obtain fd in BPF_REG_9 */
 	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
 	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));
-	/* jump to fd_array store if fd denotes module BTF */
+	/* load fd_array slot pointer */
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
+	/* store BTF fd in slot, 0 for vmlinux */
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
+	/* jump to insn[insn_idx].off store if fd denotes module BTF */
 	emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2));
 	/* set the default value for off */
 	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0));
 	/* skip BTF fd store for vmlinux BTF */
-	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));
-	/* load fd_array slot pointer */
-	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
-					 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx)));
-	/* store BTF fd in slot */
-	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0));
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
 	/* store index into insn[insn_idx].off */
 	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
 log:
@@ -804,11 +808,13 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
 		return;
 	/* try to copy from existing ldimm64 insn */
 	if (kdesc->ref > 1) {
-		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
-			       kdesc->insn + offsetof(struct bpf_insn, imm));
 		move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,
 			       kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm));
-		/* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */
+		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+			       kdesc->insn + offsetof(struct bpf_insn, imm));
+		/* jump over src_reg adjustment if imm (btf_id) is not 0, reuse BPF_REG_0 from move_blob2blob
+		 * If btf_id is zero, clear BPF_PSEUDO_BTF_ID flag in src_reg of ld_imm64 insn
+		 */
 		emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3));
 		goto clear_src_reg;
 	}
@@ -831,7 +837,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,
 	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7,
 			      sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)));
 	/* skip src_reg adjustment */
-	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3));
 clear_src_reg:
 	/* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */
 	reg_mask = src_reg_mask();
@@ -862,23 +868,17 @@ static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn
 {
 	int insn;

-	pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx);
+	pr_debug("gen: emit_relo (%d): %s at %d %s\n",
+		 relo->kind, relo->name, relo->insn_idx, relo->is_ld64 ? "ld64" : "call");
 	insn = insns + sizeof(struct bpf_insn) * relo->insn_idx;
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn));
-	switch (relo->kind) {
-	case BTF_KIND_VAR:
+	if (relo->is_ld64) {
 		if (relo->is_typeless)
 			emit_relo_ksym_typeless(gen, relo, insn);
 		else
 			emit_relo_ksym_btf(gen, relo, insn);
-		break;
-	case BTF_KIND_FUNC:
+	} else {
 		emit_relo_kfunc_btf(gen, relo, insn);
-		break;
-	default:
-		pr_warn("Unknown relocation kind '%d'\n", relo->kind);
-		gen->error = -EDOM;
-		return;
 	}
 }

@@ -901,18 +901,20 @@ static void cleanup_core_relo(struct bpf_gen *gen)

 static void cleanup_relos(struct bpf_gen *gen, int insns)
 {
+	struct ksym_desc *kdesc;
 	int i, insn;

 	for (i = 0; i < gen->nr_ksyms; i++) {
+		kdesc = &gen->ksyms[i];
 		/* only close fds for typed ksyms and kfuncs */
-		if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) {
+		if (kdesc->is_ld64 && !kdesc->typeless) {
 			/* close fd recorded in insn[insn_idx + 1].imm */
-			insn = gen->ksyms[i].insn;
+			insn = kdesc->insn;
 			insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm);
 			emit_sys_close_blob(gen, insn);
-		} else if (gen->ksyms[i].kind == BTF_KIND_FUNC) {
-			emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off));
-			if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ)
+		} else if (!kdesc->is_ld64) {
+			emit_sys_close_blob(gen, blob_fd_array_off(gen, kdesc->off));
+			if (kdesc->off < MAX_FD_ARRAY_SZ)
 				gen->nr_fd_array--;
 		}
 	}
@@ -1043,18 +1045,27 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
 	value = add_data(gen, pvalue, value_size);
 	key = add_data(gen, &zero, sizeof(zero));

-	/* if (map_desc[map_idx].initial_value)
-	 *    copy_from_user(value, initial_value, value_size);
+	/* if (map_desc[map_idx].initial_value) {
+	 *    if (ctx->flags & BPF_SKEL_KERNEL)
+	 *        bpf_probe_read_kernel(value, value_size, initial_value);
+	 *    else
+	 *        bpf_copy_from_user(value, value_size, initial_value);
+	 * }
 	 */
 	emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,
 			      sizeof(struct bpf_loader_ctx) +
 			      sizeof(struct bpf_map_desc) * map_idx +
 			      offsetof(struct bpf_map_desc, initial_value)));
-	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4));
+	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8));
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
 					 0, 0, 0, value));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size));
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+			      offsetof(struct bpf_loader_ctx, flags)));
+	emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2));
 	emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user));
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));

 	map_update_attr = add_data(gen, &attr, attr_size);
 	move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
--- a/src/hashmap.c
+++ b/src/hashmap.c
@@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map)
 }

 static bool hashmap_find_entry(const struct hashmap *map,
-			       const void *key, size_t hash,
+			       const long key, size_t hash,
 			       struct hashmap_entry ***pprev,
 			       struct hashmap_entry **entry)
 {
@@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map,
 	return false;
 }

-int hashmap__insert(struct hashmap *map, const void *key, void *value,
-		    enum hashmap_insert_strategy strategy,
-		    const void **old_key, void **old_value)
+int hashmap_insert(struct hashmap *map, long key, long value,
+		   enum hashmap_insert_strategy strategy,
+		   long *old_key, long *old_value)
 {
 	struct hashmap_entry *entry;
 	size_t h;
 	int err;

 	if (old_key)
-		*old_key = NULL;
+		*old_key = 0;
 	if (old_value)
-		*old_value = NULL;
+		*old_value = 0;

 	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
 	if (strategy != HASHMAP_APPEND &&
@@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value,
 	return 0;
 }

-bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+bool hashmap_find(const struct hashmap *map, long key, long *value)
 {
 	struct hashmap_entry *entry;
 	size_t h;
@@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value)
 	return true;
 }

-bool hashmap__delete(struct hashmap *map, const void *key,
-		     const void **old_key, void **old_value)
+bool hashmap_delete(struct hashmap *map, long key,
+		    long *old_key, long *old_value)
 {
 	struct hashmap_entry **pprev, *entry;
 	size_t h;
--- a/src/hashmap.h
+++ b/src/hashmap.h
@@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s)
 	return h;
 }

-typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
-typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+typedef size_t (*hashmap_hash_fn)(long key, void *ctx);
+typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx);

+/*
+ * Hashmap interface is polymorphic, keys and values could be either
+ * long-sized integers or pointers, this is achieved as follows:
+ * - interface functions that operate on keys and values are hidden
+ *   behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert;
+ * - these auxiliary macros cast the key and value parameters as
+ *   long or long *, so the user does not have to specify the casts explicitly;
+ * - for pointer parameters (e.g. old_key) the size of the pointed
+ *   type is verified by hashmap_cast_ptr using _Static_assert;
+ * - when iterating using hashmap__for_each_* forms
+ *   hasmap_entry->key should be used for integer keys and
+ *   hasmap_entry->pkey should be used for pointer keys,
+ *   same goes for values.
+ */
 struct hashmap_entry {
-	const void *key;
-	void *value;
+	union {
+		long key;
+		const void *pkey;
+	};
+	union {
+		long value;
+		void *pvalue;
+	};
 	struct hashmap_entry *next;
 };

@@ -102,6 +122,13 @@ enum hashmap_insert_strategy {
 	HASHMAP_APPEND,
 };

+#define hashmap_cast_ptr(p) ({								\
+	_Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) ||			\
+				sizeof(*(p)) == sizeof(long),				\
+		       #p " pointee should be a long-sized integer or a pointer");	\
+	(long *)(p);									\
+})
+
 /*
 * hashmap__insert() adds key/value entry w/ various semantics, depending on
 * provided strategy value. If a given key/value pair replaced already
@@ -109,42 +136,38 @@ enum hashmap_insert_strategy {
 * through old_key and old_value to allow calling code do proper memory
 * management.
 */
-int hashmap__insert(struct hashmap *map, const void *key, void *value,
-		    enum hashmap_insert_strategy strategy,
-		    const void **old_key, void **old_value);
+int hashmap_insert(struct hashmap *map, long key, long value,
+		   enum hashmap_insert_strategy strategy,
+		   long *old_key, long *old_value);

-static inline int hashmap__add(struct hashmap *map,
-			       const void *key, void *value)
-{
-	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
-}
+#define hashmap__insert(map, key, value, strategy, old_key, old_value) \
+	hashmap_insert((map), (long)(key), (long)(value), (strategy),  \
+		       hashmap_cast_ptr(old_key),		       \
+		       hashmap_cast_ptr(old_value))

-static inline int hashmap__set(struct hashmap *map,
-			       const void *key, void *value,
-			       const void **old_key, void **old_value)
-{
-	return hashmap__insert(map, key, value, HASHMAP_SET,
-			       old_key, old_value);
-}
+#define hashmap__add(map, key, value) \
+	hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL)

-static inline int hashmap__update(struct hashmap *map,
-				  const void *key, void *value,
-				  const void **old_key, void **old_value)
-{
-	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
-			       old_key, old_value);
-}
+#define hashmap__set(map, key, value, old_key, old_value) \
+	hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value))

-static inline int hashmap__append(struct hashmap *map,
-				  const void *key, void *value)
-{
-	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
-}
+#define hashmap__update(map, key, value, old_key, old_value) \
+	hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value))

-bool hashmap__delete(struct hashmap *map, const void *key,
-		     const void **old_key, void **old_value);
+#define hashmap__append(map, key, value) \
+	hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL)

-bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value);
+
+#define hashmap__delete(map, key, old_key, old_value)		       \
+	hashmap_delete((map), (long)(key),			       \
+		       hashmap_cast_ptr(old_key),		       \
+		       hashmap_cast_ptr(old_value))
+
+bool hashmap_find(const struct hashmap *map, long key, long *value);
+
+#define hashmap__find(map, key, value) \
+	hashmap_find((map), (long)(key), hashmap_cast_ptr(value))

 /*
 * hashmap__for_each_entry - iterate over all entries in hashmap
--- a/src/libbpf.c
+++ b/src/libbpf.c
--- a/src/libbpf.h
+++ b/src/libbpf.h
--- a/src/libbpf.map
+++ b/src/libbpf.map
@@ -1,29 +1,14 @@
 LIBBPF_0.0.1 {
 	global:
 		bpf_btf_get_fd_by_id;
-		bpf_create_map;
-		bpf_create_map_in_map;
-		bpf_create_map_in_map_node;
-		bpf_create_map_name;
-		bpf_create_map_node;
-		bpf_create_map_xattr;
-		bpf_load_btf;
-		bpf_load_program;
-		bpf_load_program_xattr;
 		bpf_map__btf_key_type_id;
 		bpf_map__btf_value_type_id;
-		bpf_map__def;
 		bpf_map__fd;
-		bpf_map__is_offload_neutral;
 		bpf_map__name;
-		bpf_map__next;
 		bpf_map__pin;
-		bpf_map__prev;
-		bpf_map__priv;
 		bpf_map__reuse_fd;
 		bpf_map__set_ifindex;
 		bpf_map__set_inner_map_fd;
-		bpf_map__set_priv;
 		bpf_map__unpin;
 		bpf_map_delete_elem;
 		bpf_map_get_fd_by_id;
@@ -38,79 +23,37 @@ LIBBPF_0.0.1 {
 		bpf_object__btf_fd;
 		bpf_object__close;
 		bpf_object__find_map_by_name;
-		bpf_object__find_map_by_offset;
-		bpf_object__find_program_by_title;
 		bpf_object__kversion;
 		bpf_object__load;
 		bpf_object__name;
-		bpf_object__next;
 		bpf_object__open;
-		bpf_object__open_buffer;
-		bpf_object__open_xattr;
 		bpf_object__pin;
 		bpf_object__pin_maps;
 		bpf_object__pin_programs;
-		bpf_object__priv;
-		bpf_object__set_priv;
-		bpf_object__unload;
 		bpf_object__unpin_maps;
 		bpf_object__unpin_programs;
-		bpf_perf_event_read_simple;
 		bpf_prog_attach;
 		bpf_prog_detach;
 		bpf_prog_detach2;
 		bpf_prog_get_fd_by_id;
 		bpf_prog_get_next_id;
-		bpf_prog_load;
-		bpf_prog_load_xattr;
 		bpf_prog_query;
-		bpf_prog_test_run;
-		bpf_prog_test_run_xattr;
 		bpf_program__fd;
-		bpf_program__is_kprobe;
-		bpf_program__is_perf_event;
-		bpf_program__is_raw_tracepoint;
-		bpf_program__is_sched_act;
-		bpf_program__is_sched_cls;
-		bpf_program__is_socket_filter;
-		bpf_program__is_tracepoint;
-		bpf_program__is_xdp;
-		bpf_program__load;
-		bpf_program__next;
-		bpf_program__nth_fd;
 		bpf_program__pin;
-		bpf_program__pin_instance;
-		bpf_program__prev;
-		bpf_program__priv;
 		bpf_program__set_expected_attach_type;
 		bpf_program__set_ifindex;
-		bpf_program__set_kprobe;
-		bpf_program__set_perf_event;
-		bpf_program__set_prep;
-		bpf_program__set_priv;
-		bpf_program__set_raw_tracepoint;
-		bpf_program__set_sched_act;
-		bpf_program__set_sched_cls;
-		bpf_program__set_socket_filter;
-		bpf_program__set_tracepoint;
 		bpf_program__set_type;
-		bpf_program__set_xdp;
-		bpf_program__title;
 		bpf_program__unload;
 		bpf_program__unpin;
-		bpf_program__unpin_instance;
 		bpf_prog_linfo__free;
 		bpf_prog_linfo__new;
 		bpf_prog_linfo__lfind_addr_func;
 		bpf_prog_linfo__lfind;
 		bpf_raw_tracepoint_open;
-		bpf_set_link_xdp_fd;
 		bpf_task_fd_query;
-		bpf_verify_program;
 		btf__fd;
 		btf__find_by_name;
 		btf__free;
-		btf__get_from_id;
 		btf__name_by_offset;
 		btf__new;
 		btf__resolve_size;
@@ -127,48 +70,24 @@ LIBBPF_0.0.1 {

 LIBBPF_0.0.2 {
 	global:
-		bpf_probe_helper;
-		bpf_probe_map_type;
-		bpf_probe_prog_type;
-		bpf_map__resize;
 		bpf_map_lookup_elem_flags;
 		bpf_object__btf;
 		bpf_object__find_map_fd_by_name;
-		bpf_get_link_xdp_id;
-		btf__dedup;
-		btf__get_map_kv_tids;
-		btf__get_nr_types;
 		btf__get_raw_data;
-		btf__load;
 		btf_ext__free;
-		btf_ext__func_info_rec_size;
 		btf_ext__get_raw_data;
-		btf_ext__line_info_rec_size;
 		btf_ext__new;
-		btf_ext__reloc_func_info;
-		btf_ext__reloc_line_info;
-		xsk_umem__create;
-		xsk_socket__create;
-		xsk_umem__delete;
-		xsk_socket__delete;
-		xsk_umem__fd;
-		xsk_socket__fd;
-		bpf_program__get_prog_info_linear;
-		bpf_program__bpil_addr_to_offs;
-		bpf_program__bpil_offs_to_addr;
 } LIBBPF_0.0.1;

 LIBBPF_0.0.3 {
 	global:
 		bpf_map__is_internal;
 		bpf_map_freeze;
-		btf__finalize_data;
 } LIBBPF_0.0.2;

 LIBBPF_0.0.4 {
 	global:
 		bpf_link__destroy;
-		bpf_object__load_xattr;
 		bpf_program__attach_kprobe;
 		bpf_program__attach_perf_event;
 		bpf_program__attach_raw_tracepoint;
@@ -176,14 +95,10 @@ LIBBPF_0.0.4 {
 		bpf_program__attach_uprobe;
 		btf_dump__dump_type;
 		btf_dump__free;
-		btf_dump__new;
 		btf__parse_elf;
 		libbpf_num_possible_cpus;
 		perf_buffer__free;
-		perf_buffer__new;
-		perf_buffer__new_raw;
 		perf_buffer__poll;
-		xsk_umem__create;
 } LIBBPF_0.0.3;

 LIBBPF_0.0.5 {
@@ -193,7 +108,6 @@ LIBBPF_0.0.5 {

 LIBBPF_0.0.6 {
 	global:
-		bpf_get_link_xdp_info;
 		bpf_map__get_pin_path;
 		bpf_map__is_pinned;
 		bpf_map__set_pin_path;
@@ -202,9 +116,6 @@ LIBBPF_0.0.6 {
 		bpf_program__attach_trace;
 		bpf_program__get_expected_attach_type;
 		bpf_program__get_type;
-		bpf_program__is_tracing;
-		bpf_program__set_tracing;
-		bpf_program__size;
 		btf__find_by_name_kind;
 		libbpf_find_vmlinux_btf_id;
 } LIBBPF_0.0.5;
@@ -224,14 +135,8 @@ LIBBPF_0.0.7 {
 		bpf_object__detach_skeleton;
 		bpf_object__load_skeleton;
 		bpf_object__open_skeleton;
-		bpf_probe_large_insn_limit;
-		bpf_prog_attach_xattr;
 		bpf_program__attach;
 		bpf_program__name;
-		bpf_program__is_extension;
-		bpf_program__is_struct_ops;
-		bpf_program__set_extension;
-		bpf_program__set_struct_ops;
 		btf__align_of;
 		libbpf_find_kernel_btf;
 } LIBBPF_0.0.6;
@@ -250,10 +155,7 @@ LIBBPF_0.0.8 {
 		bpf_prog_attach_opts;
 		bpf_program__attach_cgroup;
 		bpf_program__attach_lsm;
-		bpf_program__is_lsm;
 		bpf_program__set_attach_target;
-		bpf_program__set_lsm;
-		bpf_set_link_xdp_fd_opts;
 } LIBBPF_0.0.7;

 LIBBPF_0.0.9 {
@@ -291,9 +193,7 @@ LIBBPF_0.1.0 {
 		bpf_map__value_size;
 		bpf_program__attach_xdp;
 		bpf_program__autoload;
-		bpf_program__is_sk_lookup;
 		bpf_program__set_autoload;
-		bpf_program__set_sk_lookup;
 		btf__parse;
 		btf__parse_raw;
 		btf__pointer_size;
@@ -336,7 +236,6 @@ LIBBPF_0.2.0 {
 		perf_buffer__buffer_fd;
 		perf_buffer__epoll_fd;
 		perf_buffer__consume_buffer;
-		xsk_socket__create_shared;
 } LIBBPF_0.1.0;

 LIBBPF_0.3.0 {
@@ -348,8 +247,6 @@ LIBBPF_0.3.0 {
 		btf__new_empty_split;
 		btf__new_split;
 		ring_buffer__epoll_fd;
-		xsk_setup_xdp_prog;
-		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;

 LIBBPF_0.4.0 {
@@ -397,7 +294,6 @@ LIBBPF_0.6.0 {
 		bpf_object__next_program;
 		bpf_object__prev_map;
 		bpf_object__prev_program;
-		bpf_prog_load_deprecated;
 		bpf_prog_load;
 		bpf_program__flags;
 		bpf_program__insn_cnt;
@@ -407,18 +303,14 @@ LIBBPF_0.6.0 {
 		btf__add_decl_tag;
 		btf__add_type_tag;
 		btf__dedup;
-		btf__dedup_deprecated;
 		btf__raw_data;
 		btf__type_cnt;
 		btf_dump__new;
-		btf_dump__new_deprecated;
 		libbpf_major_version;
 		libbpf_minor_version;
 		libbpf_version_string;
 		perf_buffer__new;
-		perf_buffer__new_deprecated;
 		perf_buffer__new_raw;
-		perf_buffer__new_raw_deprecated;
 } LIBBPF_0.5.0;

 LIBBPF_0.7.0 {
@@ -434,8 +326,74 @@ LIBBPF_0.7.0 {
 		bpf_xdp_detach;
 		bpf_xdp_query;
 		bpf_xdp_query_id;
+		btf_ext__raw_data;
 		libbpf_probe_bpf_helper;
 		libbpf_probe_bpf_map_type;
 		libbpf_probe_bpf_prog_type;
-		libbpf_set_memlock_rlim_max;
-};
+		libbpf_set_memlock_rlim;
+} LIBBPF_0.6.0;
+
+LIBBPF_0.8.0 {
+	global:
+		bpf_map__autocreate;
+		bpf_map__get_next_key;
+		bpf_map__delete_elem;
+		bpf_map__lookup_and_delete_elem;
+		bpf_map__lookup_elem;
+		bpf_map__set_autocreate;
+		bpf_map__update_elem;
+		bpf_map_delete_elem_flags;
+		bpf_object__destroy_subskeleton;
+		bpf_object__open_subskeleton;
+		bpf_program__attach_kprobe_multi_opts;
+		bpf_program__attach_trace_opts;
+		bpf_program__attach_usdt;
+		bpf_program__set_insns;
+		libbpf_register_prog_handler;
+		libbpf_unregister_prog_handler;
+} LIBBPF_0.7.0;
+
+LIBBPF_1.0.0 {
+	global:
+		bpf_obj_get_opts;
+		bpf_prog_query_opts;
+		bpf_program__attach_ksyscall;
+		bpf_program__autoattach;
+		bpf_program__set_autoattach;
+		btf__add_enum64;
+		btf__add_enum64_value;
+		libbpf_bpf_attach_type_str;
+		libbpf_bpf_link_type_str;
+		libbpf_bpf_map_type_str;
+		libbpf_bpf_prog_type_str;
+		perf_buffer__buffer;
+} LIBBPF_0.8.0;
+
+LIBBPF_1.1.0 {
+	global:
+		bpf_btf_get_fd_by_id_opts;
+		bpf_link_get_fd_by_id_opts;
+		bpf_map_get_fd_by_id_opts;
+		bpf_prog_get_fd_by_id_opts;
+		user_ring_buffer__discard;
+		user_ring_buffer__free;
+		user_ring_buffer__new;
+		user_ring_buffer__reserve;
+		user_ring_buffer__reserve_blocking;
+		user_ring_buffer__submit;
+} LIBBPF_1.0.0;
+
+LIBBPF_1.2.0 {
+	global:
+		bpf_btf_get_info_by_fd;
+		bpf_link__update_map;
+		bpf_link_get_info_by_fd;
+		bpf_map_get_info_by_fd;
+		bpf_prog_get_info_by_fd;
+} LIBBPF_1.1.0;
+
+LIBBPF_1.3.0 {
+	global:
+		bpf_obj_pin_opts;
+		bpf_program__attach_netfilter;
+} LIBBPF_1.2.0;
--- a/src/libbpf_common.h
+++ b/src/libbpf_common.h
@@ -30,20 +30,10 @@
 /* Add checks for other versions below when planning deprecation of API symbols
 * with the LIBBPF_DEPRECATED_SINCE macro.
 */
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6)
-#define __LIBBPF_MARK_DEPRECATED_0_6(X) X
+#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X) X
 #else
-#define __LIBBPF_MARK_DEPRECATED_0_6(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7)
-#define __LIBBPF_MARK_DEPRECATED_0_7(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_7(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8)
-#define __LIBBPF_MARK_DEPRECATED_0_8(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_8(X)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X)
 #endif

 /* This set of internal macros allows to do "function overloading" based on
--- a/src/libbpf_errno.c
+++ b/src/libbpf_errno.c
@@ -39,14 +39,14 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {

 int libbpf_strerror(int err, char *buf, size_t size)
 {
+	int ret;
+
 	if (!buf || !size)
 		return libbpf_err(-EINVAL);

 	err = err > 0 ? err : -err;

 	if (err < __LIBBPF_ERRNO__START) {
-		int ret;
-
 		ret = strerror_r(err, buf, size);
 		buf[size - 1] = '\0';
 		return libbpf_err_errno(ret);
@@ -56,12 +56,20 @@ int libbpf_strerror(int err, char *buf, size_t size)
 		const char *msg;

 		msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
-		snprintf(buf, size, "%s", msg);
+		ret = snprintf(buf, size, "%s", msg);
 		buf[size - 1] = '\0';
+		/* The length of the buf and msg is positive.
+		 * A negative number may be returned only when the
+		 * size exceeds INT_MAX. Not likely to appear.
+		 */
+		if (ret >= size)
+			return libbpf_err(-ERANGE);
 		return 0;
 	}

-	snprintf(buf, size, "Unknown libbpf error %d", err);
+	ret = snprintf(buf, size, "Unknown libbpf error %d", err);
 	buf[size - 1] = '\0';
+	if (ret >= size)
+		return libbpf_err(-ERANGE);
 	return libbpf_err(-ENOENT);
 }
--- a/src/libbpf_internal.h
+++ b/src/libbpf_internal.h
@@ -15,7 +15,6 @@
 #include <linux/err.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include "libbpf_legacy.h"
 #include "relo_core.h"

 /* make sure libbpf doesn't use kernel-only integer typedefs */
@@ -103,6 +102,17 @@
 #define str_has_pfx(str, pfx) \
 	(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)

+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+	size_t str_len = strlen(str);
+	size_t sfx_len = strlen(sfx);
+
+	if (sfx_len > str_len)
+		return false;
+	return strcmp(str + str_len - sfx_len, sfx) == 0;
+}
+
 /* Symbol versioning is different between static and shared library.
 * Properly versioned symbols are needed for shared library, but
 * only the symbol of the new version is needed for static library.
@@ -148,6 +158,15 @@ do {				\
 #ifndef __has_builtin
 #define __has_builtin(x) 0
 #endif
+
+struct bpf_link {
+	int (*detach)(struct bpf_link *link);
+	void (*dealloc)(struct bpf_link *link);
+	char *pin_path;		/* NULL, if not pinned */
+	int fd;			/* hook FD, -1 if not applicable */
+	bool disconnected;
+};
+
 /*
 * Re-implement glibc's reallocarray() for libbpf internal-only use.
 * reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -329,6 +348,12 @@ enum kern_feature_id {
 	FEAT_BTF_TYPE_TAG,
 	/* memcg-based accounting for BPF maps and progs */
 	FEAT_MEMCG_ACCOUNT,
+	/* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+	FEAT_BPF_COOKIE,
+	/* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */
+	FEAT_BTF_ENUM64,
+	/* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */
+	FEAT_SYSCALL_WRAPPER,
 	__FEAT_CNT,
 };

@@ -354,6 +379,13 @@ struct btf_ext_info {
 	void *info;
 	__u32 rec_size;
 	__u32 len;
+	/* optional (maintained internally by libbpf) mapping between .BTF.ext
+	 * section and corresponding ELF section. This is used to join
+	 * information like CO-RE relocation records with corresponding BPF
+	 * programs defined in ELF sections
+	 */
+	__u32 *sec_idxs;
+	int sec_cnt;
 };

 #define for_each_btf_ext_sec(seg, sec)					\
@@ -447,7 +479,10 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
 __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
 				 __u32 kind);

-extern enum libbpf_strict_mode libbpf_mode;
+typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
+			     const char *sym_name, void *ctx);
+
+int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *arg);

 /* handle direct returned errors */
 static inline int libbpf_err(int ret)
@@ -462,12 +497,8 @@ static inline int libbpf_err(int ret)
 */
 static inline int libbpf_err_errno(int ret)
 {
-	if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
-		/* errno is already assumed to be set on error */
-		return ret < 0 ? -errno : ret;
-
-	/* legacy: on error return -1 directly and don't touch errno */
-	return ret;
+	/* errno is already assumed to be set on error */
+	return ret < 0 ? -errno : ret;
 }

 /* handle error for pointer-returning APIs, err is assumed to be < 0 always */
@@ -475,12 +506,7 @@ static inline void *libbpf_err_ptr(int err)
 {
 	/* set errno on error, this doesn't break anything */
 	errno = -err;
-
-	if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
-		return NULL;
-
-	/* legacy: encode err as ptr */
-	return ERR_PTR(err);
+	return NULL;
 }

 /* handle pointer-returning APIs' error handling */
@@ -490,11 +516,7 @@ static inline void *libbpf_ptr(void *ret)
 	if (IS_ERR(ret))
 		errno = -PTR_ERR(ret);

-	if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
-		return IS_ERR(ret) ? NULL : ret;
-
-	/* legacy: pass-through original pointer */
-	return ret;
+	return IS_ERR(ret) ? NULL : ret;
 }

 static inline bool str_is_empty(const char *s)
@@ -521,6 +543,7 @@ static inline int ensure_good_fd(int fd)
 		fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
 		saved_errno = errno;
 		close(old_fd);
+		errno = saved_errno;
 		if (fd < 0) {
 			pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno);
 			errno = saved_errno;
@@ -529,4 +552,29 @@ static inline int ensure_good_fd(int fd)
 	return fd;
 }

+/* The following two functions are exposed to bpftool */
+int bpf_core_add_cands(struct bpf_core_cand *local_cand,
+		       size_t local_essent_len,
+		       const struct btf *targ_btf,
+		       const char *targ_btf_name,
+		       int targ_start_id,
+		       struct bpf_core_cand_list *cands);
+void bpf_core_free_cands(struct bpf_core_cand_list *cands);
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man,
+					   const struct bpf_program *prog,
+					   pid_t pid, const char *path,
+					   const char *usdt_provider, const char *usdt_name,
+					   __u64 usdt_cookie);
+
+static inline bool is_pow_of_2(size_t x)
+{
+	return x && (x & (x - 1)) == 0;
+}
+
+#define PROG_LOAD_ATTEMPTS 5
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
--- a/src/libbpf_legacy.h
+++ b/src/libbpf_legacy.h
@@ -20,6 +20,11 @@
 extern "C" {
 #endif

+/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_struct_mode have
+ * no effect. But they are left in libbpf_legacy.h so that applications that
+ * prepared for libbpf 1.0 before final release by using
+ * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes.
+ */
 enum libbpf_strict_mode {
 	/* Turn on all supported strict features of libbpf to simulate libbpf
 	 * v1.0 behavior.
@@ -54,6 +59,10 @@ enum libbpf_strict_mode {
 	 *
 	 * Note, in this mode the program pin path will be based on the
 	 * function name instead of section name.
+	 *
+	 * Additionally, routines in the .text section are always considered
+	 * sub-programs. Legacy behavior allows for a single routine in .text
+	 * to be a program.
 	 */
 	LIBBPF_STRICT_SEC_NAME = 0x04,
 	/*
@@ -67,8 +76,8 @@ enum libbpf_strict_mode {
 	 * first BPF program or map creation operation. This is done only if
 	 * kernel is too old to support memcg-based memory accounting for BPF
 	 * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
-	 * but it can be overriden with libbpf_set_memlock_rlim_max() API.
-	 * Note that libbpf_set_memlock_rlim_max() needs to be called before
+	 * but it can be overriden with libbpf_set_memlock_rlim() API.
+	 * Note that libbpf_set_memlock_rlim() needs to be called before
 	 * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
 	 * operation.
 	 */
@@ -84,6 +93,25 @@ enum libbpf_strict_mode {

 LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);

+/**
+ * @brief **libbpf_get_error()** extracts the error code from the passed
+ * pointer
+ * @param ptr pointer returned from libbpf API function
+ * @return error code; or 0 if no error occured
+ *
+ * Note, as of libbpf 1.0 this function is not necessary and not recommended
+ * to be used. Libbpf doesn't return error code embedded into the pointer
+ * itself. Instead, NULL is returned on error and error code is passed through
+ * thread-local errno variable. **libbpf_get_error()** is just returning -errno
+ * value if it receives NULL, which is correct only if errno hasn't been
+ * modified between libbpf API call and corresponding **libbpf_get_error()**
+ * call. Prefer to check return for NULL and use errno directly.
+ *
+ * This API is left in libbpf 1.0 to allow applications that were 1.0-ready
+ * before final libbpf 1.0 without needing to change them.
+ */
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
 #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS

 /* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
@@ -97,6 +125,8 @@ struct bpf_map;
 struct btf;
 struct btf_ext;

+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
 LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
--- a/src/libbpf_probes.c
+++ b/src/libbpf_probes.c
@@ -12,52 +12,106 @@
 #include <linux/btf.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
+#include <linux/version.h>

 #include "bpf.h"
 #include "libbpf.h"
 #include "libbpf_internal.h"

-static bool grep(const char *buffer, const char *pattern)
+/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
+ * but Ubuntu provides /proc/version_signature file, as described at
+ * https://ubuntu.com/kernel, with an example contents below, which we
+ * can use to get a proper LINUX_VERSION_CODE.
+ *
+ *   Ubuntu 5.4.0-12.15-generic 5.4.8
+ *
+ * In the above, 5.4.8 is what kernel is actually expecting, while
+ * uname() call will return 5.4.0 in info.release.
+ */
+static __u32 get_ubuntu_kernel_version(void)
 {
-	return !!strstr(buffer, pattern);
+	const char *ubuntu_kver_file = "/proc/version_signature";
+	__u32 major, minor, patch;
+	int ret;
+	FILE *f;
+
+	if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0)
+		return 0;
+
+	f = fopen(ubuntu_kver_file, "re");
+	if (!f)
+		return 0;
+
+	ret = fscanf(f, "%*s %*s %u.%u.%u\n", &major, &minor, &patch);
+	fclose(f);
+	if (ret != 3)
+		return 0;
+
+	return KERNEL_VERSION(major, minor, patch);
 }

-static int get_vendor_id(int ifindex)
+/* On Debian LINUX_VERSION_CODE doesn't correspond to info.release.
+ * Instead, it is provided in info.version. An example content of
+ * Debian 10 looks like the below.
+ *
+ *   utsname::release   4.19.0-22-amd64
+ *   utsname::version   #1 SMP Debian 4.19.260-1 (2022-09-29)
+ *
+ * In the above, 4.19.260 is what kernel is actually expecting, while
+ * uname() call will return 4.19.0 in info.release.
+ */
+static __u32 get_debian_kernel_version(struct utsname *info)
 {
-	char ifname[IF_NAMESIZE], path[64], buf[8];
-	ssize_t len;
-	int fd;
+	__u32 major, minor, patch;
+	char *p;

-	if (!if_indextoname(ifindex, ifname))
-		return -1;
+	p = strstr(info->version, "Debian ");
+	if (!p) {
+		/* This is not a Debian kernel. */
+		return 0;
+	}

-	snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
+	if (sscanf(p, "Debian %u.%u.%u", &major, &minor, &patch) != 3)
+		return 0;

-	fd = open(path, O_RDONLY | O_CLOEXEC);
-	if (fd < 0)
-		return -1;
+	// Patch to run on Debian 10
+	if (major == 4 && minor == 19)
+                return KERNEL_VERSION(major, minor, 255);

-	len = read(fd, buf, sizeof(buf));
-	close(fd);
-	if (len < 0)
-		return -1;
-	if (len >= (ssize_t)sizeof(buf))
-		return -1;
-	buf[len] = '\0';
+	return KERNEL_VERSION(major, minor, patch);
+}

-	return strtol(buf, NULL, 0);
+__u32 get_kernel_version(void)
+{
+	__u32 major, minor, patch, version;
+	struct utsname info;
+
+	/* Check if this is an Ubuntu kernel. */
+	version = get_ubuntu_kernel_version();
+	if (version != 0)
+		return version;
+
+	uname(&info);
+
+	/* Check if this is a Debian kernel. */
+	version = get_debian_kernel_version(&info);
+	if (version != 0)
+		return version;
+
+	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+		return 0;
+
+	return KERNEL_VERSION(major, minor, patch);
 }

 static int probe_prog_load(enum bpf_prog_type prog_type,
 			   const struct bpf_insn *insns, size_t insns_cnt,
-			   char *log_buf, size_t log_buf_sz,
-			   __u32 ifindex)
+			   char *log_buf, size_t log_buf_sz)
 {
 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 		.log_buf = log_buf,
 		.log_size = log_buf_sz,
 		.log_level = log_buf ? 1 : 0,
-		.prog_ifindex = ifindex,
 	);
 	int fd, err, exp_err = 0;
 	const char *exp_msg = NULL;
@@ -131,6 +185,9 @@ static int probe_prog_load(enum bpf_prog_type prog_type,
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 		break;
+	case BPF_PROG_TYPE_NETFILTER:
+		opts.expected_attach_type = BPF_NETFILTER;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -161,31 +218,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
 	if (opts)
 		return libbpf_err(-EINVAL);

-	ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0);
+	ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0);
 	return libbpf_err(ret);
 }

-bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
-{
-	struct bpf_insn insns[2] = {
-		BPF_MOV64_IMM(BPF_REG_0, 0),
-		BPF_EXIT_INSN()
-	};
-
-	/* prefer libbpf_probe_bpf_prog_type() unless offload is requested */
-	if (ifindex == 0)
-		return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1;
-
-	if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
-		/* nfp returns -EINVAL on exit(0) with TC offload */
-		insns[0].imm = 2;
-
-	errno = 0;
-	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
-
-	return errno != EINVAL && errno != EOPNOTSUPP;
-}
-
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 			 const char *str_sec, size_t str_len)
 {
@@ -242,14 +278,12 @@ static int load_local_storage_btf(void)
 				     strs, sizeof(strs));
 }

-static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, opts);
 	int key_size, value_size, max_entries;
 	__u32 btf_key_type_id = 0, btf_value_type_id = 0;
-	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
-
-	opts.map_ifindex = ifindex;
+	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0;

 	key_size	= sizeof(__u32);
 	value_size	= sizeof(__u32);
@@ -277,6 +311,7 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_SK_STORAGE:
 	case BPF_MAP_TYPE_INODE_STORAGE:
 	case BPF_MAP_TYPE_TASK_STORAGE:
+	case BPF_MAP_TYPE_CGRP_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;
@@ -287,9 +322,10 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
 			return btf_fd;
 		break;
 	case BPF_MAP_TYPE_RINGBUF:
+	case BPF_MAP_TYPE_USER_RINGBUF:
 		key_size = 0;
 		value_size = 0;
-		max_entries = 4096;
+		max_entries = sysconf(_SC_PAGE_SIZE);
 		break;
 	case BPF_MAP_TYPE_STRUCT_OPS:
 		/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
@@ -326,12 +362,6 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)

 	if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 	    map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-		/* TODO: probe for device, once libbpf has a function to create
-		 * map-in-map for offload
-		 */
-		if (ifindex)
-			goto cleanup;
-
 		fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
 					  sizeof(__u32), sizeof(__u32), 1, NULL);
 		if (fd_inner < 0)
@@ -370,15 +400,10 @@ int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
 	if (opts)
 		return libbpf_err(-EINVAL);

-	ret = probe_map_create(map_type, 0);
+	ret = probe_map_create(map_type);
 	return libbpf_err(ret);
 }

-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
-{
-	return probe_map_create(map_type, ifindex) == 1;
-}
-
 int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
 			    const void *opts)
 {
@@ -407,7 +432,7 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
 	}

 	buf[0] = '\0';
-	ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0);
+	ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf));
 	if (ret < 0)
 		return libbpf_err(ret);

@@ -427,51 +452,3 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
 		return 0;
 	return 1; /* assume supported */
 }
-
-bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
-		      __u32 ifindex)
-{
-	struct bpf_insn insns[2] = {
-		BPF_EMIT_CALL(id),
-		BPF_EXIT_INSN()
-	};
-	char buf[4096] = {};
-	bool res;
-
-	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
-	res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
-
-	if (ifindex) {
-		switch (get_vendor_id(ifindex)) {
-		case 0x19ee: /* Netronome specific */
-			res = res && !grep(buf, "not supported by FW") &&
-				!grep(buf, "unsupported function id");
-			break;
-		default:
-			break;
-		}
-	}
-
-	return res;
-}
-
-/*
- * Probe for availability of kernel commit (5.3):
- *
- * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
- */
-bool bpf_probe_large_insn_limit(__u32 ifindex)
-{
-	struct bpf_insn insns[BPF_MAXINSNS + 1];
-	int i;
-
-	for (i = 0; i < BPF_MAXINSNS; i++)
-		insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
-	insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
-
-	errno = 0;
-	probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
-			ifindex);
-
-	return errno != E2BIG && errno != EINVAL;
-}
--- a/src/libbpf_version.h
+++ b/src/libbpf_version.h
@@ -3,7 +3,7 @@
 #ifndef __LIBBPF_VERSION_H
 #define __LIBBPF_VERSION_H

-#define LIBBPF_MAJOR_VERSION 0
-#define LIBBPF_MINOR_VERSION 7
+#define LIBBPF_MAJOR_VERSION 1
+#define LIBBPF_MINOR_VERSION 3

 #endif /* __LIBBPF_VERSION_H */
--- a/src/linker.c
+++ b/src/linker.c
@@ -697,11 +697,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 	return err;
 }

-static bool is_pow_of_2(size_t x)
-{
-	return x && (x & (x - 1)) == 0;
-}
-
 static int linker_sanity_check_elf(struct src_obj *obj)
 {
 	struct src_sec *sec;
@@ -1120,7 +1115,19 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src

 	if (src->shdr->sh_type != SHT_NOBITS) {
 		tmp = realloc(dst->raw_data, dst_final_sz);
-		if (!tmp)
+		/* If dst_align_sz == 0, realloc() behaves in a special way:
+		 * 1. When dst->raw_data is NULL it returns:
+		 *    "either NULL or a pointer suitable to be passed to free()" [1].
+		 * 2. When dst->raw_data is not-NULL it frees dst->raw_data and returns NULL,
+		 *    thus invalidating any "pointer suitable to be passed to free()" obtained
+		 *    at step (1).
+		 *
+		 * The dst_align_sz > 0 check avoids error exit after (2), otherwise
+		 * dst->raw_data would be freed again in bpf_linker__free().
+		 *
+		 * [1] man 3 realloc
+		 */
+		if (!tmp && dst_align_sz > 0)
 			return -ENOMEM;
 		dst->raw_data = tmp;

@@ -1340,6 +1347,7 @@ recur:
 	case BTF_KIND_STRUCT:
 	case BTF_KIND_UNION:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 	case BTF_KIND_FWD:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_VAR:
@@ -1362,6 +1370,7 @@ recur:
 	case BTF_KIND_INT:
 	case BTF_KIND_FLOAT:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
 		/* ignore encoding for int and enum values for enum */
 		if (t1->size != t2->size) {
 			pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
@@ -2000,7 +2009,6 @@ add_sym:
 static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
 {
 	struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
-	struct dst_sec *dst_symtab;
 	int i, err;

 	for (i = 1; i < obj->sec_cnt; i++) {
@@ -2033,9 +2041,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 			return -1;
 		}

-		/* add_dst_sec() above could have invalidated linker->secs */
-		dst_symtab = &linker->secs[linker->symtab_sec_idx];
-
 		/* shdr->sh_link points to SYMTAB */
 		dst_sec->shdr->sh_link = linker->symtab_sec_idx;

@@ -2052,16 +2057,13 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 		dst_rel = dst_sec->raw_data + src_sec->dst_off;
 		n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
 		for (j = 0; j < n; j++, src_rel++, dst_rel++) {
-			size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info);
-			size_t sym_type = ELF64_R_TYPE(src_rel->r_info);
-			Elf64_Sym *src_sym, *dst_sym;
-			size_t dst_sym_idx;
+			size_t src_sym_idx, dst_sym_idx, sym_type;
+			Elf64_Sym *src_sym;

 			src_sym_idx = ELF64_R_SYM(src_rel->r_info);
 			src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;

 			dst_sym_idx = obj->sym_map[src_sym_idx];
-			dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx;
 			dst_rel->r_offset += src_linked_sec->dst_off;
 			sym_type = ELF64_R_TYPE(src_rel->r_info);
 			dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -9,6 +9,7 @@
 #include <linux/if_ether.h>
 #include <linux/pkt_cls.h>
 #include <linux/rtnetlink.h>
+#include <linux/netdev.h>
 #include <sys/socket.h>
 #include <errno.h>
 #include <time.h>
@@ -27,13 +28,27 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
 typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
 			      void *cookie);

+struct xdp_link_info {
+	__u32 prog_id;
+	__u32 drv_prog_id;
+	__u32 hw_prog_id;
+	__u32 skb_prog_id;
+	__u8 attach_mode;
+};
+
 struct xdp_id_md {
 	int ifindex;
 	__u32 flags;
 	struct xdp_link_info info;
+	__u64 feature_flags;
 };

-static int libbpf_netlink_open(__u32 *nl_pid)
+struct xdp_features_md {
+	int ifindex;
+	__u64 flags;
+};
+
+static int libbpf_netlink_open(__u32 *nl_pid, int proto)
 {
 	struct sockaddr_nl sa;
 	socklen_t addrlen;
@@ -43,7 +58,7 @@ static int libbpf_netlink_open(__u32 *nl_pid)
 	memset(&sa, 0, sizeof(sa));
 	sa.nl_family = AF_NETLINK;

-	sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+	sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
 	if (sock < 0)
 		return -errno;

@@ -87,29 +102,75 @@ enum {
 	NL_DONE,
 };

+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+	int len;
+
+	do {
+		len = recvmsg(sock, mhdr, flags);
+	} while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+	if (len < 0)
+		return -errno;
+	return len;
+}
+
+static int alloc_iov(struct iovec *iov, int len)
+{
+	void *nbuf;
+
+	nbuf = realloc(iov->iov_base, len);
+	if (!nbuf)
+		return -ENOMEM;
+
+	iov->iov_base = nbuf;
+	iov->iov_len = len;
+	return 0;
+}
+
 static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
 			       __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
 			       void *cookie)
 {
+	struct iovec iov = {};
+	struct msghdr mhdr = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
 	bool multipart = true;
 	struct nlmsgerr *err;
 	struct nlmsghdr *nh;
-	char buf[4096];
 	int len, ret;

+	ret = alloc_iov(&iov, 4096);
+	if (ret)
+		goto done;
+
 	while (multipart) {
 start:
 		multipart = false;
-		len = recv(sock, buf, sizeof(buf), 0);
+		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
 		if (len < 0) {
-			ret = -errno;
+			ret = len;
+			goto done;
+		}
+
+		if (len > iov.iov_len) {
+			ret = alloc_iov(&iov, len);
+			if (ret)
+				goto done;
+		}
+
+		len = netlink_recvmsg(sock, &mhdr, 0);
+		if (len < 0) {
+			ret = len;
 			goto done;
 		}

 		if (len == 0)
 			break;

-		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
 		     nh = NLMSG_NEXT(nh, len)) {
 			if (nh->nlmsg_pid != nl_pid) {
 				ret = -LIBBPF_ERRNO__WRNGPID;
@@ -130,7 +191,8 @@ start:
 				libbpf_nla_dump_errormsg(nh);
 				goto done;
 			case NLMSG_DONE:
-				return 0;
+				ret = 0;
+				goto done;
 			default:
 				break;
 			}
@@ -142,27 +204,29 @@ start:
 				case NL_NEXT:
 					goto start;
 				case NL_DONE:
-					return 0;
+					ret = 0;
+					goto done;
 				default:
-					return ret;
+					goto done;
 				}
 			}
 		}
 	}
 	ret = 0;
 done:
+	free(iov.iov_base);
 	return ret;
 }

 static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
-				    __dump_nlmsg_t parse_msg,
+				    int proto, __dump_nlmsg_t parse_msg,
 				    libbpf_dump_nlmsg_t parse_attr,
 				    void *cookie)
 {
 	__u32 nl_pid = 0;
 	int sock, ret;

-	sock = libbpf_netlink_open(&nl_pid);
+	sock = libbpf_netlink_open(&nl_pid, proto);
 	if (sock < 0)
 		return sock;

@@ -181,6 +245,43 @@ out:
 	return ret;
 }

+static int parse_genl_family_id(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+				void *cookie)
+{
+	struct genlmsghdr *gnl = NLMSG_DATA(nh);
+	struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
+	struct nlattr *tb[CTRL_ATTR_FAMILY_ID + 1];
+	__u16 *id = cookie;
+
+	libbpf_nla_parse(tb, CTRL_ATTR_FAMILY_ID, na,
+			 NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
+	if (!tb[CTRL_ATTR_FAMILY_ID])
+		return NL_CONT;
+
+	*id = libbpf_nla_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
+	return NL_DONE;
+}
+
+static int libbpf_netlink_resolve_genl_family_id(const char *name,
+						 __u16 len, __u16 *id)
+{
+	struct libbpf_nla_req req = {
+		.nh.nlmsg_len	= NLMSG_LENGTH(GENL_HDRLEN),
+		.nh.nlmsg_type	= GENL_ID_CTRL,
+		.nh.nlmsg_flags	= NLM_F_REQUEST,
+		.gnl.cmd	= CTRL_CMD_GETFAMILY,
+		.gnl.version	= 2,
+	};
+	int err;
+
+	err = nlattr_add(&req, CTRL_ATTR_FAMILY_NAME, name, len);
+	if (err < 0)
+		return err;
+
+	return libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
+					parse_genl_family_id, NULL, id);
+}
+
 static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 					 __u32 flags)
 {
@@ -214,7 +315,7 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 	}
 	nlattr_end_nested(&req, nla);

-	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
 }

 int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
@@ -239,31 +340,6 @@ int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *o
 	return bpf_xdp_attach(ifindex, -1, flags, opts);
 }

-int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
-			     const struct bpf_xdp_set_link_opts *opts)
-{
-	int old_fd = -1, ret;
-
-	if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
-		return libbpf_err(-EINVAL);
-
-	if (OPTS_HAS(opts, old_fd)) {
-		old_fd = OPTS_GET(opts, old_fd, -1);
-		flags |= XDP_FLAGS_REPLACE;
-	}
-
-	ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
-	return libbpf_err(ret);
-}
-
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
-	int ret;
-
-	ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
-	return libbpf_err(ret);
-}
-
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
 			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
 {
@@ -325,6 +401,29 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
 	return 0;
 }

+static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+			      void *cookie)
+{
+	struct genlmsghdr *gnl = NLMSG_DATA(nh);
+	struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
+	struct nlattr *tb[NETDEV_CMD_MAX + 1];
+	struct xdp_features_md *md = cookie;
+	__u32 ifindex;
+
+	libbpf_nla_parse(tb, NETDEV_CMD_MAX, na,
+			 NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
+
+	if (!tb[NETDEV_A_DEV_IFINDEX] || !tb[NETDEV_A_DEV_XDP_FEATURES])
+		return NL_CONT;
+
+	ifindex = libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_IFINDEX]);
+	if (ifindex != md->ifindex)
+		return NL_CONT;
+
+	md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]);
+	return NL_DONE;
+}
+
 int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 {
 	struct libbpf_nla_req req = {
@@ -334,6 +433,10 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 		.ifinfo.ifi_family = AF_PACKET,
 	};
 	struct xdp_id_md xdp_id = {};
+	struct xdp_features_md md = {
+		.ifindex = ifindex,
+	};
+	__u16 id;
 	int err;

 	if (!OPTS_VALID(opts, bpf_xdp_query_opts))
@@ -350,7 +453,7 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 	xdp_id.ifindex = ifindex;
 	xdp_id.flags = xdp_flags;

-	err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
+	err = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, __dump_link_nlmsg,
 				       get_xdp_info, &xdp_id);
 	if (err)
 		return libbpf_err(err);
@@ -361,30 +464,37 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 	OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
 	OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);

-	return 0;
-}
+	if (!OPTS_HAS(opts, feature_flags))
+		return 0;

-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-			  size_t info_size, __u32 flags)
-{
-	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-	size_t sz;
-	int err;
+	err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id);
+	if (err < 0) {
+		if (err == -ENOENT) {
+			opts->feature_flags = 0;
+			goto skip_feature_flags;
+		}
+		return libbpf_err(err);
+	}

-	if (!info_size)
-		return libbpf_err(-EINVAL);
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+	req.nh.nlmsg_flags = NLM_F_REQUEST;
+	req.nh.nlmsg_type = id;
+	req.gnl.cmd = NETDEV_CMD_DEV_GET;
+	req.gnl.version = 2;

-	err = bpf_xdp_query(ifindex, flags, &opts);
+	err = nlattr_add(&req, NETDEV_A_DEV_IFINDEX, &ifindex, sizeof(ifindex));
+	if (err < 0)
+		return libbpf_err(err);
+
+	err = libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
+				       parse_xdp_features, NULL, &md);
 	if (err)
 		return libbpf_err(err);

-	/* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
-	 * layout after sz field
-	 */
-	sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
-	memcpy(info, &opts.prog_id, sz);
-	memset((void *)info + sz, 0, info_size - sz);
+	opts->feature_flags = md.flags;

+skip_feature_flags:
 	return 0;
 }

@@ -414,11 +524,6 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 }


-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
-{
-	return bpf_xdp_query_id(ifindex, flags, prog_id);
-}
-
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);

 static int clsact_config(struct libbpf_nla_req *req)
@@ -490,7 +595,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 	if (ret < 0)
 		return ret;

-	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
 }

 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
@@ -584,12 +689,13 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,

 static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
 {
-	struct bpf_prog_info info = {};
+	struct bpf_prog_info info;
 	__u32 info_len = sizeof(info);
 	char name[256];
 	int len, ret;

-	ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	memset(&info, 0, info_len);
+	ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
 	if (ret < 0)
 		return ret;

@@ -669,7 +775,8 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)

 	info.opts = opts;

-	ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
+	ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
+				       &info);
 	if (ret < 0)
 		return libbpf_err(ret);
 	if (!info.processed)
@@ -735,7 +842,7 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 			return ret;
 	}

-	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
+	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
 }

 int bpf_tc_detach(const struct bpf_tc_hook *hook,
@@ -800,7 +907,8 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)

 	info.opts = opts;

-	ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
+	ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
+				       &info);
 	if (ret < 0)
 		return libbpf_err(ret);
 	if (!info.processed)
--- a/src/nlattr.c
+++ b/src/nlattr.c
@@ -32,7 +32,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)

 static int nla_ok(const struct nlattr *nla, int remaining)
 {
-	return remaining >= sizeof(*nla) &&
+	return remaining >= (int)sizeof(*nla) &&
 	       nla->nla_len >= sizeof(*nla) &&
 	       nla->nla_len <= remaining;
 }
@@ -178,7 +178,7 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
 		hlen += nlmsg_len(&err->msg);

 	attr = (struct nlattr *) ((void *) err + hlen);
-	alen = nlh->nlmsg_len - hlen;
+	alen = (void *)nlh + nlh->nlmsg_len - (void *)attr;

 	if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
 			     extack_policy) != 0) {
--- a/src/nlattr.h
+++ b/src/nlattr.h
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
+#include <linux/genetlink.h>

 /* avoid multiple definition of netlink features */
 #define __LINUX_NETLINK_H
@@ -58,6 +59,7 @@ struct libbpf_nla_req {
 	union {
 		struct ifinfomsg ifinfo;
 		struct tcmsg tc;
+		struct genlmsghdr gnl;
 	};
 	char buf[128];
 };
@@ -89,11 +91,21 @@ static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
 	return *(uint8_t *)libbpf_nla_data(nla);
 }

+static inline uint16_t libbpf_nla_getattr_u16(const struct nlattr *nla)
+{
+	return *(uint16_t *)libbpf_nla_data(nla);
+}
+
 static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
 {
 	return *(uint32_t *)libbpf_nla_data(nla);
 }

+static inline uint64_t libbpf_nla_getattr_u64(const struct nlattr *nla)
+{
+	return *(uint64_t *)libbpf_nla_data(nla);
+}
+
 static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
 {
 	return (const char *)libbpf_nla_data(nla);
--- a/src/relo_core.c
+++ b/src/relo_core.c
@@ -95,6 +95,7 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
 	case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
 	case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
 	case BPF_CORE_TYPE_EXISTS: return "type_exists";
+	case BPF_CORE_TYPE_MATCHES: return "type_matches";
 	case BPF_CORE_TYPE_SIZE: return "type_size";
 	case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
 	case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
@@ -123,6 +124,7 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
 	case BPF_CORE_TYPE_ID_LOCAL:
 	case BPF_CORE_TYPE_ID_TARGET:
 	case BPF_CORE_TYPE_EXISTS:
+	case BPF_CORE_TYPE_MATCHES:
 	case BPF_CORE_TYPE_SIZE:
 		return true;
 	default:
@@ -141,6 +143,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
 	}
 }

+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+				const struct btf *targ_btf, __u32 targ_id, int level)
+{
+	const struct btf_type *local_type, *targ_type;
+	int depth = 32; /* max recursion depth */
+
+	/* caller made sure that names match (ignoring flavor suffix) */
+	local_type = btf_type_by_id(local_btf, local_id);
+	targ_type = btf_type_by_id(targ_btf, targ_id);
+	if (!btf_kind_core_compat(local_type, targ_type))
+		return 0;
+
+recur:
+	depth--;
+	if (depth < 0)
+		return -EINVAL;
+
+	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!local_type || !targ_type)
+		return -EINVAL;
+
+	if (!btf_kind_core_compat(local_type, targ_type))
+		return 0;
+
+	switch (btf_kind(local_type)) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+	case BTF_KIND_ENUM64:
+		return 1;
+	case BTF_KIND_INT:
+		/* just reject deprecated bitfield-like integers; all other
+		 * integers are by default compatible between each other
+		 */
+		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+	case BTF_KIND_PTR:
+		local_id = local_type->type;
+		targ_id = targ_type->type;
+		goto recur;
+	case BTF_KIND_ARRAY:
+		local_id = btf_array(local_type)->type;
+		targ_id = btf_array(targ_type)->type;
+		goto recur;
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *local_p = btf_params(local_type);
+		struct btf_param *targ_p = btf_params(targ_type);
+		__u16 local_vlen = btf_vlen(local_type);
+		__u16 targ_vlen = btf_vlen(targ_type);
+		int i, err;
+
+		if (local_vlen != targ_vlen)
+			return 0;
+
+		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+			if (level <= 0)
+				return -EINVAL;
+
+			skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+			skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+			err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+							  level - 1);
+			if (err <= 0)
+				return err;
+		}
+
+		/* tail recurse for return type check */
+		skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+		skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+		goto recur;
+	}
+	default:
+		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+			btf_kind_str(local_type), local_id, targ_id);
+		return 0;
+	}
+}
+
 /*
 * Turn bpf_core_relo into a low- and high-level spec representation,
 * validating correctness along the way, as well as calculating resulting
@@ -167,40 +249,39 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
 * just a parsed access string representation): [0, 1, 2, 3].
 *
 * High-level spec will capture only 3 points:
- *   - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
 *   - field 'a' access (corresponds to '2' in low-level spec);
 *   - array element #3 access (corresponds to '3' in low-level spec).
 *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE,
 * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
 * spec and raw_spec are kept empty.
 *
 * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
 * string to specify enumerator's value index that need to be relocated.
 */
-static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
-			       __u32 type_id,
-			       const char *spec_str,
-			       enum bpf_core_relo_kind relo_kind,
-			       struct bpf_core_spec *spec)
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+			const struct bpf_core_relo *relo,
+			struct bpf_core_spec *spec)
 {
 	int access_idx, parsed_len, i;
 	struct bpf_core_accessor *acc;
 	const struct btf_type *t;
-	const char *name;
-	__u32 id;
+	const char *name, *spec_str;
+	__u32 id, name_off;
 	__s64 sz;

+	spec_str = btf__name_by_offset(btf, relo->access_str_off);
 	if (str_is_empty(spec_str) || *spec_str == ':')
 		return -EINVAL;

 	memset(spec, 0, sizeof(*spec));
 	spec->btf = btf;
-	spec->root_type_id = type_id;
-	spec->relo_kind = relo_kind;
+	spec->root_type_id = relo->type_id;
+	spec->relo_kind = relo->kind;

 	/* type-based relocations don't have a field access string */
-	if (core_relo_is_type_based(relo_kind)) {
+	if (core_relo_is_type_based(relo->kind)) {
 		if (strcmp(spec_str, "0"))
 			return -EINVAL;
 		return 0;
@@ -221,7 +302,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
 	if (spec->raw_len == 0)
 		return -EINVAL;

-	t = skip_mods_and_typedefs(btf, type_id, &id);
+	t = skip_mods_and_typedefs(btf, relo->type_id, &id);
 	if (!t)
 		return -EINVAL;

@@ -231,16 +312,18 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
 	acc->idx = access_idx;
 	spec->len++;

-	if (core_relo_is_enumval_based(relo_kind)) {
-		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+	if (core_relo_is_enumval_based(relo->kind)) {
+		if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
 			return -EINVAL;

 		/* record enumerator name in a first accessor */
-		acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+		name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off
+					  : btf_enum64(t)[access_idx].name_off;
+		acc->name = btf__name_by_offset(btf, name_off);
 		return 0;
 	}

-	if (!core_relo_is_field_based(relo_kind))
+	if (!core_relo_is_field_based(relo->kind))
 		return -EINVAL;

 	sz = btf__resolve_size(btf, id);
@@ -301,7 +384,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
 			spec->bit_offset += access_idx * sz * 8;
 		} else {
 			pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
-				prog_name, type_id, spec_str, i, id, btf_kind_str(t));
+				prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t));
 			return -EINVAL;
 		}
 	}
@@ -341,7 +424,7 @@ recur:

 	if (btf_is_composite(local_type) && btf_is_composite(targ_type))
 		return 1;
-	if (btf_kind(local_type) != btf_kind(targ_type))
+	if (!btf_kind_core_compat(local_type, targ_type))
 		return 0;

 	switch (btf_kind(local_type)) {
@@ -349,6 +432,7 @@ recur:
 	case BTF_KIND_FLOAT:
 		return 1;
 	case BTF_KIND_FWD:
+	case BTF_KIND_ENUM64:
 	case BTF_KIND_ENUM: {
 		const char *local_name, *targ_name;
 		size_t local_len, targ_len;
@@ -478,6 +562,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 	const struct bpf_core_accessor *local_acc;
 	struct bpf_core_accessor *targ_acc;
 	int i, sz, matched;
+	__u32 name_off;

 	memset(targ_spec, 0, sizeof(*targ_spec));
 	targ_spec->btf = targ_btf;
@@ -485,9 +570,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 	targ_spec->relo_kind = local_spec->relo_kind;

 	if (core_relo_is_type_based(local_spec->relo_kind)) {
-		return bpf_core_types_are_compat(local_spec->btf,
-						 local_spec->root_type_id,
-						 targ_btf, targ_id);
+		if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES)
+			return bpf_core_types_match(local_spec->btf,
+						    local_spec->root_type_id,
+						    targ_btf, targ_id);
+		else
+			return bpf_core_types_are_compat(local_spec->btf,
+							 local_spec->root_type_id,
+							 targ_btf, targ_id);
 	}

 	local_acc = &local_spec->spec[0];
@@ -495,18 +585,22 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,

 	if (core_relo_is_enumval_based(local_spec->relo_kind)) {
 		size_t local_essent_len, targ_essent_len;
-		const struct btf_enum *e;
 		const char *targ_name;

 		/* has to resolve to an enum */
 		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
-		if (!btf_is_enum(targ_type))
+		if (!btf_is_any_enum(targ_type))
 			return 0;

 		local_essent_len = bpf_core_essential_name_len(local_acc->name);

-		for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
-			targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+		for (i = 0; i < btf_vlen(targ_type); i++) {
+			if (btf_is_enum(targ_type))
+				name_off = btf_enum(targ_type)[i].name_off;
+			else
+				name_off = btf_enum64(targ_type)[i].name_off;
+
+			targ_name = btf__name_by_offset(targ_spec->btf, name_off);
 			targ_essent_len = bpf_core_essential_name_len(targ_name);
 			if (targ_essent_len != local_essent_len)
 				continue;
@@ -584,7 +678,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 static int bpf_core_calc_field_relo(const char *prog_name,
 				    const struct bpf_core_relo *relo,
 				    const struct bpf_core_spec *spec,
-				    __u32 *val, __u32 *field_sz, __u32 *type_id,
+				    __u64 *val, __u32 *field_sz, __u32 *type_id,
 				    bool *validate)
 {
 	const struct bpf_core_accessor *acc;
@@ -681,8 +775,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 		*val = byte_sz;
 		break;
 	case BPF_CORE_FIELD_SIGNED:
-		/* enums will be assumed unsigned */
-		*val = btf_is_enum(mt) ||
+		*val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) ||
 		       (btf_int_encoding(mt) & BTF_INT_SIGNED);
 		if (validate)
 			*validate = true; /* signedness is never ambiguous */
@@ -709,7 +802,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,

 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
 				   const struct bpf_core_spec *spec,
-				   __u32 *val, bool *validate)
+				   __u64 *val, bool *validate)
 {
 	__s64 sz;

@@ -733,6 +826,7 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
 			*validate = false;
 		break;
 	case BPF_CORE_TYPE_EXISTS:
+	case BPF_CORE_TYPE_MATCHES:
 		*val = 1;
 		break;
 	case BPF_CORE_TYPE_SIZE:
@@ -752,10 +846,9 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,

 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
 				      const struct bpf_core_spec *spec,
-				      __u32 *val)
+				      __u64 *val)
 {
 	const struct btf_type *t;
-	const struct btf_enum *e;

 	switch (relo->kind) {
 	case BPF_CORE_ENUMVAL_EXISTS:
@@ -765,8 +858,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
 		if (!spec)
 			return -EUCLEAN; /* request instruction poisoning */
 		t = btf_type_by_id(spec->btf, spec->spec[0].type_id);
-		e = btf_enum(t) + spec->spec[0].idx;
-		*val = e->val;
+		if (btf_is_enum(t))
+			*val = btf_enum(t)[spec->spec[0].idx].val;
+		else
+			*val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx);
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -775,31 +870,6 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
 	return 0;
 }

-struct bpf_core_relo_res
-{
-	/* expected value in the instruction, unless validate == false */
-	__u32 orig_val;
-	/* new value that needs to be patched up to */
-	__u32 new_val;
-	/* relocation unsuccessful, poison instruction, but don't fail load */
-	bool poison;
-	/* some relocations can't be validated against orig_val */
-	bool validate;
-	/* for field byte offset relocations or the forms:
-	 *     *(T *)(rX + <off>) = rY
-	 *     rX = *(T *)(rY + <off>),
-	 * we remember original and resolved field size to adjust direct
-	 * memory loads of pointers and integers; this is necessary for 32-bit
-	 * host kernel architectures, but also allows to automatically
-	 * relocate fields that were resized from, e.g., u32 to u64, etc.
-	 */
-	bool fail_memsz_adjust;
-	__u32 orig_sz;
-	__u32 orig_type_id;
-	__u32 new_sz;
-	__u32 new_type_id;
-};
-
 /* Calculate original and target relocation values, given local and target
 * specs and relocation kind. These values are calculated for each candidate.
 * If there are multiple candidates, resulting values should all be consistent
@@ -951,11 +1021,11 @@ static int insn_bytes_to_bpf_size(__u32 sz)
 * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
 * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
 */
-static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
-			       int insn_idx, const struct bpf_core_relo *relo,
-			       int relo_idx, const struct bpf_core_relo_res *res)
+int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+			int insn_idx, const struct bpf_core_relo *relo,
+			int relo_idx, const struct bpf_core_relo_res *res)
 {
-	__u32 orig_val, new_val;
+	__u64 orig_val, new_val;
 	__u8 class;

 	class = BPF_CLASS(insn->code);
@@ -980,28 +1050,30 @@ poison:
 		if (BPF_SRC(insn->code) != BPF_K)
 			return -EINVAL;
 		if (res->validate && insn->imm != orig_val) {
-			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n",
 				prog_name, relo_idx,
-				insn_idx, insn->imm, orig_val, new_val);
+				insn_idx, insn->imm, (unsigned long long)orig_val,
+				(unsigned long long)new_val);
 			return -EINVAL;
 		}
 		orig_val = insn->imm;
 		insn->imm = new_val;
-		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n",
 			 prog_name, relo_idx, insn_idx,
-			 orig_val, new_val);
+			 (unsigned long long)orig_val, (unsigned long long)new_val);
 		break;
 	case BPF_LDX:
 	case BPF_ST:
 	case BPF_STX:
 		if (res->validate && insn->off != orig_val) {
-			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
-				prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n",
+				prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val,
+				(unsigned long long)new_val);
 			return -EINVAL;
 		}
 		if (new_val > SHRT_MAX) {
-			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
-				prog_name, relo_idx, insn_idx, new_val);
+			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n",
+				prog_name, relo_idx, insn_idx, (unsigned long long)new_val);
 			return -ERANGE;
 		}
 		if (res->fail_memsz_adjust) {
@@ -1013,8 +1085,9 @@ poison:

 		orig_val = insn->off;
 		insn->off = new_val;
-		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
-			 prog_name, relo_idx, insn_idx, orig_val, new_val);
+		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n",
+			 prog_name, relo_idx, insn_idx, (unsigned long long)orig_val,
+			 (unsigned long long)new_val);

 		if (res->new_sz != res->orig_sz) {
 			int insn_bytes_sz, insn_bpf_sz;
@@ -1050,20 +1123,20 @@ poison:
 			return -EINVAL;
 		}

-		imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+		imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32);
 		if (res->validate && imm != orig_val) {
-			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n",
 				prog_name, relo_idx,
 				insn_idx, (unsigned long long)imm,
-				orig_val, new_val);
+				(unsigned long long)orig_val, (unsigned long long)new_val);
 			return -EINVAL;
 		}

 		insn[0].imm = new_val;
-		insn[1].imm = 0; /* currently only 32-bit values are supported */
-		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+		insn[1].imm = new_val >> 32;
+		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n",
 			 prog_name, relo_idx, insn_idx,
-			 (unsigned long long)imm, new_val);
+			 (unsigned long long)imm, (unsigned long long)new_val);
 		break;
 	}
 	default:
@@ -1080,55 +1153,82 @@ poison:
 * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
 * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
 */
-static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec)
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
 {
 	const struct btf_type *t;
-	const struct btf_enum *e;
 	const char *s;
 	__u32 type_id;
-	int i;
+	int i, len = 0;
+
+#define append_buf(fmt, args...)				\
+	({							\
+		int r;						\
+		r = snprintf(buf, buf_sz, fmt, ##args);		\
+		len += r;					\
+		if (r >= buf_sz)				\
+			r = buf_sz;				\
+		buf += r;					\
+		buf_sz -= r;					\
+	})

 	type_id = spec->root_type_id;
 	t = btf_type_by_id(spec->btf, type_id);
 	s = btf__name_by_offset(spec->btf, t->name_off);

-	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+	append_buf("<%s> [%u] %s %s",
+		   core_relo_kind_str(spec->relo_kind),
+		   type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);

 	if (core_relo_is_type_based(spec->relo_kind))
-		return;
+		return len;

 	if (core_relo_is_enumval_based(spec->relo_kind)) {
 		t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
-		e = btf_enum(t) + spec->raw_spec[0];
-		s = btf__name_by_offset(spec->btf, e->name_off);
+		if (btf_is_enum(t)) {
+			const struct btf_enum *e;
+			const char *fmt_str;

-		libbpf_print(level, "::%s = %u", s, e->val);
-		return;
+			e = btf_enum(t) + spec->raw_spec[0];
+			s = btf__name_by_offset(spec->btf, e->name_off);
+			fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u";
+			append_buf(fmt_str, s, e->val);
+		} else {
+			const struct btf_enum64 *e;
+			const char *fmt_str;
+
+			e = btf_enum64(t) + spec->raw_spec[0];
+			s = btf__name_by_offset(spec->btf, e->name_off);
+			fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu";
+			append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e));
+		}
+		return len;
 	}

 	if (core_relo_is_field_based(spec->relo_kind)) {
 		for (i = 0; i < spec->len; i++) {
 			if (spec->spec[i].name)
-				libbpf_print(level, ".%s", spec->spec[i].name);
+				append_buf(".%s", spec->spec[i].name);
 			else if (i > 0 || spec->spec[i].idx > 0)
-				libbpf_print(level, "[%u]", spec->spec[i].idx);
+				append_buf("[%u]", spec->spec[i].idx);
 		}

-		libbpf_print(level, " (");
+		append_buf(" (");
 		for (i = 0; i < spec->raw_len; i++)
-			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+			append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);

 		if (spec->bit_offset % 8)
-			libbpf_print(level, " @ offset %u.%u)",
-				     spec->bit_offset / 8, spec->bit_offset % 8);
+			append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8);
 		else
-			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
-		return;
+			append_buf(" @ offset %u)", spec->bit_offset / 8);
+		return len;
 	}
+
+	return len;
+#undef append_buf
 }

 /*
- * CO-RE relocate single instruction.
+ * Calculate CO-RE relocation target result.
 *
 * The outline and important points of the algorithm:
 * 1. For given local type, find corresponding candidate target types.
@@ -1159,11 +1259,11 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp
 * 3. It is supported and expected that there might be multiple flavors
 *    matching the spec. As long as all the specs resolve to the same set of
 *    offsets across all candidates, there is no error. If there is any
- *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- *    imprefection of BTF deduplication, which can cause slight duplication of
+ *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ *    imperfection of BTF deduplication, which can cause slight duplication of
 *    the same BTF type, if some directly or indirectly referenced (by
 *    pointer) type gets resolved to different actual types in different
- *    object files. If such situation occurs, deduplicated BTF will end up
+ *    object files. If such a situation occurs, deduplicated BTF will end up
 *    with two (or more) structurally identical types, which differ only in
 *    types they refer to through pointer. This should be OK in most cases and
 *    is not an error.
@@ -1177,22 +1277,22 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp
 *    between multiple relocations for the same type ID and is updated as some
 *    of the candidates are pruned due to structural incompatibility.
 */
-int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
-			     int insn_idx,
-			     const struct bpf_core_relo *relo,
-			     int relo_idx,
-			     const struct btf *local_btf,
-			     struct bpf_core_cand_list *cands,
-			     struct bpf_core_spec *specs_scratch)
+int bpf_core_calc_relo_insn(const char *prog_name,
+			    const struct bpf_core_relo *relo,
+			    int relo_idx,
+			    const struct btf *local_btf,
+			    struct bpf_core_cand_list *cands,
+			    struct bpf_core_spec *specs_scratch,
+			    struct bpf_core_relo_res *targ_res)
 {
 	struct bpf_core_spec *local_spec = &specs_scratch[0];
 	struct bpf_core_spec *cand_spec = &specs_scratch[1];
 	struct bpf_core_spec *targ_spec = &specs_scratch[2];
-	struct bpf_core_relo_res cand_res, targ_res;
+	struct bpf_core_relo_res cand_res;
 	const struct btf_type *local_type;
 	const char *local_name;
 	__u32 local_id;
-	const char *spec_str;
+	char spec_buf[256];
 	int i, j, err;

 	local_id = relo->type_id;
@@ -1201,38 +1301,34 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 	if (!local_name)
 		return -EINVAL;

-	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
-	if (str_is_empty(spec_str))
-		return -EINVAL;
-
-	err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str,
-				  relo->kind, local_spec);
+	err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec);
 	if (err) {
+		const char *spec_str;
+
+		spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
 		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
 			prog_name, relo_idx, local_id, btf_kind_str(local_type),
 			str_is_empty(local_name) ? "<anon>" : local_name,
-			spec_str, err);
+			spec_str ?: "<?>", err);
 		return -EINVAL;
 	}

-	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
-		 relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-	bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec);
-	libbpf_print(LIBBPF_DEBUG, "\n");
+	bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec);
+	pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf);

 	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
 	if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) {
 		/* bpf_insn's imm value could get out of sync during linking */
-		memset(&targ_res, 0, sizeof(targ_res));
-		targ_res.validate = false;
-		targ_res.poison = false;
-		targ_res.orig_val = local_spec->root_type_id;
-		targ_res.new_val = local_spec->root_type_id;
-		goto patch_insn;
+		memset(targ_res, 0, sizeof(*targ_res));
+		targ_res->validate = false;
+		targ_res->poison = false;
+		targ_res->orig_val = local_spec->root_type_id;
+		targ_res->new_val = local_spec->root_type_id;
+		return 0;
 	}

 	/* libbpf doesn't support candidate search for anonymous types */
-	if (str_is_empty(spec_str)) {
+	if (str_is_empty(local_name)) {
 		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
 			prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
 		return -EOPNOTSUPP;
@@ -1242,17 +1338,15 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 		err = bpf_core_spec_match(local_spec, cands->cands[i].btf,
 					  cands->cands[i].id, cand_spec);
 		if (err < 0) {
-			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
-				prog_name, relo_idx, i);
-			bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec);
-			libbpf_print(LIBBPF_WARN, ": %d\n", err);
+			bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+			pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ",
+				prog_name, relo_idx, i, spec_buf, err);
 			return err;
 		}

-		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
-			 relo_idx, err == 0 ? "non-matching" : "matching", i);
-		bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec);
-		libbpf_print(LIBBPF_DEBUG, "\n");
+		bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+		pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name,
+			 relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf);

 		if (err == 0)
 			continue;
@@ -1262,7 +1356,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 			return err;

 		if (j == 0) {
-			targ_res = cand_res;
+			*targ_res = cand_res;
 			*targ_spec = *cand_spec;
 		} else if (cand_spec->bit_offset != targ_spec->bit_offset) {
 			/* if there are many field relo candidates, they
@@ -1272,15 +1366,18 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 				prog_name, relo_idx, cand_spec->bit_offset,
 				targ_spec->bit_offset);
 			return -EINVAL;
-		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+		} else if (cand_res.poison != targ_res->poison ||
+			   cand_res.new_val != targ_res->new_val) {
 			/* all candidates should result in the same relocation
 			 * decision and value, otherwise it's dangerous to
 			 * proceed due to ambiguity
 			 */
-			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n",
 				prog_name, relo_idx,
-				cand_res.poison ? "failure" : "success", cand_res.new_val,
-				targ_res.poison ? "failure" : "success", targ_res.new_val);
+				cand_res.poison ? "failure" : "success",
+				(unsigned long long)cand_res.new_val,
+				targ_res->poison ? "failure" : "success",
+				(unsigned long long)targ_res->new_val);
 			return -EINVAL;
 		}

@@ -1314,19 +1411,277 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
 			 prog_name, relo_idx);

 		/* calculate single target relo result explicitly */
-		err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res);
+		err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res);
 		if (err)
 			return err;
 	}

-patch_insn:
-	/* bpf_core_patch_insn() should know how to handle missing targ_spec */
-	err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
-	if (err) {
-		pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
-			prog_name, relo_idx, relo->insn_off / 8, err);
-		return -EINVAL;
-	}
-
 	return 0;
 }
+
+static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off,
+				 const struct btf *targ_btf, size_t targ_name_off)
+{
+	const char *local_n, *targ_n;
+	size_t local_len, targ_len;
+
+	local_n = btf__name_by_offset(local_btf, local_name_off);
+	targ_n = btf__name_by_offset(targ_btf, targ_name_off);
+
+	if (str_is_empty(targ_n))
+		return str_is_empty(local_n);
+
+	targ_len = bpf_core_essential_name_len(targ_n);
+	local_len = bpf_core_essential_name_len(local_n);
+
+	return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0;
+}
+
+static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t,
+				const struct btf *targ_btf, const struct btf_type *targ_t)
+{
+	__u16 local_vlen = btf_vlen(local_t);
+	__u16 targ_vlen = btf_vlen(targ_t);
+	int i, j;
+
+	if (local_t->size != targ_t->size)
+		return 0;
+
+	if (local_vlen > targ_vlen)
+		return 0;
+
+	/* iterate over the local enum's variants and make sure each has
+	 * a symbolic name correspondent in the target
+	 */
+	for (i = 0; i < local_vlen; i++) {
+		bool matched = false;
+		__u32 local_n_off, targ_n_off;
+
+		local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off :
+						     btf_enum64(local_t)[i].name_off;
+
+		for (j = 0; j < targ_vlen; j++) {
+			targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off :
+							   btf_enum64(targ_t)[j].name_off;
+
+			if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) {
+				matched = true;
+				break;
+			}
+		}
+
+		if (!matched)
+			return 0;
+	}
+	return 1;
+}
+
+static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t,
+				     const struct btf *targ_btf, const struct btf_type *targ_t,
+				     bool behind_ptr, int level)
+{
+	const struct btf_member *local_m = btf_members(local_t);
+	__u16 local_vlen = btf_vlen(local_t);
+	__u16 targ_vlen = btf_vlen(targ_t);
+	int i, j, err;
+
+	if (local_vlen > targ_vlen)
+		return 0;
+
+	/* check that all local members have a match in the target */
+	for (i = 0; i < local_vlen; i++, local_m++) {
+		const struct btf_member *targ_m = btf_members(targ_t);
+		bool matched = false;
+
+		for (j = 0; j < targ_vlen; j++, targ_m++) {
+			if (!bpf_core_names_match(local_btf, local_m->name_off,
+						  targ_btf, targ_m->name_off))
+				continue;
+
+			err = __bpf_core_types_match(local_btf, local_m->type, targ_btf,
+						     targ_m->type, behind_ptr, level - 1);
+			if (err < 0)
+				return err;
+			if (err > 0) {
+				matched = true;
+				break;
+			}
+		}
+
+		if (!matched)
+			return 0;
+	}
+	return 1;
+}
+
+/* Check that two types "match". This function assumes that root types were
+ * already checked for name match.
+ *
+ * The matching relation is defined as follows:
+ * - modifiers and typedefs are stripped (and, hence, effectively ignored)
+ * - generally speaking types need to be of same kind (struct vs. struct, union
+ *   vs. union, etc.)
+ *   - exceptions are struct/union behind a pointer which could also match a
+ *     forward declaration of a struct or union, respectively, and enum vs.
+ *     enum64 (see below)
+ * Then, depending on type:
+ * - integers:
+ *   - match if size and signedness match
+ * - arrays & pointers:
+ *   - target types are recursively matched
+ * - structs & unions:
+ *   - local members need to exist in target with the same name
+ *   - for each member we recursively check match unless it is already behind a
+ *     pointer, in which case we only check matching names and compatible kind
+ * - enums:
+ *   - local variants have to have a match in target by symbolic name (but not
+ *     numeric value)
+ *   - size has to match (but enum may match enum64 and vice versa)
+ * - function pointers:
+ *   - number and position of arguments in local type has to match target
+ *   - for each argument and the return value we recursively check match
+ */
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+			   __u32 targ_id, bool behind_ptr, int level)
+{
+	const struct btf_type *local_t, *targ_t;
+	int depth = 32; /* max recursion depth */
+	__u16 local_k, targ_k;
+
+	if (level <= 0)
+		return -EINVAL;
+
+recur:
+	depth--;
+	if (depth < 0)
+		return -EINVAL;
+
+	local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+	targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!local_t || !targ_t)
+		return -EINVAL;
+
+	/* While the name check happens after typedefs are skipped, root-level
+	 * typedefs would still be name-matched as that's the contract with
+	 * callers.
+	 */
+	if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off))
+		return 0;
+
+	local_k = btf_kind(local_t);
+	targ_k = btf_kind(targ_t);
+
+	switch (local_k) {
+	case BTF_KIND_UNKN:
+		return local_k == targ_k;
+	case BTF_KIND_FWD: {
+		bool local_f = BTF_INFO_KFLAG(local_t->info);
+
+		if (behind_ptr) {
+			if (local_k == targ_k)
+				return local_f == BTF_INFO_KFLAG(targ_t->info);
+
+			/* for forward declarations kflag dictates whether the
+			 * target is a struct (0) or union (1)
+			 */
+			return (targ_k == BTF_KIND_STRUCT && !local_f) ||
+			       (targ_k == BTF_KIND_UNION && local_f);
+		} else {
+			if (local_k != targ_k)
+				return 0;
+
+			/* match if the forward declaration is for the same kind */
+			return local_f == BTF_INFO_KFLAG(targ_t->info);
+		}
+	}
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		if (!btf_is_any_enum(targ_t))
+			return 0;
+
+		return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		if (behind_ptr) {
+			bool targ_f = BTF_INFO_KFLAG(targ_t->info);
+
+			if (local_k == targ_k)
+				return 1;
+
+			if (targ_k != BTF_KIND_FWD)
+				return 0;
+
+			return (local_k == BTF_KIND_UNION) == targ_f;
+		} else {
+			if (local_k != targ_k)
+				return 0;
+
+			return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t,
+							 behind_ptr, level);
+		}
+	case BTF_KIND_INT: {
+		__u8 local_sgn;
+		__u8 targ_sgn;
+
+		if (local_k != targ_k)
+			return 0;
+
+		local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED;
+		targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED;
+
+		return local_t->size == targ_t->size && local_sgn == targ_sgn;
+	}
+	case BTF_KIND_PTR:
+		if (local_k != targ_k)
+			return 0;
+
+		behind_ptr = true;
+
+		local_id = local_t->type;
+		targ_id = targ_t->type;
+		goto recur;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *local_array = btf_array(local_t);
+		const struct btf_array *targ_array = btf_array(targ_t);
+
+		if (local_k != targ_k)
+			return 0;
+
+		if (local_array->nelems != targ_array->nelems)
+			return 0;
+
+		local_id = local_array->type;
+		targ_id = targ_array->type;
+		goto recur;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *local_p = btf_params(local_t);
+		struct btf_param *targ_p = btf_params(targ_t);
+		__u16 local_vlen = btf_vlen(local_t);
+		__u16 targ_vlen = btf_vlen(targ_t);
+		int i, err;
+
+		if (local_k != targ_k)
+			return 0;
+
+		if (local_vlen != targ_vlen)
+			return 0;
+
+		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+			err = __bpf_core_types_match(local_btf, local_p->type, targ_btf,
+						     targ_p->type, behind_ptr, level - 1);
+			if (err <= 0)
+				return err;
+		}
+
+		/* tail recurse for return type check */
+		local_id = local_t->type;
+		targ_id = targ_t->type;
+		goto recur;
+	}
+	default:
+		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+			btf_kind_str(local_t), local_id, targ_id);
+		return 0;
+	}
+}
--- a/src/relo_core.h
+++ b/src/relo_core.h
@@ -44,14 +44,56 @@ struct bpf_core_spec {
 	__u32 bit_offset;
 };

-int bpf_core_apply_relo_insn(const char *prog_name,
-			     struct bpf_insn *insn, int insn_idx,
-			     const struct bpf_core_relo *relo, int relo_idx,
-			     const struct btf *local_btf,
-			     struct bpf_core_cand_list *cands,
-			     struct bpf_core_spec *specs_scratch);
+struct bpf_core_relo_res {
+	/* expected value in the instruction, unless validate == false */
+	__u64 orig_val;
+	/* new value that needs to be patched up to */
+	__u64 new_val;
+	/* relocation unsuccessful, poison instruction, but don't fail load */
+	bool poison;
+	/* some relocations can't be validated against orig_val */
+	bool validate;
+	/* for field byte offset relocations or the forms:
+	 *     *(T *)(rX + <off>) = rY
+	 *     rX = *(T *)(rY + <off>),
+	 * we remember original and resolved field size to adjust direct
+	 * memory loads of pointers and integers; this is necessary for 32-bit
+	 * host kernel architectures, but also allows to automatically
+	 * relocate fields that were resized from, e.g., u32 to u64, etc.
+	 */
+	bool fail_memsz_adjust;
+	__u32 orig_sz;
+	__u32 orig_type_id;
+	__u32 new_sz;
+	__u32 new_type_id;
+};
+
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+				const struct btf *targ_btf, __u32 targ_id, int level);
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id);
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+			   __u32 targ_id, bool behind_ptr, int level);
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+			 __u32 targ_id);

 size_t bpf_core_essential_name_len(const char *name);
+
+int bpf_core_calc_relo_insn(const char *prog_name,
+			    const struct bpf_core_relo *relo, int relo_idx,
+			    const struct btf *local_btf,
+			    struct bpf_core_cand_list *cands,
+			    struct bpf_core_spec *specs_scratch,
+			    struct bpf_core_relo_res *targ_res);
+
+int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+			int insn_idx, const struct bpf_core_relo *relo,
+			int relo_idx, const struct bpf_core_relo_res *res);
+
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+		        const struct bpf_core_relo *relo,
+		        struct bpf_core_spec *spec);
+
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec);
+
 #endif
--- a/src/ringbuf.c
+++ b/src/ringbuf.c
@@ -16,6 +16,7 @@
 #include <asm/barrier.h>
 #include <sys/mman.h>
 #include <sys/epoll.h>
+#include <time.h>

 #include "libbpf.h"
 #include "libbpf_internal.h"
@@ -39,6 +40,23 @@ struct ring_buffer {
 	int ring_cnt;
 };

+struct user_ring_buffer {
+	struct epoll_event event;
+	unsigned long *consumer_pos;
+	unsigned long *producer_pos;
+	void *data;
+	unsigned long mask;
+	size_t page_size;
+	int map_fd;
+	int epoll_fd;
+};
+
+/* 8-byte ring buffer header structure */
+struct ringbuf_hdr {
+	__u32 len;
+	__u32 pad;
+};
+
 static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
 {
 	if (r->consumer_pos) {
@@ -59,12 +77,13 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	__u32 len = sizeof(info);
 	struct epoll_event *e;
 	struct ring *r;
+	__u64 mmap_sz;
 	void *tmp;
 	int err;

 	memset(&info, 0, sizeof(info));

-	err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+	err = bpf_map_get_info_by_fd(map_fd, &info, &len);
 	if (err) {
 		err = -errno;
 		pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
@@ -97,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	r->mask = info.max_entries - 1;

 	/* Map writable consumer page */
-	tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
-		   map_fd, 0);
+	tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
 	if (tmp == MAP_FAILED) {
 		err = -errno;
 		pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
@@ -110,9 +128,13 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	/* Map read-only producer page and data pages. We map twice as big
 	 * data size to allow simple reading of samples that wrap around the
 	 * end of a ring buffer. See kernel implementation for details.
-	 * */
-	tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
-		   MAP_SHARED, map_fd, rb->page_size);
+	 */
+	mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+	if (mmap_sz != (__u64)(size_t)mmap_sz) {
+		pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
+		return libbpf_err(-E2BIG);
+	}
+	tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
 	if (tmp == MAP_FAILED) {
 		err = -errno;
 		ringbuf_unmap_ring(rb, r);
@@ -202,7 +224,7 @@ static inline int roundup_len(__u32 len)
 	return (len + 7) / 8 * 8;
 }

-static int64_t ringbuf_process_ring(struct ring* r)
+static int64_t ringbuf_process_ring(struct ring *r)
 {
 	int *len_ptr, len, err;
 	/* 64-bit to avoid overflow in case of extreme application behavior */
@@ -300,3 +322,266 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
 {
 	return rb->epoll_fd;
 }
+
+static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
+{
+	if (rb->consumer_pos) {
+		munmap(rb->consumer_pos, rb->page_size);
+		rb->consumer_pos = NULL;
+	}
+	if (rb->producer_pos) {
+		munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1));
+		rb->producer_pos = NULL;
+	}
+}
+
+void user_ring_buffer__free(struct user_ring_buffer *rb)
+{
+	if (!rb)
+		return;
+
+	user_ringbuf_unmap_ring(rb);
+
+	if (rb->epoll_fd >= 0)
+		close(rb->epoll_fd);
+
+	free(rb);
+}
+
+static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
+{
+	struct bpf_map_info info;
+	__u32 len = sizeof(info);
+	__u64 mmap_sz;
+	void *tmp;
+	struct epoll_event *rb_epoll;
+	int err;
+
+	memset(&info, 0, sizeof(info));
+
+	err = bpf_map_get_info_by_fd(map_fd, &info, &len);
+	if (err) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err);
+		return err;
+	}
+
+	if (info.type != BPF_MAP_TYPE_USER_RINGBUF) {
+		pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd);
+		return -EINVAL;
+	}
+
+	rb->map_fd = map_fd;
+	rb->mask = info.max_entries - 1;
+
+	/* Map read-only consumer page */
+	tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+	rb->consumer_pos = tmp;
+
+	/* Map read-write the producer page and data pages. We map the data
+	 * region as twice the total size of the ring buffer to allow the
+	 * simple reading and writing of samples that wrap around the end of
+	 * the buffer.  See the kernel implementation for details.
+	 */
+	mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+	if (mmap_sz != (__u64)(size_t)mmap_sz) {
+		pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries);
+		return -E2BIG;
+	}
+	tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+		   map_fd, rb->page_size);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+
+	rb->producer_pos = tmp;
+	rb->data = tmp + rb->page_size;
+
+	rb_epoll = &rb->event;
+	rb_epoll->events = EPOLLOUT;
+	if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err);
+		return err;
+	}
+
+	return 0;
+}
+
+struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts)
+{
+	struct user_ring_buffer *rb;
+	int err;
+
+	if (!OPTS_VALID(opts, user_ring_buffer_opts))
+		return errno = EINVAL, NULL;
+
+	rb = calloc(1, sizeof(*rb));
+	if (!rb)
+		return errno = ENOMEM, NULL;
+
+	rb->page_size = getpagesize();
+
+	rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (rb->epoll_fd < 0) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to create epoll instance: %d\n", err);
+		goto err_out;
+	}
+
+	err = user_ringbuf_map(rb, map_fd);
+	if (err)
+		goto err_out;
+
+	return rb;
+
+err_out:
+	user_ring_buffer__free(rb);
+	return errno = -err, NULL;
+}
+
+static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard)
+{
+	__u32 new_len;
+	struct ringbuf_hdr *hdr;
+	uintptr_t hdr_offset;
+
+	hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ;
+	hdr = rb->data + (hdr_offset & rb->mask);
+
+	new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT;
+	if (discard)
+		new_len |= BPF_RINGBUF_DISCARD_BIT;
+
+	/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+	 * the kernel.
+	 */
+	__atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL);
+}
+
+void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample)
+{
+	user_ringbuf_commit(rb, sample, true);
+}
+
+void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample)
+{
+	user_ringbuf_commit(rb, sample, false);
+}
+
+void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
+{
+	__u32 avail_size, total_size, max_size;
+	/* 64-bit to avoid overflow in case of extreme application behavior */
+	__u64 cons_pos, prod_pos;
+	struct ringbuf_hdr *hdr;
+
+	/* The top two bits are used as special flags */
+	if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT))
+		return errno = E2BIG, NULL;
+
+	/* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
+	 * the kernel.
+	 */
+	cons_pos = smp_load_acquire(rb->consumer_pos);
+	/* Synchronizes with smp_store_release() in user_ringbuf_commit() */
+	prod_pos = smp_load_acquire(rb->producer_pos);
+
+	max_size = rb->mask + 1;
+	avail_size = max_size - (prod_pos - cons_pos);
+	/* Round up total size to a multiple of 8. */
+	total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8;
+
+	if (total_size > max_size)
+		return errno = E2BIG, NULL;
+
+	if (avail_size < total_size)
+		return errno = ENOSPC, NULL;
+
+	hdr = rb->data + (prod_pos & rb->mask);
+	hdr->len = size | BPF_RINGBUF_BUSY_BIT;
+	hdr->pad = 0;
+
+	/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+	 * the kernel.
+	 */
+	smp_store_release(rb->producer_pos, prod_pos + total_size);
+
+	return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask);
+}
+
+static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end)
+{
+	__u64 start_ns, end_ns, ns_per_s = 1000000000;
+
+	start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec;
+	end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec;
+
+	return end_ns - start_ns;
+}
+
+void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms)
+{
+	void *sample;
+	int err, ms_remaining = timeout_ms;
+	struct timespec start;
+
+	if (timeout_ms < 0 && timeout_ms != -1)
+		return errno = EINVAL, NULL;
+
+	if (timeout_ms != -1) {
+		err = clock_gettime(CLOCK_MONOTONIC, &start);
+		if (err)
+			return NULL;
+	}
+
+	do {
+		int cnt, ms_elapsed;
+		struct timespec curr;
+		__u64 ns_per_ms = 1000000;
+
+		sample = user_ring_buffer__reserve(rb, size);
+		if (sample)
+			return sample;
+		else if (errno != ENOSPC)
+			return NULL;
+
+		/* The kernel guarantees at least one event notification
+		 * delivery whenever at least one sample is drained from the
+		 * ring buffer in an invocation to bpf_ringbuf_drain(). Other
+		 * additional events may be delivered at any time, but only one
+		 * event is guaranteed per bpf_ringbuf_drain() invocation,
+		 * provided that a sample is drained, and the BPF program did
+		 * not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If
+		 * BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a
+		 * wakeup event will be delivered even if no samples are
+		 * drained.
+		 */
+		cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining);
+		if (cnt < 0)
+			return NULL;
+
+		if (timeout_ms == -1)
+			continue;
+
+		err = clock_gettime(CLOCK_MONOTONIC, &curr);
+		if (err)
+			return NULL;
+
+		ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms;
+		ms_remaining = timeout_ms - ms_elapsed;
+	} while (ms_remaining > 0);
+
+	/* Try one more time to reserve a sample after the specified timeout has elapsed. */
+	return user_ring_buffer__reserve(rb, size);
+}
--- a/src/skel_internal.h
+++ b/src/skel_internal.h
@@ -3,9 +3,19 @@
 #ifndef __SKEL_INTERNAL_H
 #define __SKEL_INTERNAL_H

+#ifdef __KERNEL__
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#else
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/mman.h>
+#include <stdlib.h>
+#include "bpf.h"
+#endif

 #ifndef __NR_bpf
 # if defined(__mips__) && defined(_ABIO32)
@@ -25,24 +35,23 @@
 * requested during loader program generation.
 */
 struct bpf_map_desc {
-	union {
-		/* input for the loader prog */
-		struct {
-			__aligned_u64 initial_value;
-			__u32 max_entries;
-		};
-		/* output of the loader prog */
-		struct {
-			int map_fd;
-		};
-	};
+	/* output of the loader prog */
+	int map_fd;
+	/* input for the loader prog */
+	__u32 max_entries;
+	__aligned_u64 initial_value;
 };
 struct bpf_prog_desc {
 	int prog_fd;
 };

+enum {
+	BPF_SKEL_KERNEL = (1ULL << 0),
+};
+
 struct bpf_loader_ctx {
-	size_t sz;
+	__u32 sz;
+	__u32 flags;
 	__u32 log_level;
 	__u32 log_size;
 	__u64 log_buf;
@@ -57,12 +66,144 @@ struct bpf_load_and_run_opts {
 	const char *errstr;
 };

+long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
+
 static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 			  unsigned int size)
 {
+#ifdef __KERNEL__
+	return kern_sys_bpf(cmd, attr, size);
+#else
 	return syscall(__NR_bpf, cmd, attr, size);
+#endif
 }

+#ifdef __KERNEL__
+static inline int close(int fd)
+{
+	return close_fd(fd);
+}
+
+static inline void *skel_alloc(size_t size)
+{
+	struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL);
+
+	if (!ctx)
+		return NULL;
+	ctx->flags |= BPF_SKEL_KERNEL;
+	return ctx;
+}
+
+static inline void skel_free(const void *p)
+{
+	kfree(p);
+}
+
+/* skel->bss/rodata maps are populated the following way:
+ *
+ * For kernel use:
+ * skel_prep_map_data() allocates kernel memory that kernel module can directly access.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value.
+ * The loader program will perform probe_read_kernel() from maps.rodata.initial_value.
+ * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and
+ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
+ * is not nessary.
+ *
+ * For user space:
+ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() remaps bpf array map value from the kernel memory into
+ * skel->rodata address.
+ *
+ * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for
+ * both kernel and user space. The generated loader program does
+ * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value
+ * depending on bpf_loader_ctx->flags.
+ */
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	if (addr != ~0ULL)
+		kvfree(p);
+	/* When addr == ~0ULL the 'p' points to
+	 * ((struct bpf_array *)map)->value. See skel_finalize_map_data.
+	 */
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = kvmalloc(val_sz, GFP_KERNEL);
+	if (!addr)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd)
+{
+	struct bpf_map *map;
+	void *addr = NULL;
+
+	kvfree((void *) (long) *init_val);
+	*init_val = ~0ULL;
+
+	/* At this point bpf_load_and_run() finished without error and
+	 * 'fd' is a valid bpf map FD. All sanity checks below should succeed.
+	 */
+	map = bpf_map_get(fd);
+	if (IS_ERR(map))
+		return NULL;
+	if (map->map_type != BPF_MAP_TYPE_ARRAY)
+		goto out;
+	addr = ((struct bpf_array *)map)->value;
+	/* the addr stays valid, since FD is not closed */
+out:
+	bpf_map_put(map);
+	return addr;
+}
+
+#else
+
+static inline void *skel_alloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+static inline void skel_free(void *p)
+{
+	free(p);
+}
+
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	munmap(p, sz);
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (addr == (void *) -1)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd)
+{
+	void *addr;
+
+	addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0);
+	if (addr == (void *) -1)
+		return NULL;
+	return addr;
+}
+#endif
+
 static inline int skel_closenz(int fd)
 {
 	if (fd > 0)
@@ -110,6 +251,29 @@ static inline int skel_map_update_elem(int fd, const void *key,
 	return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
 }

+static inline int skel_map_delete_elem(int fd, const void *key)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.map_fd = fd;
+	attr.key = (long)key;
+
+	return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
+}
+
+static inline int skel_map_get_fd_by_id(__u32 id)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.map_id = id;
+
+	return skel_sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
+}
+
 static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
 {
 	const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);
@@ -136,26 +300,34 @@ static inline int skel_link_create(int prog_fd, int target_fd,
 	return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
 }

+#ifdef __KERNEL__
+#define set_err
+#else
+#define set_err err = -errno
+#endif
+
 static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 {
+	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+	const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
 	int map_fd = -1, prog_fd = -1, key = 0, err;
 	union bpf_attr attr;

-	map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
 	if (map_fd < 0) {
 		opts->errstr = "failed to create loader map";
-		err = -errno;
+		set_err;
 		goto out;
 	}

 	err = skel_map_update_elem(map_fd, &key, opts->data, 0);
 	if (err < 0) {
 		opts->errstr = "failed to update loader map";
-		err = -errno;
+		set_err;
 		goto out;
 	}

-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, prog_load_attr_sz);
 	attr.prog_type = BPF_PROG_TYPE_SYSCALL;
 	attr.insns = (long) opts->insns;
 	attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
@@ -166,25 +338,27 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	attr.log_size = opts->ctx->log_size;
 	attr.log_buf = opts->ctx->log_buf;
 	attr.prog_flags = BPF_F_SLEEPABLE;
-	prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+	err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
 	if (prog_fd < 0) {
 		opts->errstr = "failed to load loader prog";
-		err = -errno;
+		set_err;
 		goto out;
 	}

-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, test_run_attr_sz);
 	attr.test.prog_fd = prog_fd;
 	attr.test.ctx_in = (long) opts->ctx;
 	attr.test.ctx_size_in = opts->ctx->sz;
-	err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
+	err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz);
 	if (err < 0 || (int)attr.test.retval < 0) {
 		opts->errstr = "failed to execute loader prog";
 		if (err < 0) {
-			err = -errno;
+			set_err;
 		} else {
 			err = (int)attr.test.retval;
+#ifndef __KERNEL__
 			errno = -err;
+#endif
 		}
 		goto out;
 	}
--- a/src/strset.c
+++ b/src/strset.c
@@ -19,19 +19,19 @@ struct strset {
 	struct hashmap *strs_hash;
 };

-static size_t strset_hash_fn(const void *key, void *ctx)
+static size_t strset_hash_fn(long key, void *ctx)
 {
 	const struct strset *s = ctx;
-	const char *str = s->strs_data + (long)key;
+	const char *str = s->strs_data + key;

 	return str_hash(str);
 }

-static bool strset_equal_fn(const void *key1, const void *key2, void *ctx)
+static bool strset_equal_fn(long key1, long key2, void *ctx)
 {
 	const struct strset *s = ctx;
-	const char *str1 = s->strs_data + (long)key1;
-	const char *str2 = s->strs_data + (long)key2;
+	const char *str1 = s->strs_data + key1;
+	const char *str2 = s->strs_data + key2;

 	return strcmp(str1, str2) == 0;
 }
@@ -67,7 +67,7 @@ struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t ini
 			/* hashmap__add() returns EEXIST if string with the same
 			 * content already is in the hash map
 			 */
-			err = hashmap__add(hash, (void *)off, (void *)off);
+			err = hashmap__add(hash, off, off);
 			if (err == -EEXIST)
 				continue; /* duplicate */
 			if (err)
@@ -127,7 +127,7 @@ int strset__find_str(struct strset *set, const char *s)
 	new_off = set->strs_data_len;
 	memcpy(p, s, len);

-	if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off))
+	if (hashmap__find(set->strs_hash, new_off, &old_off))
 		return old_off;

 	return -ENOENT;
@@ -165,8 +165,8 @@ int strset__add_str(struct strset *set, const char *s)
 	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
 	 * string exists, we'll get its offset in old_off (that's old_key).
 	 */
-	err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off,
-			      HASHMAP_ADD, (const void **)&old_off, NULL);
+	err = hashmap__insert(set->strs_hash, new_off, new_off,
+			      HASHMAP_ADD, &old_off, NULL);
 	if (err == -EEXIST)
 		return old_off; /* duplicated string, return existing offset */
 	if (err)
--- a/src/usdt.bpf.h
+++ b/src/usdt.bpf.h
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#ifndef __USDT_BPF_H__
+#define __USDT_BPF_H__
+
+#include <linux/errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* Below types and maps are internal implementation details of libbpf's USDT
+ * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should
+ * be considered an unstable API as well and might be adjusted based on user
+ * feedback from using libbpf's USDT support in production.
+ */
+
+/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal
+ * map that keeps track of USDT argument specifications. This might be
+ * necessary if there are a lot of USDT attachments.
+ */
+#ifndef BPF_USDT_MAX_SPEC_CNT
+#define BPF_USDT_MAX_SPEC_CNT 256
+#endif
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
+ * map that keeps track of IP (memory address) mapping to USDT argument
+ * specification.
+ * Note, if kernel supports BPF cookies, this map is not used and could be
+ * resized all the way to 1 to save a bit of memory.
+ */
+#ifndef BPF_USDT_MAX_IP_CNT
+#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
+#endif
+
+enum __bpf_usdt_arg_type {
+	BPF_USDT_ARG_CONST,
+	BPF_USDT_ARG_REG,
+	BPF_USDT_ARG_REG_DEREF,
+};
+
+struct __bpf_usdt_arg_spec {
+	/* u64 scalar interpreted depending on arg_type, see below */
+	__u64 val_off;
+	/* arg location case, see bpf_udst_arg() for details */
+	enum __bpf_usdt_arg_type arg_type;
+	/* offset of referenced register within struct pt_regs */
+	short reg_off;
+	/* whether arg should be interpreted as signed value */
+	bool arg_signed;
+	/* number of bits that need to be cleared and, optionally,
+	 * sign-extended to cast arguments that are 1, 2, or 4 bytes
+	 * long into final 8-byte u64/s64 value returned to user
+	 */
+	char arg_bitshift;
+};
+
+/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
+#define BPF_USDT_MAX_ARG_CNT 12
+struct __bpf_usdt_spec {
+	struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
+	__u64 usdt_cookie;
+	short arg_cnt;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
+	__type(key, int);
+	__type(value, struct __bpf_usdt_spec);
+} __bpf_usdt_specs SEC(".maps") __weak;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, BPF_USDT_MAX_IP_CNT);
+	__type(key, long);
+	__type(value, __u32);
+} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
+
+extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig;
+
+static __always_inline
+int __bpf_usdt_spec_id(struct pt_regs *ctx)
+{
+	if (!LINUX_HAS_BPF_COOKIE) {
+		long ip = PT_REGS_IP(ctx);
+		int *spec_id_ptr;
+
+		spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
+		return spec_id_ptr ? *spec_id_ptr : -ESRCH;
+	}
+
+	return bpf_get_attach_cookie(ctx);
+}
+
+/* Return number of USDT arguments defined for currently traced USDT. */
+__weak __hidden
+int bpf_usdt_arg_cnt(struct pt_regs *ctx)
+{
+	struct __bpf_usdt_spec *spec;
+	int spec_id;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return -ESRCH;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return -ESRCH;
+
+	return spec->arg_cnt;
+}
+
+/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
+ * Returns 0 on success; negative error, otherwise.
+ * On error *res is guaranteed to be set to zero.
+ */
+__weak __hidden
+int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
+{
+	struct __bpf_usdt_spec *spec;
+	struct __bpf_usdt_arg_spec *arg_spec;
+	unsigned long val;
+	int err, spec_id;
+
+	*res = 0;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return -ESRCH;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return -ESRCH;
+
+	if (arg_num >= BPF_USDT_MAX_ARG_CNT)
+		return -ENOENT;
+	barrier_var(arg_num);
+	if (arg_num >= spec->arg_cnt)
+		return -ENOENT;
+
+	arg_spec = &spec->args[arg_num];
+	switch (arg_spec->arg_type) {
+	case BPF_USDT_ARG_CONST:
+		/* Arg is just a constant ("-4@$-9" in USDT arg spec).
+		 * value is recorded in arg_spec->val_off directly.
+		 */
+		val = arg_spec->val_off;
+		break;
+	case BPF_USDT_ARG_REG:
+		/* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
+		 * so we read the contents of that register directly from
+		 * struct pt_regs. To keep things simple user-space parts
+		 * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
+		 */
+		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		if (err)
+			return err;
+		break;
+	case BPF_USDT_ARG_REG_DEREF:
+		/* Arg is in memory addressed by register, plus some offset
+		 * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is
+		 * identified like with BPF_USDT_ARG_REG case, and the offset
+		 * is in arg_spec->val_off. We first fetch register contents
+		 * from pt_regs, then do another user-space probe read to
+		 * fetch argument value itself.
+		 */
+		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		if (err)
+			return err;
+		err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
+		if (err)
+			return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		val >>= arg_spec->arg_bitshift;
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing
+	 * necessary upper arg_bitshift bits, with sign extension if argument
+	 * is signed
+	 */
+	val <<= arg_spec->arg_bitshift;
+	if (arg_spec->arg_signed)
+		val = ((long)val) >> arg_spec->arg_bitshift;
+	else
+		val = val >> arg_spec->arg_bitshift;
+	*res = val;
+	return 0;
+}
+
+/* Retrieve user-specified cookie value provided during attach as
+ * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie
+ * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself
+ * utilizing BPF cookies internally, so user can't use BPF cookie directly
+ * for USDT programs and has to use bpf_usdt_cookie() API instead.
+ */
+__weak __hidden
+long bpf_usdt_cookie(struct pt_regs *ctx)
+{
+	struct __bpf_usdt_spec *spec;
+	int spec_id;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return 0;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return 0;
+
+	return spec->usdt_cookie;
+}
+
+/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */
+#define ___bpf_usdt_args0() ctx
+#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; })
+#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; })
+#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; })
+#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; })
+#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; })
+#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; })
+#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; })
+#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; })
+#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; })
+#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; })
+#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; })
+#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; })
+#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
+ * Original struct pt_regs * context is preserved as 'ctx' argument.
+ */
+#define BPF_USDT(name, args...)						    \
+name(struct pt_regs *ctx);						    \
+static __always_inline typeof(name(0))					    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+        _Pragma("GCC diagnostic push")					    \
+        _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+        return ____##name(___bpf_usdt_args(args));			    \
+        _Pragma("GCC diagnostic pop")					    \
+}									    \
+static __always_inline typeof(name(0))					    \
+____##name(struct pt_regs *ctx, ##args)
+
+#endif /* __USDT_BPF_H__ */
--- a/src/usdt.c
+++ b/src/usdt.c
--- a/src/xsk.c
+++ b/src/xsk.c
--- a/src/xsk.h
+++ b/src/xsk.h
@@ -1,336 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-
-/*
- * AF_XDP user-space access library.
- *
- * Copyright (c) 2018 - 2019 Intel Corporation.
- * Copyright (c) 2019 Facebook
- *
- * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
- */
-
-#ifndef __LIBBPF_XSK_H
-#define __LIBBPF_XSK_H
-
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <linux/if_xdp.h>
-
-#include "libbpf.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* This whole API has been deprecated and moved to libxdp that can be found at
- * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so
- * it should just be linking with libxdp instead of libbpf for this set of
- * functionality. If not, please submit a bug report on the aforementioned page.
- */
-
-/* Load-Acquire Store-Release barriers used by the XDP socket
- * library. The following macros should *NOT* be considered part of
- * the xsk.h API, and is subject to change anytime.
- *
- * LIBRARY INTERNAL
- */
-
-#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
-#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
-
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile("" : : : "memory");			\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile("" : : : "memory");			\
-		___p1;							\
-	})
-#elif defined(__aarch64__)
-# define libbpf_smp_store_release(p, v)					\
-		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1;					\
-		asm volatile ("ldar %w0, %1"				\
-			      : "=r" (___p1) : "Q" (*p) : "memory");	\
-		___p1;							\
-	})
-#elif defined(__riscv)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile ("fence rw,w" : : : "memory");		\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile ("fence r,rw" : : : "memory");		\
-		___p1;							\
-	})
-#endif
-
-#ifndef libbpf_smp_store_release
-#define libbpf_smp_store_release(p, v)					\
-	do {								\
-		__sync_synchronize();					\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-#endif
-
-#ifndef libbpf_smp_load_acquire
-#define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		__sync_synchronize();					\
-		___p1;							\
-	})
-#endif
-
-/* LIBRARY INTERNAL -- END */
-
-/* Do not access these members directly. Use the functions below. */
-#define DEFINE_XSK_RING(name) \
-struct name { \
-	__u32 cached_prod; \
-	__u32 cached_cons; \
-	__u32 mask; \
-	__u32 size; \
-	__u32 *producer; \
-	__u32 *consumer; \
-	void *ring; \
-	__u32 *flags; \
-}
-
-DEFINE_XSK_RING(xsk_ring_prod);
-DEFINE_XSK_RING(xsk_ring_cons);
-
-/* For a detailed explanation on the memory barriers associated with the
- * ring, please take a look at net/xdp/xsk_queue.h.
- */
-
-struct xsk_umem;
-struct xsk_socket;
-
-static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
-					      __u32 idx)
-{
-	__u64 *addrs = (__u64 *)fill->ring;
-
-	return &addrs[idx & fill->mask];
-}
-
-static inline const __u64 *
-xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
-{
-	const __u64 *addrs = (const __u64 *)comp->ring;
-
-	return &addrs[idx & comp->mask];
-}
-
-static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
-						      __u32 idx)
-{
-	struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
-
-	return &descs[idx & tx->mask];
-}
-
-static inline const struct xdp_desc *
-xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
-{
-	const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
-
-	return &descs[idx & rx->mask];
-}
-
-static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
-{
-	return *r->flags & XDP_RING_NEED_WAKEUP;
-}
-
-static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
-{
-	__u32 free_entries = r->cached_cons - r->cached_prod;
-
-	if (free_entries >= nb)
-		return free_entries;
-
-	/* Refresh the local tail pointer.
-	 * cached_cons is r->size bigger than the real consumer pointer so
-	 * that this addition can be avoided in the more frequently
-	 * executed code that computs free_entries in the beginning of
-	 * this function. Without this optimization it whould have been
-	 * free_entries = r->cached_prod - r->cached_cons + r->size.
-	 */
-	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
-	r->cached_cons += r->size;
-
-	return r->cached_cons - r->cached_prod;
-}
-
-static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
-{
-	__u32 entries = r->cached_prod - r->cached_cons;
-
-	if (entries == 0) {
-		r->cached_prod = libbpf_smp_load_acquire(r->producer);
-		entries = r->cached_prod - r->cached_cons;
-	}
-
-	return (entries > nb) ? nb : entries;
-}
-
-static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
-{
-	if (xsk_prod_nb_free(prod, nb) < nb)
-		return 0;
-
-	*idx = prod->cached_prod;
-	prod->cached_prod += nb;
-
-	return nb;
-}
-
-static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
-{
-	/* Make sure everything has been written to the ring before indicating
-	 * this to the kernel by writing the producer pointer.
-	 */
-	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
-}
-
-static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
-{
-	__u32 entries = xsk_cons_nb_avail(cons, nb);
-
-	if (entries > 0) {
-		*idx = cons->cached_cons;
-		cons->cached_cons += entries;
-	}
-
-	return entries;
-}
-
-static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
-{
-	cons->cached_cons -= nb;
-}
-
-static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
-{
-	/* Make sure data has been read before indicating we are done
-	 * with the entries by updating the consumer pointer.
-	 */
-	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
-
-}
-
-static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
-{
-	return &((char *)umem_area)[addr];
-}
-
-static inline __u64 xsk_umem__extract_addr(__u64 addr)
-{
-	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
-}
-
-static inline __u64 xsk_umem__extract_offset(__u64 addr)
-{
-	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-}
-
-static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
-{
-	return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
-}
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__fd(const struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__fd(const struct xsk_socket *xsk);
-
-#define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
-#define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
-#define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
-#define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
-#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
-#define XSK_UMEM__DEFAULT_FLAGS 0
-
-struct xsk_umem_config {
-	__u32 fill_size;
-	__u32 comp_size;
-	__u32 frame_size;
-	__u32 frame_headroom;
-	__u32 flags;
-};
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
-
-/* Flags for the libbpf_flags field. */
-#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
-
-struct xsk_socket_config {
-	__u32 rx_size;
-	__u32 tx_size;
-	__u32 libbpf_flags;
-	__u32 xdp_flags;
-	__u16 bind_flags;
-};
-
-/* Set config to NULL to get the default configuration. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create(struct xsk_umem **umem,
-		     void *umem_area, __u64 size,
-		     struct xsk_ring_prod *fill,
-		     struct xsk_ring_cons *comp,
-		     const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
-			    void *umem_area, __u64 size,
-			    struct xsk_ring_prod *fill,
-			    struct xsk_ring_cons *comp,
-			    const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
-			    void *umem_area, __u64 size,
-			    struct xsk_ring_prod *fill,
-			    struct xsk_ring_cons *comp,
-			    const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__create(struct xsk_socket **xsk,
-		       const char *ifname, __u32 queue_id,
-		       struct xsk_umem *umem,
-		       struct xsk_ring_cons *rx,
-		       struct xsk_ring_prod *tx,
-		       const struct xsk_socket_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
-			      const char *ifname,
-			      __u32 queue_id, struct xsk_umem *umem,
-			      struct xsk_ring_cons *rx,
-			      struct xsk_ring_prod *tx,
-			      struct xsk_ring_prod *fill,
-			      struct xsk_ring_cons *comp,
-			      const struct xsk_socket_config *config);
-
-/* Returns 0 for success and -EBUSY if the umem is still in use. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__delete(struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-void xsk_socket__delete(struct xsk_socket *xsk);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* __LIBBPF_XSK_H */
--- a/src/zip.c
+++ b/src/zip.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Routines for dealing with .zip archives.
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "libbpf_internal.h"
+#include "zip.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+
+/* Specification of ZIP file format can be found here:
+ * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ * For a high level overview of the structure of a ZIP file see
+ * sections 4.3.1 - 4.3.6.
+ *
+ * Data structures appearing in ZIP files do not contain any
+ * padding and they might be misaligned. To allow us to safely
+ * operate on pointers to such structures and their members, we
+ * declare the types as packed.
+ */
+
+#define END_OF_CD_RECORD_MAGIC 0x06054b50
+
+/* See section 4.3.16 of the spec. */
+struct end_of_cd_record {
+	/* Magic value equal to END_OF_CD_RECORD_MAGIC */
+	__u32 magic;
+
+	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
+	 * Zip archive might span multiple files (disks).
+	 */
+	__u16 this_disk;
+
+	/* Number of the file containing the beginning of the central directory or
+	 * 0xFFFF if ZIP64 archive.
+	 */
+	__u16 cd_disk;
+
+	/* Number of central directory records on this disk or 0xFFFF if ZIP64
+	 * archive.
+	 */
+	__u16 cd_records;
+
+	/* Number of central directory records on all disks or 0xFFFF if ZIP64
+	 * archive.
+	 */
+	__u16 cd_records_total;
+
+	/* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
+	__u32 cd_size;
+
+	/* Offset of the central directory from the beginning of the archive or
+	 * 0xFFFFFFFF if ZIP64 archive.
+	 */
+	__u32 cd_offset;
+
+	/* Length of comment data following end of central directory record. */
+	__u16 comment_length;
+
+	/* Up to 64k of arbitrary bytes. */
+	/* uint8_t comment[comment_length] */
+} __attribute__((packed));
+
+#define CD_FILE_HEADER_MAGIC 0x02014b50
+#define FLAG_ENCRYPTED (1 << 0)
+#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
+
+/* See section 4.3.12 of the spec. */
+struct cd_file_header {
+	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
+	__u32 magic;
+	__u16 version;
+	/* Minimum zip version needed to extract the file. */
+	__u16 min_version;
+	__u16 flags;
+	__u16 compression;
+	__u16 last_modified_time;
+	__u16 last_modified_date;
+	__u32 crc;
+	__u32 compressed_size;
+	__u32 uncompressed_size;
+	__u16 file_name_length;
+	__u16 extra_field_length;
+	__u16 file_comment_length;
+	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
+	__u16 disk;
+	__u16 internal_attributes;
+	__u32 external_attributes;
+	/* Offset from the start of the disk containing the local file header to the
+	 * start of the local file header.
+	 */
+	__u32 offset;
+} __attribute__((packed));
+
+#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
+
+/* See section 4.3.7 of the spec. */
+struct local_file_header {
+	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
+	__u32 magic;
+	/* Minimum zip version needed to extract the file. */
+	__u16 min_version;
+	__u16 flags;
+	__u16 compression;
+	__u16 last_modified_time;
+	__u16 last_modified_date;
+	__u32 crc;
+	__u32 compressed_size;
+	__u32 uncompressed_size;
+	__u16 file_name_length;
+	__u16 extra_field_length;
+} __attribute__((packed));
+
+#pragma GCC diagnostic pop
+
+struct zip_archive {
+	void *data;
+	__u32 size;
+	__u32 cd_offset;
+	__u32 cd_records;
+};
+
+static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
+{
+	if (offset + size > archive->size || offset > offset + size)
+		return NULL;
+
+	return archive->data + offset;
+}
+
+/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
+ * archive uses features which are not supported.
+ */
+static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
+{
+	__u16 comment_length, cd_records;
+	struct end_of_cd_record *eocd;
+	__u32 cd_offset, cd_size;
+
+	eocd = check_access(archive, offset, sizeof(*eocd));
+	if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
+		return -EINVAL;
+
+	comment_length = eocd->comment_length;
+	if (offset + sizeof(*eocd) + comment_length != archive->size)
+		return -EINVAL;
+
+	cd_records = eocd->cd_records;
+	if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
+		/* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
+		return -ENOTSUP;
+
+	cd_offset = eocd->cd_offset;
+	cd_size = eocd->cd_size;
+	if (!check_access(archive, cd_offset, cd_size))
+		return -EINVAL;
+
+	archive->cd_offset = cd_offset;
+	archive->cd_records = cd_records;
+	return 0;
+}
+
+static int find_cd(struct zip_archive *archive)
+{
+	int64_t limit, offset;
+	int rc = -EINVAL;
+
+	if (archive->size <= sizeof(struct end_of_cd_record))
+		return -EINVAL;
+
+	/* Because the end of central directory ends with a variable length array of
+	 * up to 0xFFFF bytes we can't know exactly where it starts and need to
+	 * search for it at the end of the file, scanning the (limit, offset] range.
+	 */
+	offset = archive->size - sizeof(struct end_of_cd_record);
+	limit = (int64_t)offset - (1 << 16);
+
+	for (; offset >= 0 && offset > limit && rc != 0; offset--) {
+		rc = try_parse_end_of_cd(archive, offset);
+		if (rc == -ENOTSUP)
+			break;
+	}
+	return rc;
+}
+
+struct zip_archive *zip_archive_open(const char *path)
+{
+	struct zip_archive *archive;
+	int err, fd;
+	off_t size;
+	void *data;
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return ERR_PTR(-errno);
+
+	size = lseek(fd, 0, SEEK_END);
+	if (size == (off_t)-1 || size > UINT32_MAX) {
+		close(fd);
+		return ERR_PTR(-EINVAL);
+	}
+
+	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	err = -errno;
+	close(fd);
+
+	if (data == MAP_FAILED)
+		return ERR_PTR(err);
+
+	archive = malloc(sizeof(*archive));
+	if (!archive) {
+		munmap(data, size);
+		return ERR_PTR(-ENOMEM);
+	};
+
+	archive->data = data;
+	archive->size = size;
+
+	err = find_cd(archive);
+	if (err) {
+		munmap(data, size);
+		free(archive);
+		return ERR_PTR(err);
+	}
+
+	return archive;
+}
+
+void zip_archive_close(struct zip_archive *archive)
+{
+	munmap(archive->data, archive->size);
+	free(archive);
+}
+
+static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
+							     __u32 offset)
+{
+	struct local_file_header *lfh;
+
+	lfh = check_access(archive, offset, sizeof(*lfh));
+	if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
+		return NULL;
+
+	return lfh;
+}
+
+static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
+{
+	struct local_file_header *lfh;
+	__u32 compressed_size;
+	const char *name;
+	void *data;
+
+	lfh = local_file_header_at_offset(archive, offset);
+	if (!lfh)
+		return -EINVAL;
+
+	offset += sizeof(*lfh);
+	if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
+		return -EINVAL;
+
+	name = check_access(archive, offset, lfh->file_name_length);
+	if (!name)
+		return -EINVAL;
+
+	offset += lfh->file_name_length;
+	if (!check_access(archive, offset, lfh->extra_field_length))
+		return -EINVAL;
+
+	offset += lfh->extra_field_length;
+	compressed_size = lfh->compressed_size;
+	data = check_access(archive, offset, compressed_size);
+	if (!data)
+		return -EINVAL;
+
+	out->compression = lfh->compression;
+	out->name_length = lfh->file_name_length;
+	out->name = name;
+	out->data = data;
+	out->data_length = compressed_size;
+	out->data_offset = offset;
+
+	return 0;
+}
+
+int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
+			   struct zip_entry *out)
+{
+	size_t file_name_length = strlen(file_name);
+	__u32 i, offset = archive->cd_offset;
+
+	for (i = 0; i < archive->cd_records; ++i) {
+		__u16 cdfh_name_length, cdfh_flags;
+		struct cd_file_header *cdfh;
+		const char *cdfh_name;
+
+		cdfh = check_access(archive, offset, sizeof(*cdfh));
+		if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
+			return -EINVAL;
+
+		offset += sizeof(*cdfh);
+		cdfh_name_length = cdfh->file_name_length;
+		cdfh_name = check_access(archive, offset, cdfh_name_length);
+		if (!cdfh_name)
+			return -EINVAL;
+
+		cdfh_flags = cdfh->flags;
+		if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
+		    (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
+		    file_name_length == cdfh_name_length &&
+		    memcmp(file_name, archive->data + offset, file_name_length) == 0) {
+			return get_entry_at_offset(archive, cdfh->offset, out);
+		}
+
+		offset += cdfh_name_length;
+		offset += cdfh->extra_field_length;
+		offset += cdfh->file_comment_length;
+	}
+
+	return -ENOENT;
+}
--- a/src/zip.h
+++ b/src/zip.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LIBBPF_ZIP_H
+#define __LIBBPF_ZIP_H
+
+#include <linux/types.h>
+
+/* Represents an open zip archive.
+ * Only basic ZIP files are supported, in particular the following are not
+ * supported:
+ * - encryption
+ * - streaming
+ * - multi-part ZIP files
+ * - ZIP64
+ */
+struct zip_archive;
+
+/* Carries information on name, compression method, and data corresponding to a
+ * file in a zip archive.
+ */
+struct zip_entry {
+	/* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
+	__u16 compression;
+
+	/* Non-null terminated name of the file. */
+	const char *name;
+	/* Length of the file name. */
+	__u16 name_length;
+
+	/* Pointer to the file data. */
+	const void *data;
+	/* Length of the file data. */
+	__u32 data_length;
+	/* Offset of the file data within the archive. */
+	__u32 data_offset;
+};
+
+/* Open a zip archive. Returns NULL in case of an error. */
+struct zip_archive *zip_archive_open(const char *path);
+
+/* Close a zip archive and release resources. */
+void zip_archive_close(struct zip_archive *archive);
+
+/* Look up an entry corresponding to a file in given zip archive. */
+int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
+
+#endif
--- a/Show More
+++ b/Show More