Merge branch 'android14-5.15' into arpi-5.15.92

2023-02-27 20:11:53 +09:00
parent 14b35093ca 8705a3d943
commit 670c6da635
1820 changed files with 134442 additions and 22152 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@
 *.symversions
 *.tab.[ch]
 *.tar
+*.usyms
 *.xz
 *.zst
 Module.symvers
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -0,0 +1,566 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 The Android Open Source Project
+load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir")
+load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c")
+load(
+    "//build/kernel/kleaf:kernel.bzl",
+    "ddk_headers",
+    "kernel_abi",
+    "kernel_build",
+    "kernel_images",
+    "kernel_modules_install",
+    "kernel_unstripped_modules_archive",
+)
+load(":modules.bzl", "COMMON_GKI_MODULES_LIST")
+
+package(
+    default_visibility = [
+        "//visibility:public",
+    ],
+)
+
+_aarch64_additional_kmi_symbol_lists = [
+    # keep sorted
+    "android/abi_gki_aarch64_db845c",
+    "android/abi_gki_aarch64_exynos",
+    "android/abi_gki_aarch64_pixel",
+    "android/abi_gki_aarch64_virtual_device",
+]
+
+define_common_kernels(target_configs = {
+    "kernel_aarch64": {
+        # TODO(b/188620248): re-enable trimming
+        "trim_nonlisted_kmi": False,
+        "kmi_symbol_list_strict_mode": False,
+        "module_implicit_outs": COMMON_GKI_MODULES_LIST,
+        "kmi_symbol_list": "android/abi_gki_aarch64",
+        "additional_kmi_symbol_lists": _aarch64_additional_kmi_symbol_lists,
+    },
+    "kernel_aarch64_16k": {
+        "module_implicit_outs": COMMON_GKI_MODULES_LIST,
+    },
+    "kernel_aarch64_debug": {
+        # TODO(b/188620248): re-enable trimming
+        "trim_nonlisted_kmi": False,
+        "kmi_symbol_list_strict_mode": False,
+        "module_implicit_outs": COMMON_GKI_MODULES_LIST,
+        "kmi_symbol_list": "android/abi_gki_aarch64",
+        "additional_kmi_symbol_lists": _aarch64_additional_kmi_symbol_lists,
+    },
+    "kernel_x86_64": {
+        "kmi_symbol_list_strict_mode": False,
+        "module_implicit_outs": COMMON_GKI_MODULES_LIST,
+    },
+    "kernel_x86_64_debug": {
+        "kmi_symbol_list_strict_mode": False,
+        "module_implicit_outs": COMMON_GKI_MODULES_LIST,
+    },
+})
+
+define_db845c(
+    name = "db845c",
+    outs = [
+        "arch/arm64/boot/dts/qcom/qrb5165-rb5.dtb",
+        "arch/arm64/boot/dts/qcom/sdm845-db845c.dtb",
+    ],
+    define_abi_targets = True,
+    kmi_symbol_list = "//common:android/abi_gki_aarch64_db845c",
+    kmi_symbol_list_add_only = True,
+    module_outs = [
+        # keep sorted
+        "crypto/michael_mic.ko",
+        "drivers/base/regmap/regmap-sdw.ko",
+        "drivers/base/regmap/regmap-slimbus.ko",
+        "drivers/bus/mhi/core/mhi.ko",
+        "drivers/clk/qcom/clk-qcom.ko",
+        "drivers/clk/qcom/clk-rpmh.ko",
+        "drivers/clk/qcom/clk-spmi-pmic-div.ko",
+        "drivers/clk/qcom/dispcc-sdm845.ko",
+        "drivers/clk/qcom/dispcc-sm8250.ko",
+        "drivers/clk/qcom/gcc-sdm845.ko",
+        "drivers/clk/qcom/gcc-sm8250.ko",
+        "drivers/clk/qcom/gpucc-sdm845.ko",
+        "drivers/clk/qcom/gpucc-sm8250.ko",
+        "drivers/clk/qcom/lpass-gfm-sm8250.ko",
+        "drivers/clk/qcom/videocc-sdm845.ko",
+        "drivers/clk/qcom/videocc-sm8250.ko",
+        "drivers/cpufreq/qcom-cpufreq-hw.ko",
+        "drivers/dma-buf/heaps/system_heap.ko",
+        "drivers/dma/qcom/bam_dma.ko",
+        "drivers/extcon/extcon-usb-gpio.ko",
+        "drivers/firmware/qcom-scm.ko",
+        "drivers/gpio/gpio-wcd934x.ko",
+        "drivers/gpu/drm/bridge/display-connector.ko",
+        "drivers/gpu/drm/bridge/lontium-lt9611.ko",
+        "drivers/gpu/drm/bridge/lontium-lt9611uxc.ko",
+        "drivers/gpu/drm/msm/msm.ko",
+        "drivers/gpu/drm/scheduler/gpu-sched.ko",
+        "drivers/hwspinlock/qcom_hwspinlock.ko",
+        "drivers/i2c/busses/i2c-designware-core.ko",
+        "drivers/i2c/busses/i2c-designware-platform.ko",
+        "drivers/i2c/busses/i2c-qcom-geni.ko",
+        "drivers/i2c/busses/i2c-qup.ko",
+        "drivers/i2c/busses/i2c-rk3x.ko",
+        "drivers/i2c/i2c-dev.ko",
+        "drivers/i2c/i2c-mux.ko",
+        "drivers/i2c/muxes/i2c-mux-pca954x.ko",
+        "drivers/iio/adc/qcom-spmi-adc5.ko",
+        "drivers/iio/adc/qcom-vadc-common.ko",
+        "drivers/input/misc/pm8941-pwrkey.ko",
+        "drivers/interconnect/qcom/icc-bcm-voter.ko",
+        "drivers/interconnect/qcom/icc-osm-l3.ko",
+        "drivers/interconnect/qcom/icc-rpmh.ko",
+        "drivers/interconnect/qcom/qnoc-sdm845.ko",
+        "drivers/interconnect/qcom/qnoc-sm8250.ko",
+        "drivers/iommu/arm/arm-smmu/arm_smmu.ko",
+        "drivers/irqchip/qcom-pdc.ko",
+        "drivers/leds/led-class-multicolor.ko",
+        "drivers/mailbox/qcom-apcs-ipc-mailbox.ko",
+        "drivers/mailbox/qcom-ipcc.ko",
+        "drivers/mfd/qcom-spmi-pmic.ko",
+        "drivers/mfd/wcd934x.ko",
+        "drivers/misc/fastrpc.ko",
+        "drivers/mmc/host/cqhci.ko",
+        "drivers/mmc/host/sdhci-msm.ko",
+        "drivers/net/can/spi/mcp251xfd/mcp251xfd.ko",
+        "drivers/net/wireless/ath/ath.ko",
+        "drivers/net/wireless/ath/ath10k/ath10k_core.ko",
+        "drivers/net/wireless/ath/ath10k/ath10k_pci.ko",
+        "drivers/net/wireless/ath/ath10k/ath10k_snoc.ko",
+        "drivers/net/wireless/ath/ath11k/ath11k.ko",
+        "drivers/net/wireless/ath/ath11k/ath11k_ahb.ko",
+        "drivers/net/wireless/ath/ath11k/ath11k_pci.ko",
+        "drivers/nvmem/nvmem_qfprom.ko",
+        "drivers/phy/qualcomm/phy-qcom-qmp.ko",
+        "drivers/phy/qualcomm/phy-qcom-qusb2.ko",
+        "drivers/phy/qualcomm/phy-qcom-snps-femto-v2.ko",
+        "drivers/phy/qualcomm/phy-qcom-usb-hs.ko",
+        "drivers/pinctrl/qcom/pinctrl-lpass-lpi.ko",
+        "drivers/pinctrl/qcom/pinctrl-msm.ko",
+        "drivers/pinctrl/qcom/pinctrl-sdm845.ko",
+        "drivers/pinctrl/qcom/pinctrl-sm8250.ko",
+        "drivers/pinctrl/qcom/pinctrl-spmi-gpio.ko",
+        "drivers/pinctrl/qcom/pinctrl-spmi-mpp.ko",
+        "drivers/power/reset/qcom-pon.ko",
+        "drivers/power/reset/reboot-mode.ko",
+        "drivers/power/reset/syscon-reboot-mode.ko",
+        "drivers/regulator/gpio-regulator.ko",
+        "drivers/regulator/qcom-rpmh-regulator.ko",
+        "drivers/regulator/qcom_spmi-regulator.ko",
+        "drivers/regulator/qcom_usb_vbus-regulator.ko",
+        "drivers/remoteproc/qcom_common.ko",
+        "drivers/remoteproc/qcom_pil_info.ko",
+        "drivers/remoteproc/qcom_q6v5.ko",
+        "drivers/remoteproc/qcom_q6v5_adsp.ko",
+        "drivers/remoteproc/qcom_q6v5_mss.ko",
+        "drivers/remoteproc/qcom_q6v5_pas.ko",
+        "drivers/remoteproc/qcom_q6v5_wcss.ko",
+        "drivers/remoteproc/qcom_sysmon.ko",
+        "drivers/reset/reset-qcom-aoss.ko",
+        "drivers/reset/reset-qcom-pdc.ko",
+        "drivers/rpmsg/qcom_glink.ko",
+        "drivers/rpmsg/qcom_glink_rpm.ko",
+        "drivers/rpmsg/qcom_glink_smem.ko",
+        "drivers/rpmsg/qcom_smd.ko",
+        "drivers/rpmsg/rpmsg_ns.ko",
+        "drivers/rtc/rtc-pm8xxx.ko",
+        "drivers/slimbus/slim-qcom-ngd-ctrl.ko",
+        "drivers/slimbus/slimbus.ko",
+        "drivers/soc/qcom/apr.ko",
+        "drivers/soc/qcom/cmd-db.ko",
+        "drivers/soc/qcom/llcc-qcom.ko",
+        "drivers/soc/qcom/mdt_loader.ko",
+        "drivers/soc/qcom/pdr_interface.ko",
+        "drivers/soc/qcom/qcom_aoss.ko",
+        "drivers/soc/qcom/qcom_rpmh.ko",
+        "drivers/soc/qcom/qmi_helpers.ko",
+        "drivers/soc/qcom/rmtfs_mem.ko",
+        "drivers/soc/qcom/rpmhpd.ko",
+        "drivers/soc/qcom/smem.ko",
+        "drivers/soc/qcom/smp2p.ko",
+        "drivers/soc/qcom/smsm.ko",
+        "drivers/soc/qcom/socinfo.ko",
+        "drivers/soundwire/soundwire-bus.ko",
+        "drivers/soundwire/soundwire-qcom.ko",
+        "drivers/spi/spi-geni-qcom.ko",
+        "drivers/spi/spi-pl022.ko",
+        "drivers/spi/spi-qcom-qspi.ko",
+        "drivers/spi/spi-qup.ko",
+        "drivers/spmi/spmi-pmic-arb.ko",
+        "drivers/thermal/qcom/lmh.ko",
+        "drivers/thermal/qcom/qcom-spmi-adc-tm5.ko",
+        "drivers/thermal/qcom/qcom-spmi-temp-alarm.ko",
+        "drivers/thermal/qcom/qcom_tsens.ko",
+        "drivers/tty/serial/msm_serial.ko",
+        "drivers/ufs/host/ufs_qcom.ko",
+        "drivers/usb/common/ulpi.ko",
+        "drivers/usb/host/ohci-hcd.ko",
+        "drivers/usb/host/ohci-pci.ko",
+        "drivers/usb/host/ohci-platform.ko",
+        "drivers/usb/typec/qcom-pmic-typec.ko",
+        "drivers/watchdog/pm8916_wdt.ko",
+        "drivers/watchdog/qcom-wdt.ko",
+        "net/qrtr/ns.ko",
+        "net/qrtr/qrtr.ko",
+        "net/qrtr/qrtr-mhi.ko",
+        "net/qrtr/qrtr-smd.ko",
+        "net/qrtr/qrtr-tun.ko",
+        "sound/soc/codecs/snd-soc-dmic.ko",
+        "sound/soc/codecs/snd-soc-hdmi-codec.ko",
+        "sound/soc/codecs/snd-soc-lpass-va-macro.ko",
+        "sound/soc/codecs/snd-soc-lpass-wsa-macro.ko",
+        "sound/soc/codecs/snd-soc-max98927.ko",
+        "sound/soc/codecs/snd-soc-rl6231.ko",
+        "sound/soc/codecs/snd-soc-rt5663.ko",
+        "sound/soc/codecs/snd-soc-wcd-mbhc.ko",
+        "sound/soc/codecs/snd-soc-wcd9335.ko",
+        "sound/soc/codecs/snd-soc-wcd934x.ko",
+        "sound/soc/codecs/snd-soc-wsa881x.ko",
+        "sound/soc/qcom/qdsp6/q6adm.ko",
+        "sound/soc/qcom/qdsp6/q6afe.ko",
+        "sound/soc/qcom/qdsp6/q6afe-clocks.ko",
+        "sound/soc/qcom/qdsp6/q6afe-dai.ko",
+        "sound/soc/qcom/qdsp6/q6asm.ko",
+        "sound/soc/qcom/qdsp6/q6asm-dai.ko",
+        "sound/soc/qcom/qdsp6/q6core.ko",
+        "sound/soc/qcom/qdsp6/q6dsp-common.ko",
+        "sound/soc/qcom/qdsp6/q6routing.ko",
+        "sound/soc/qcom/snd-soc-qcom-common.ko",
+        "sound/soc/qcom/snd-soc-sdm845.ko",
+        "sound/soc/qcom/snd-soc-sm8250.ko",
+    ],
+)
+
+# TODO(b/258259749): Convert rockpi4 to mixed build
+kernel_build(
+    name = "rockpi4",
+    outs = [
+        "Image",
+        "System.map",
+        "modules.builtin",
+        "modules.builtin.modinfo",
+        "rk3399-rock-pi-4b.dtb",
+        "vmlinux",
+        "vmlinux.symvers",
+    ],
+    build_config = "build.config.rockpi4",
+    collect_unstripped_modules = True,
+    kmi_symbol_list = "//common:android/abi_gki_rockpi4",
+    module_outs = COMMON_GKI_MODULES_LIST + [
+        # keep sorted
+        "drivers/block/virtio_blk.ko",
+        "drivers/char/hw_random/virtio-rng.ko",
+        "drivers/clk/clk-rk808.ko",
+        "drivers/cpufreq/cpufreq-dt.ko",
+        "drivers/dma/pl330.ko",
+        "drivers/gpu/drm/bridge/analogix/analogix_dp.ko",
+        "drivers/gpu/drm/bridge/synopsys/dw-hdmi.ko",
+        "drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.ko",
+        "drivers/gpu/drm/rockchip/rockchipdrm.ko",
+        "drivers/i2c/busses/i2c-rk3x.ko",
+        "drivers/iio/adc/rockchip_saradc.ko",
+        "drivers/iio/buffer/industrialio-triggered-buffer.ko",
+        "drivers/iio/buffer/kfifo_buf.ko",
+        "drivers/mfd/rk808.ko",
+        "drivers/mmc/core/pwrseq_simple.ko",
+        "drivers/mmc/host/cqhci.ko",
+        "drivers/mmc/host/dw_mmc.ko",
+        "drivers/mmc/host/dw_mmc-pltfm.ko",
+        "drivers/mmc/host/dw_mmc-rockchip.ko",
+        "drivers/mmc/host/sdhci-of-arasan.ko",
+        "drivers/net/ethernet/stmicro/stmmac/dwmac-rk.ko",
+        "drivers/net/ethernet/stmicro/stmmac/stmmac.ko",
+        "drivers/net/ethernet/stmicro/stmmac/stmmac-platform.ko",
+        "drivers/net/net_failover.ko",
+        "drivers/net/pcs/pcs_xpcs.ko",
+        "drivers/net/virtio_net.ko",
+        "drivers/nvmem/nvmem_rockchip_efuse.ko",
+        "drivers/pci/controller/pcie-rockchip-host.ko",
+        "drivers/phy/rockchip/phy-rockchip-emmc.ko",
+        "drivers/phy/rockchip/phy-rockchip-inno-usb2.ko",
+        "drivers/phy/rockchip/phy-rockchip-pcie.ko",
+        "drivers/phy/rockchip/phy-rockchip-typec.ko",
+        "drivers/pwm/pwm-rockchip.ko",
+        "drivers/regulator/fan53555.ko",
+        "drivers/regulator/pwm-regulator.ko",
+        "drivers/regulator/rk808-regulator.ko",
+        "drivers/rtc/rtc-rk808.ko",
+        "drivers/soc/rockchip/io-domain.ko",
+        "drivers/thermal/rockchip_thermal.ko",
+        "drivers/usb/host/ohci-hcd.ko",
+        "drivers/usb/host/ohci-platform.ko",
+        "drivers/virtio/virtio_pci.ko",
+        "drivers/virtio/virtio_pci_modern_dev.ko",
+        "drivers/watchdog/dw_wdt.ko",
+        "net/core/failover.ko",
+    ],
+)
+
+kernel_abi(
+    name = "rockpi4_abi",
+    kernel_build = "//common:rockpi4",
+    kmi_symbol_list_add_only = True,
+)
+
+kernel_modules_install(
+    name = "rockpi4_modules_install",
+    kernel_build = "//common:rockpi4",
+)
+
+kernel_unstripped_modules_archive(
+    name = "rockpi4_unstripped_modules_archive",
+    kernel_build = ":rockpi4",
+)
+
+kernel_images(
+    name = "rockpi4_images",
+    build_initramfs = True,
+    kernel_build = "//common:rockpi4",
+    kernel_modules_install = "//common:rockpi4_modules_install",
+)
+
+copy_to_dist_dir(
+    name = "rockpi4_dist",
+    data = [
+        ":rockpi4",
+        ":rockpi4_images",
+        ":rockpi4_modules_install",
+        ":rockpi4_unstripped_modules_archive",
+    ],
+    dist_dir = "out/rockpi4/dist",
+    flat = True,
+)
+
+kernel_build(
+    name = "fips140",
+    outs = [],
+    base_kernel = ":kernel_aarch64",
+    build_config = "build.config.gki.aarch64.fips140",
+    module_outs = ["crypto/fips140.ko"],
+)
+
+copy_to_dist_dir(
+    name = "fips140_dist",
+    data = [
+        ":fips140",
+    ],
+    dist_dir = "out/fips140/dist",
+    flat = True,
+)
+
+# allmodconfig build tests.
+# These are build tests only, so:
+# - outs are intentionally set to empty to not copy anything to DIST_DIR
+# - --allow-undeclared-modules must be used so modules are not declared or copied.
+# - No dist target because these are build tests. We don't care about the artifacts.
+
+# tools/bazel build --allow_undeclared_modules //common:kernel_aarch64_allmodconfig
+kernel_build(
+    name = "kernel_aarch64_allmodconfig",
+    # Hack to actually check the build.
+    # Otherwise, Bazel thinks that there are no output files, and skip building.
+    outs = [".config"],
+    build_config = "build.config.allmodconfig.aarch64",
+    visibility = ["//visibility:private"],
+)
+
+# tools/bazel build --allow_undeclared_modules //common:kernel_x86_64_allmodconfig
+kernel_build(
+    name = "kernel_x86_64_allmodconfig",
+    # Hack to actually check the build.
+    # Otherwise, Bazel thinks that there are no output files, and skip building.
+    outs = [".config"],
+    build_config = "build.config.allmodconfig.x86_64",
+    visibility = ["//visibility:private"],
+)
+
+# tools/bazel build --allow_undeclared_modules //common:kernel_arm_allmodconfig
+kernel_build(
+    name = "kernel_arm_allmodconfig",
+    # Hack to actually check the build.
+    # Otherwise, Bazel thinks that there are no output files, and skip building.
+    outs = [".config"],
+    build_config = "build.config.allmodconfig.arm",
+    visibility = ["//visibility:private"],
+)
+
+# DDK Headers
+# All headers. These are the public targets for DDK modules to use.
+alias(
+    name = "all_headers",
+    actual = "all_headers_aarch64",
+    visibility = ["//visibility:public"],
+)
+
+ddk_headers(
+    name = "all_headers_aarch64",
+    hdrs = [":all_headers_allowlist_aarch64"] + select({
+        "//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+)
+
+ddk_headers(
+    name = "all_headers_arm",
+    hdrs = [":all_headers_allowlist_arm"] + select({
+        "//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+)
+
+ddk_headers(
+    name = "all_headers_x86_64",
+    hdrs = [":all_headers_allowlist_x86_64"] + select({
+        "//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+)
+
+# Implementation details for DDK headers. The targets below cannot be directly
+# depended on by DDK modules.
+
+# DDK headers allowlist. This is the list of all headers and include
+# directories that are safe to use in DDK modules.
+ddk_headers(
+    name = "all_headers_allowlist_aarch64",
+    hdrs = [
+        ":all_headers_allowlist_aarch64_globs",
+        ":all_headers_allowlist_common_globs",
+    ],
+    # The list of include directories where source files can #include headers
+    # from. In other words, these are the `-I` option to the C compiler.
+    # These are prepended to LINUXINCLUDE.
+    linux_includes = [
+        "arch/arm64/include",
+        "arch/arm64/include/uapi",
+        "include",
+        "include/uapi",
+    ],
+    visibility = ["//visibility:private"],
+)
+
+ddk_headers(
+    name = "all_headers_allowlist_arm",
+    hdrs = [
+        ":all_headers_allowlist_arm_globs",
+        ":all_headers_allowlist_common_globs",
+    ],
+    # The list of include directories where source files can #include headers
+    # from. In other words, these are the `-I` option to the C compiler.
+    # These are prepended to LINUXINCLUDE.
+    linux_includes = [
+        "arch/arm/include",
+        "arch/arm/include/uapi",
+        "include",
+        "include/uapi",
+    ],
+    visibility = ["//visibility:private"],
+)
+
+ddk_headers(
+    name = "all_headers_allowlist_x86_64",
+    hdrs = [
+        ":all_headers_allowlist_common_globs",
+        ":all_headers_allowlist_x86_64_globs",
+    ],
+    # The list of include directories where source files can #include headers
+    # from. In other words, these are the `-I` option to the C compiler.
+    # These are prepended to LINUXINCLUDE.
+    linux_includes = [
+        "arch/x86/include",
+        "arch/x86/include/uapi",
+        "include",
+        "include/uapi",
+    ],
+    visibility = ["//visibility:private"],
+)
+
+# List of DDK headers allowlist that are glob()-ed to avoid changes of BUILD
+# file when the list of files changes. All headers in these directories
+# are safe to use.
+# These are separate filegroup targets so the all_headers_allowlist_* are
+# more friendly to batch BUILD file update tools like buildozer.
+
+# globs() for arm only
+filegroup(
+    name = "all_headers_allowlist_arm_globs",
+    srcs = glob(["arch/arm/include/**/*.h"]),
+    visibility = ["//visibility:private"],
+)
+
+# globs() for arm64 only
+filegroup(
+    name = "all_headers_allowlist_aarch64_globs",
+    srcs = glob(["arch/arm64/include/**/*.h"]),
+    visibility = ["//visibility:private"],
+)
+
+# globs() for x86 only
+filegroup(
+    name = "all_headers_allowlist_x86_64_globs",
+    srcs = glob(["arch/x86/include/**/*.h"]),
+    visibility = ["//visibility:private"],
+)
+
+# globs() for all architectures
+filegroup(
+    name = "all_headers_allowlist_common_globs",
+    srcs = glob(["include/**/*.h"]),
+    visibility = ["//visibility:private"],
+)
+
+# DDK headers unsafe list. This is the list of all headers and include
+# directories that may be used during migration from kernel_module's, but
+# should be avoided in general.
+# Use with caution; items may:
+# - be removed without notice
+# - be moved into all_headers
+ddk_headers(
+    name = "all_headers_unsafe",
+    hdrs = [
+        "drivers/devfreq/governor.h",
+        "drivers/dma-buf/heaps/deferred-free-helper.h",
+        "drivers/dma-buf/heaps/page_pool.h",
+        "drivers/dma/dmaengine.h",
+        "drivers/pci/controller/dwc/pcie-designware.h",
+        "drivers/pinctrl/core.h",
+        "drivers/pinctrl/samsung/pinctrl-samsung.h",
+        "drivers/staging/android/debug_kinfo.h",
+        "drivers/thermal/thermal_core.h",
+        "drivers/thermal/thermal_netlink.h",
+        "drivers/usb/core/phy.h",
+        "drivers/usb/dwc3/core.h",
+        "drivers/usb/dwc3/debug.h",
+        "drivers/usb/dwc3/gadget.h",
+        "drivers/usb/dwc3/io.h",
+        "drivers/usb/dwc3/trace.h",
+        "drivers/usb/gadget/configfs.h",
+        "drivers/usb/gadget/function/u_serial.h",
+        "drivers/usb/host/pci-quirks.h",
+        "drivers/usb/host/xhci.h",
+        "drivers/usb/host/xhci-ext-caps.h",
+        "drivers/usb/host/xhci-mvebu.h",
+        "drivers/usb/host/xhci-plat.h",
+        "drivers/usb/host/xhci-rcar.h",
+        "drivers/usb/typec/tcpm/tcpci.h",
+    ],
+    # The list of include directories where source files can #include headers
+    # from. In other words, these are the `-I` option to the C compiler.
+    # Unsafe include directories are appended to ccflags-y.
+    includes = [
+        "drivers/devfreq",
+        "drivers/dma",
+        "drivers/dma-buf",
+        "drivers/pci/controller/dwc",
+        "drivers/pinctrl",
+        "drivers/scsi/ufs",
+        "drivers/thermal",
+        "drivers/usb",
+        "drivers/usb/gadget/function",
+        "drivers/usb/typec",
+    ],
+    visibility = ["//visibility:private"],
+)
--- a/Documentation/ABI/stable/sysfs-module
+++ b/Documentation/ABI/stable/sysfs-module
@@ -32,3 +32,21 @@ Description:
 		Note: If the module is built into the kernel, or if the
 		CONFIG_MODULE_UNLOAD kernel configuration value is not enabled,
 		this file will not be present.
+
+What:		/sys/module/MODULENAME/scmversion
+Date:		November 2020
+KernelVersion:	Android Common Kernel -- android12-5.10+
+Contact:	Will McVicker <willmcvicker@google.com>
+Description:	This read-only file will appear if modpost was supplied with an
+		SCM version for the module. It can be enabled with the config
+		MODULE_SCMVERSION. The SCM version is retrieved by
+		scripts/setlocalversion, which means that the presence of this
+		file depends on CONFIG_LOCALVERSION_AUTO=y. When read, the SCM
+		version that the module was compiled with is returned. The SCM
+		version is returned in the following format::
+
+		===
+		Git:		g[a-f0-9]\+(-dirty)\?
+		Mercurial:	hg[a-f0-9]\+(-dirty)\?
+		Subversion:	svn[0-9]\+
+		===
--- a/Documentation/ABI/testing/OWNERS
+++ b/Documentation/ABI/testing/OWNERS
@@ -0,0 +1 @@
+per-file sysfs-fs-f2fs=file:/fs/f2fs/OWNERS
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -7,6 +7,7 @@ Description:	UVC function directory
 		streaming_maxburst	0..15 (ss only)
 		streaming_maxpacket	1..1023 (fs), 1..3072 (hs/ss)
 		streaming_interval	1..16
+		function_name		string [32]
 		===================	=============================

 What:		/config/usb-gadget/gadget/functions/uvc.name/control
@@ -196,7 +197,7 @@ Description:	Specific MJPEG format descriptors
 					read-only
 		bmaControls		this format's data for bmaControls in
 					the streaming header
-		bmInterfaceFlags	specifies interlace information,
+		bmInterlaceFlags	specifies interlace information,
 					read-only
 		bAspectRatioY		the X dimension of the picture aspect
 					ratio, read-only
@@ -252,7 +253,7 @@ Description:	Specific uncompressed format descriptors
 					read-only
 		bmaControls		this format's data for bmaControls in
 					the streaming header
-		bmInterfaceFlags	specifies interlace information,
+		bmInterlaceFlags	specifies interlace information,
 					read-only
 		bAspectRatioY		the X dimension of the picture aspect
 					ratio, read-only
--- a/Documentation/ABI/testing/dm-bow
+++ b/Documentation/ABI/testing/dm-bow
@@ -0,0 +1,19 @@
+What:		/sys/block/dm-<num>/bow/free
+Date:		January 2023
+KernelVersion:	5.15
+Contact:	paullawrence@google.com
+Description:	free space
+		Free space on device in bytes. Only valid in state 0
+Users:		Android vold to determine if there is sufficient space for expected size
+		of checksum
+
+What:		/sys/block/dm-<num>/bow/state
+Date:		January 2023
+KernelVersion:	5.15
+Contact:	paullawrence@google.com
+Description:	dm-bow state
+		Read-write string containing 0, 1 or 2
+		0: Trim mode
+		1: Checkpoint mode
+		2: Committed mode
+		See Documentation/device-mapper/dm-bow for details
--- a/Documentation/ABI/testing/sysfs-driver-typec-displayport
+++ b/Documentation/ABI/testing/sysfs-driver-typec-displayport
@@ -47,3 +47,18 @@ Description:
 		USB SuperSpeed protocol. From user perspective pin assignments C
 		and E are equal, where all channels on the connector are used
 		for carrying DisplayPort protocol (allowing higher resolutions).
+
+What:		/sys/bus/typec/devices/.../displayport/hpd
+Date:		Dec 2022
+Contact:	Badhri Jagan Sridharan <badhri@google.com>
+Description:
+		VESA DisplayPort Alt Mode on USB Type-C Standard defines how
+		HotPlugDetect(HPD) shall be supported on the USB-C connector when
+		operating in DisplayPort Alt Mode. This is a read only node which
+		reflects the current state of HPD.
+
+		Valid values:
+			- 1: when HPD’s logical state is high (HPD_High) as defined
+			     by VESA DisplayPort Alt Mode on USB Type-C Standard.
+			- 0 when HPD’s logical state is low (HPD_Low) as defined by
+			     VESA DisplayPort Alt Mode on USB Type-C Standard.
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -1299,6 +1299,15 @@ Description:	This node is used to set or display whether UFS WriteBooster is
 		platform that doesn't support UFSHCD_CAP_CLK_SCALING, we can
 		disable/enable WriteBooster through this sysfs node.

+What:		/sys/bus/platform/drivers/ufshcd/*/enable_wb_buf_flush
+What:		/sys/bus/platform/devices/*.ufs/enable_wb_buf_flush
+Date:		July 2022
+Contact:	Jinyoung Choi <j-young.choi@samsung.com>
+Description:	This entry shows the status of WriteBooster buffer flushing
+		and it can be used to enable or disable the flushing.
+		If flushing is enabled, the device executes the flush
+		operation when the command queue is empty.
+
 What:		/sys/bus/platform/drivers/ufshcd/*/device_descriptor/hpb_version
 Date:		June 2021
 Contact:	Daejun Park <daejun7.park@samsung.com>
@@ -1394,7 +1403,7 @@ Description:	This entry shows the number of reads that cannot be changed to

 		The file is read only.

-What:		/sys/class/scsi_device/*/device/hpb_stats/rb_noti_cnt
+What:		/sys/class/scsi_device/*/device/hpb_stats/rcmd_noti_cnt
 Date:		June 2021
 Contact:	Daejun Park <daejun7.park@samsung.com>
 Description:	This entry shows the number of response UPIUs that has
@@ -1402,19 +1411,23 @@ Description:	This entry shows the number of response UPIUs that has

 		The file is read only.

-What:		/sys/class/scsi_device/*/device/hpb_stats/rb_active_cnt
+What:		/sys/class/scsi_device/*/device/hpb_stats/rcmd_active_cnt
 Date:		June 2021
 Contact:	Daejun Park <daejun7.park@samsung.com>
-Description:	This entry shows the number of active sub-regions recommended by
-		response UPIUs.
+Description:	For the HPB device control mode, this entry shows the number of
+        active sub-regions recommended by response UPIUs. For the HPB host control
+        mode, this entry shows the number of active sub-regions recommended by the
+        HPB host control mode heuristic algorithm.

 		The file is read only.

-What:		/sys/class/scsi_device/*/device/hpb_stats/rb_inactive_cnt
+What:		/sys/class/scsi_device/*/device/hpb_stats/rcmd_inactive_cnt
 Date:		June 2021
 Contact:	Daejun Park <daejun7.park@samsung.com>
-Description:	This entry shows the number of inactive regions recommended by
-		response UPIUs.
+Description:	For the HPB device control mode, this entry shows the number of
+        inactive regions recommended by response UPIUs. For the HPB host control
+        mode, this entry shows the number of inactive regions recommended by the
+        HPB host control mode heuristic algorithm.

 		The file is read only.

@@ -1461,6 +1474,43 @@ Description:	This entry shows the status of HPB.

 		The file is read only.

+Contact:	Daniil Lunev <dlunev@chromium.org>
+What:		/sys/bus/platform/drivers/ufshcd/*/capabilities/
+What:		/sys/bus/platform/devices/*.ufs/capabilities/
+Date:		August 2022
+Description:	The group represents the effective capabilities of the
+		host-device pair. i.e. the capabilities which are enabled in the
+		driver for the specific host controller, supported by the host
+		controller and are supported and/or have compatible
+		configuration on the device side.
+
+Contact:	Daniil Lunev <dlunev@chromium.org>
+What:		/sys/bus/platform/drivers/ufshcd/*/capabilities/clock_scaling
+What:		/sys/bus/platform/devices/*.ufs/capabilities/clock_scaling
+Date:		August 2022
+Contact:	Daniil Lunev <dlunev@chromium.org>
+Description:	Indicates status of clock scaling.
+
+		== ============================
+		0  Clock scaling is not supported.
+		1  Clock scaling is supported.
+		== ============================
+
+		The file is read only.
+
+What:		/sys/bus/platform/drivers/ufshcd/*/capabilities/write_booster
+What:		/sys/bus/platform/devices/*.ufs/capabilities/write_booster
+Date:		August 2022
+Contact:	Daniil Lunev <dlunev@chromium.org>
+Description:	Indicates status of Write Booster.
+
+		== ============================
+		0  Write Booster can not be enabled.
+		1  Write Booster can be enabled.
+		== ============================
+
+		The file is read only.
+
 What:		/sys/class/scsi_device/*/device/hpb_param_sysfs/activation_thld
 Date:		February 2021
 Contact:	Avri Altman <avri.altman@wdc.com>
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -0,0 +1,7 @@
+What:		/sys/fs/erofs/features/
+Date:		November 2021
+Contact:	"Huang Jianan" <huangjianan@oppo.com>
+Description:	Shows all enabled kernel features.
+		Supported features:
+		zero_padding, compr_cfgs, big_pcluster, chunked_file,
+		device_table, compr_head2, sb_chksum.
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -55,8 +55,9 @@ Description:	Controls the in-place-update policy.
 		0x04  F2FS_IPU_UTIL
 		0x08  F2FS_IPU_SSR_UTIL
 		0x10  F2FS_IPU_FSYNC
-		0x20  F2FS_IPU_ASYNC,
+		0x20  F2FS_IPU_ASYNC
 		0x40  F2FS_IPU_NOCACHE
+		0x80  F2FS_IPU_HONOR_OPU_WRITE
 		====  =================

 		Refer segment.h for details.
@@ -98,13 +99,47 @@ Description:	Controls the issue rate of discard commands that consist of small
 		checkpoint is triggered, and issued during the checkpoint.
 		By default, it is disabled with 0.

+What:		/sys/fs/f2fs/<disk>/max_ordered_discard
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	Controls the maximum ordered discard, the unit size is one block(4KB).
+		Set it to 16 by default.
+
+What:		/sys/fs/f2fs/<disk>/max_discard_request
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the number of discards a thread will issue at a time.
+		Higher number will allow the discard thread to finish its work
+		faster, at the cost of higher latency for incomming I/O.
+
+What:		/sys/fs/f2fs/<disk>/min_discard_issue_time
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the interval the discard thread will wait between
+		issuing discard requests when there are discards to be issued and
+		no I/O aware interruptions occur.
+
+What:		/sys/fs/f2fs/<disk>/mid_discard_issue_time
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the interval the discard thread will wait between
+		issuing discard requests when there are discards to be issued and
+		an I/O aware interruption occurs.
+
+What:		/sys/fs/f2fs/<disk>/max_discard_issue_time
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the interval the discard thread will wait when there are
+		no discard operations to be issued.
+
 What:		/sys/fs/f2fs/<disk>/discard_granularity
 Date:		July 2017
 Contact:	"Chao Yu" <yuchao0@huawei.com>
 Description:	Controls discard granularity of inner discard thread. Inner thread
 		will not issue discards with size that is smaller than granularity.
 		The unit size is one block(4KB), now only support configuring
-		in range of [1, 512]. Default value is 4(=16KB).
+		in range of [1, 512]. Default value is 16.
+		For small devices, default value is 1.

 What:		/sys/fs/f2fs/<disk>/umount_discard_timeout
 Date:		January 2019
@@ -112,6 +147,11 @@ Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:	Set timeout to issue discard commands during umount.
 	        Default: 5 secs

+What:		/sys/fs/f2fs/<disk>/pending_discard
+Date:		November 2021
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Shows the number of pending discard commands in the queue.
+
 What:		/sys/fs/f2fs/<disk>/max_victim_search
 Date:		January 2014
 Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -202,7 +242,7 @@ Description:	Shows total written kbytes issued to disk.
 What:		/sys/fs/f2fs/<disk>/features
 Date:		July 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:	<deprecated: should use /sys/fs/f2fs/<disk>/feature_list/
+Description:	<deprecated: should use /sys/fs/f2fs/<disk>/feature_list/>
 		Shows all enabled features in current device.
 		Supported features:
 		encryption, blkzoned, extra_attr, projquota, inode_checksum,
@@ -264,11 +304,16 @@ Description:	Shows current reserved blocks in system, it may be temporarily
 What:		/sys/fs/f2fs/<disk>/gc_urgent
 Date:		August 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:	Do background GC aggressively when set. When gc_urgent = 1,
-		background thread starts to do GC by given gc_urgent_sleep_time
-		interval. When gc_urgent = 2, F2FS will lower the bar of
-		checking idle in order to process outstanding discard commands
-		and GC a little bit aggressively. It is set to 0 by default.
+Description:	Do background GC aggressively when set. Set to 0 by default.
+		gc urgent high(1): does GC forcibly in a period of given
+		gc_urgent_sleep_time and ignores I/O idling check. uses greedy
+		GC approach and turns SSR mode on.
+		gc urgent low(2): lowers the bar of checking I/O idling in
+		order to process outstanding discard commands and GC a
+		little bit aggressively. uses cost benefit GC approach.
+		gc urgent mid(3): does GC forcibly in a period of given
+		gc_urgent_sleep_time and executes a mid level of I/O idling check.
+		uses cost benefit GC approach.

 What:		/sys/fs/f2fs/<disk>/gc_urgent_sleep_time
 Date:		August 2017
@@ -428,6 +473,30 @@ Description:	Show status of f2fs superblock in real time.
 		0x4000 SBI_IS_FREEZING       freefs is in process
 		====== ===================== =================================

+What:		/sys/fs/f2fs/<disk>/stat/cp_status
+Date:		September 2022
+Contact:	"Chao Yu" <chao.yu@oppo.com>
+Description:	Show status of f2fs checkpoint in real time.
+
+		=============================== ==============================
+		cp flag				value
+		CP_UMOUNT_FLAG			0x00000001
+		CP_ORPHAN_PRESENT_FLAG		0x00000002
+		CP_COMPACT_SUM_FLAG		0x00000004
+		CP_ERROR_FLAG			0x00000008
+		CP_FSCK_FLAG			0x00000010
+		CP_FASTBOOT_FLAG		0x00000020
+		CP_CRC_RECOVERY_FLAG		0x00000040
+		CP_NAT_BITS_FLAG		0x00000080
+		CP_TRIMMED_FLAG			0x00000100
+		CP_NOCRC_RECOVERY_FLAG		0x00000200
+		CP_LARGE_NAT_BITMAP_FLAG	0x00000400
+		CP_QUOTA_NEED_FSCK_FLAG		0x00000800
+		CP_DISABLED_FLAG		0x00001000
+		CP_DISABLED_QUICK_FLAG		0x00002000
+		CP_RESIZEFS_FLAG		0x00004000
+		=============================== ==============================
+
 What:		/sys/fs/f2fs/<disk>/ckpt_thread_ioprio
 Date:		January 2021
 Contact:	"Daeho Jeong" <daehojeong@google.com>
@@ -499,7 +568,7 @@ Date:		July 2021
 Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	Show how many segments have been reclaimed by GC during a specific
 		GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
-		3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
+		3: GC idle AT, 4: GC urgent high, 5: GC urgent low 6: GC urgent mid)
 		You can re-initialize this value to "0".

 What:		/sys/fs/f2fs/<disk>/gc_segment_mode
@@ -513,3 +582,90 @@ Date:		July 2021
 Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	You can	control the multiplier value of	bdi device readahead window size
 		between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
+
+What:		/sys/fs/f2fs/<disk>/max_fragment_chunk
+Date:		August 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	With "mode=fragment:block" mount options, we can scatter block allocation.
+		f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
+		in the length of 1..<max_fragment_hole> by turns. This value can be set
+		between 1..512 and the default value is 4.
+
+What:		/sys/fs/f2fs/<disk>/max_fragment_hole
+Date:		August 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	With "mode=fragment:block" mount options, we can scatter block allocation.
+		f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
+		in the length of 1..<max_fragment_hole> by turns. This value can be set
+		between 1..512 and the default value is 4.
+
+What:		/sys/fs/f2fs/<disk>/gc_remaining_trials
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	You can set the trial count limit for GC urgent and idle mode with this value.
+		If GC thread gets to the limit, the mode will turn back to GC normal mode.
+		By default, the value is zero, which means there is no limit like before.
+
+What:		/sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
+Date:		January 2022
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Controls max # of node block writes to be used for roll forward
+		recovery. This can limit the roll forward recovery time.
+
+What:		/sys/fs/f2fs/<disk>/unusable_blocks_per_sec
+Date:		June 2022
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Shows the number of unusable blocks in a section which was defined by
+		the zone capacity reported by underlying zoned device.
+
+What:		/sys/fs/f2fs/<disk>/current_atomic_write
+Date:		July 2022
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	Show the total current atomic write block count, which is not committed yet.
+		This is a read-only entry.
+
+What:		/sys/fs/f2fs/<disk>/peak_atomic_write
+Date:		July 2022
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	Show the peak value of total current atomic write block count after boot.
+		If you write "0" here, you can initialize to "0".
+
+What:		/sys/fs/f2fs/<disk>/committed_atomic_block
+Date:		July 2022
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	Show the accumulated total committed atomic write block count after boot.
+		If you write "0" here, you can initialize to "0".
+
+What:		/sys/fs/f2fs/<disk>/revoked_atomic_block
+Date:		July 2022
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	Show the accumulated total revoked atomic write block count after boot.
+		If you write "0" here, you can initialize to "0".
+
+What:		/sys/fs/f2fs/<disk>/gc_mode
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	Show the current gc_mode as a string.
+		This is a read-only entry.
+
+What:		/sys/fs/f2fs/<disk>/discard_urgent_util
+Date:		November 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	When space utilization exceeds this, do background DISCARD aggressively.
+		Does DISCARD forcibly in a period of given min_discard_issue_time when the number
+		of discards is not 0 and set discard granularity to 1.
+		Default: 80
+
+What:		/sys/fs/f2fs/<disk>/hot_data_age_threshold
+Date:		November 2022
+Contact:	"Ping Xiong" <xiongping1@xiaomi.com>
+Description:	When DATA SEPARATION is on, it controls the age threshold to indicate
+		the data blocks as hot. By default it was initialized as 262144 blocks
+		(equals to 1GB).
+
+What:		/sys/fs/f2fs/<disk>/warm_data_age_threshold
+Date:		November 2022
+Contact:	"Ping Xiong" <xiongping1@xiaomi.com>
+Description:	When DATA SEPARATION is on, it controls the age threshold to indicate
+		the data blocks as warm. By default it was initialized as 2621440 blocks
+		(equals to 10GB).
--- a/Documentation/ABI/testing/sysfs-fs-fuse
+++ b/Documentation/ABI/testing/sysfs-fs-fuse
@@ -0,0 +1,19 @@
+What:		/sys/fs/fuse/features/fuse_bpf
+Date:		December 2022
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:
+		Read-only file that contains the word 'supported' if fuse-bpf is
+		supported, does not exist otherwise
+
+What:		/sys/fs/fuse/bpf_prog_type_fuse
+Date:		December 2022
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:
+		bpf_prog_type_fuse defines the program type of bpf programs that
+		may be passed to fuse-bpf. For upstream bpf program types, this
+		is a constant defined in a contiguous array of constants.
+		bpf_prog_type_fuse is appended to the end of the list, so it may
+		change and therefore its value must be read from this file.
+
+		Contents is ASCII decimal representation of bpf_prog_type_fuse
+
--- a/Documentation/ABI/testing/sysfs-fs-incfs
+++ b/Documentation/ABI/testing/sysfs-fs-incfs
@@ -0,0 +1,70 @@
+What:		/sys/fs/incremental-fs/features/corefs
+Date:		2019
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Reads 'supported'. Always present.
+
+What:		/sys/fs/incremental-fs/features/v2
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Reads 'supported'. Present if all v2 features of incfs are
+		supported.
+
+What:		/sys/fs/incremental-fs/features/zstd
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Reads 'supported'. Present if zstd compression is supported
+		for data blocks.
+
+What:		/sys/fs/incremental-fs/features/bugfix_throttling
+Date:		January 2023
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Reads 'supported'. Present if the throttling lock bug is fixed
+		https://android-review.git.corp.google.com/c/kernel/common/+/2381827
+
+What:		/sys/fs/incremental-fs/instances/[name]
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Folder created when incfs is mounted with the sysfs_name=[name]
+		option. If this option is used, the following values are created
+		in this folder.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_delayed_min
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns a count of the number of reads that were delayed as a
+		result of the per UID read timeouts min time setting.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_delayed_min_us
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns total delay time for all files since first mount as a
+		result of the per UID read timeouts min time setting.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns a count of the number of reads that were delayed as a
+		result of waiting for a pending read.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending_us
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns total delay time for all files since first mount as a
+		result of waiting for a pending read.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_failed_hash_verification
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns number of reads that failed because of hash verification
+		failures.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_failed_other
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns number of reads that failed for reasons other than
+		timing out or hash failures.
+
+What:		/sys/fs/incremental-fs/instances/[name]/reads_failed_timed_out
+Date:		April 2021
+Contact:	Paul Lawrence <paullawrence@google.com>
+Description:	Returns number of reads that timed out.
--- a/Documentation/ABI/testing/sysfs-kernel-dmaheap
+++ b/Documentation/ABI/testing/sysfs-kernel-dmaheap
@@ -0,0 +1,7 @@
+What:		/sys/kernel/dma_heap/total_pools_kb
+Date:		Feb 2021
+KernelVersion:	5.10
+Contact:	Hridya Valsaraju <hridya@google.com>,
+Description:
+		The total_pools_kb file is read-only and specifies how much
+		memory in Kb is allocated to DMA-BUF heap pools.
--- a/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
+++ b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
@@ -0,0 +1,16 @@
+What:		/sys/kernel/wakeup_reasons/last_resume_reason
+Date:		February 2014
+Contact:	Ruchi Kandoi <kandoiruchi@google.com>
+Description:
+		The /sys/kernel/wakeup_reasons/last_resume_reason is
+		used to report wakeup reasons after system exited suspend.
+
+What:		/sys/kernel/wakeup_reasons/last_suspend_time
+Date:		March 2015
+Contact:	jinqian <jinqian@google.com>
+Description:
+		The /sys/kernel/wakeup_reasons/last_suspend_time is
+		used to report time spent in last suspend cycle. It contains
+		two numbers (in seconds) separated by space. First number is
+		the time spent in suspend and resume processes. Second number
+		is the time spent in sleep state.
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1296,6 +1296,11 @@ PAGE_SIZE multiple when read back.
 	  pagetables
                Amount of memory allocated for page tables.

+	  sec_pagetables
+		Amount of memory allocated for secondary page tables,
+		this currently includes KVM mmu allocations on x86
+		and arm64.
+
 	  percpu (npn)
 		Amount of memory used for storing per-cpu kernel
 		data structures.
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -961,6 +961,10 @@
 			can be useful when debugging issues that require an SLB
 			miss to occur.

+	disable_dma32=	[KNL]
+			Dynamically disable ZONE_DMA32 on kernels compiled with
+			CONFIG_ZONE_DMA32=y.
+
 	stress_slb	[PPC]
 			Limits the number of kernel SLB entries, and flushes
 			them frequently to increase the rate of SLB faults
@@ -1396,6 +1400,10 @@
 			Format: { "fix" }
 			Permit 'security.evm' to be updated regardless of
 			current integrity status.
+	export_pmu_events
+			[KNL,ARM64] Sets the PMU export bit (PMCR_EL0.X), which enables
+			the exporting of events over an IMPLEMENTATION DEFINED PMU event
+			export bus to another device.

 	failslab=
 	fail_usercopy=
@@ -1646,6 +1654,10 @@
 				If specified, z/VM IUCV HVC accepts connections
 				from listed z/VM user IDs only.

+	hvc_dcc.enable=	[ARM,ARM64]	Enable DCC driver at runtime. For GKI,
+				disabled at runtime by default to prevent
+				crashes in devices which do not support DCC.
+
 	hv_nopvspin	[X86,HYPER_V] Disables the paravirt spinlock optimizations
 				      which allow the hypervisor to 'idle' the
 				      guest on lock contention.
@@ -2056,6 +2068,14 @@
 			  forcing Dual Address Cycle for PCI cards supporting
 			  greater than 32-bit addressing.

+	iommu.max_align_shift=
+			[ARM64, X86] Limit the alignment of IOVAs to a maximum
+			PAGE_SIZE order. Larger IOVAs will be aligned to this
+			specified order. The order is expressed as a power of
+			two multiplied by the PAGE_SIZE.
+			Format: { "4" | "5" | "6" | "7" | "8" | "9" }
+			Default: 9
+
 	iommu.strict=	[ARM64, X86] Configure TLB invalidation behaviour
 			Format: { "0" | "1" }
 			0 - Lazy mode.
@@ -2078,6 +2098,9 @@
 			1 - Bypass the IOMMU for DMA.
 			unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.

+	ioremap_guard	[ARM64] enable the KVM MMIO guard functionality
+			if available.
+
 	io7=		[HW] IO7 for Marvel-based Alpha systems
 			See comment before marvel_specify_io7 in
 			arch/alpha/kernel/core_marvel.c.
@@ -2367,14 +2390,19 @@
 	kvm-arm.mode=
 			[KVM,ARM] Select one of KVM/arm64's modes of operation.

+			none: Forcefully disable KVM.
+
 			nvhe: Standard nVHE-based mode, without support for
 			      protected guests.

 			protected: nVHE-based mode with support for guests whose
-				   state is kept private from the host.
-				   Not valid if the kernel is running in EL2.
+				   state is kept private from the host. See
+				   Documentation/virt/kvm/arm/pkvm.rst for more
+				   information about this mode of operation.

-			Defaults to VHE/nVHE based on hardware support.
+			Defaults to VHE/nVHE based on hardware support. Setting
+			mode to "protected" will disable kexec and hibernation
+			for the host.

 	kvm-arm.vgic_v3_group0_trap=
 			[KVM,ARM] Trap guest accesses to GICv3 group-0
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -32,6 +32,7 @@ the Linux memory management.
   idle_page_tracking
   ksm
   memory-hotplug
+   multigen_lru
   nommu-mmap
   numa_memory_policy
   numaperf
--- a/Documentation/admin-guide/mm/multigen_lru.rst
+++ b/Documentation/admin-guide/mm/multigen_lru.rst
@@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Multi-Gen LRU
+=============
+The multi-gen LRU is an alternative LRU implementation that optimizes
+page reclaim and improves performance under memory pressure. Page
+reclaim decides the kernel's caching policy and ability to overcommit
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
+
+Quick start
+===========
+Build the kernel with the following configurations.
+
+* ``CONFIG_LRU_GEN=y``
+* ``CONFIG_LRU_GEN_ENABLED=y``
+
+All set!
+
+Runtime options
+===============
+``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the
+following subsections.
+
+Kill switch
+-----------
+``enabled`` accepts different values to enable or disable the
+following components. Its default value depends on
+``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled
+unless some of them have unforeseen side effects. Writing to
+``enabled`` has no effect when a component is not supported by the
+hardware, and valid values will be accepted even when the main switch
+is off.
+
+====== ===============================================================
+Values Components
+====== ===============================================================
+0x0001 The main switch for the multi-gen LRU.
+0x0002 Clearing the accessed bit in leaf page table entries in large
+       batches, when MMU sets it (e.g., on x86). This behavior can
+       theoretically worsen lock contention (mmap_lock). If it is
+       disabled, the multi-gen LRU will suffer a minor performance
+       degradation for workloads that contiguously map hot pages,
+       whose accessed bits can be otherwise cleared by fewer larger
+       batches.
+0x0004 Clearing the accessed bit in non-leaf page table entries as
+       well, when MMU sets it (e.g., on x86). This behavior was not
+       verified on x86 varieties other than Intel and AMD. If it is
+       disabled, the multi-gen LRU will suffer a negligible
+       performance degradation.
+[yYnN] Apply to all the components above.
+====== ===============================================================
+
+E.g.,
+::
+
+    echo y >/sys/kernel/mm/lru_gen/enabled
+    cat /sys/kernel/mm/lru_gen/enabled
+    0x0007
+    echo 5 >/sys/kernel/mm/lru_gen/enabled
+    cat /sys/kernel/mm/lru_gen/enabled
+    0x0005
+
+Thrashing prevention
+--------------------
+Personal computers are more sensitive to thrashing because it can
+cause janks (lags when rendering UI) and negatively impact user
+experience. The multi-gen LRU offers thrashing prevention to the
+majority of laptop and desktop users who do not have ``oomd``.
+
+Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of
+``N`` milliseconds from getting evicted. The OOM killer is triggered
+if this working set cannot be kept in memory. In other words, this
+option works as an adjustable pressure relief valve, and when open, it
+terminates applications that are hopefully not being used.
+
+Based on the average human detectable lag (~100ms), ``N=1000`` usually
+eliminates intolerable janks due to thrashing. Larger values like
+``N=3000`` make janks less noticeable at the risk of premature OOM
+kills.
+
+The default value ``0`` means disabled.
+
+Experimental features
+=====================
+``/sys/kernel/debug/lru_gen`` accepts commands described in the
+following subsections. Multiple command lines are supported, so does
+concatenation with delimiters ``,`` and ``;``.
+
+``/sys/kernel/debug/lru_gen_full`` provides additional stats for
+debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from
+evicted generations in this file.
+
+Working set estimation
+----------------------
+Working set estimation measures how much memory an application needs
+in a given time interval, and it is usually done with little impact on
+the performance of the application. E.g., data centers want to
+optimize job scheduling (bin packing) to improve memory utilizations.
+When a new job comes in, the job scheduler needs to find out whether
+each server it manages can allocate a certain amount of memory for
+this new job before it can pick a candidate. To do so, the job
+scheduler needs to estimate the working sets of the existing jobs.
+
+When it is read, ``lru_gen`` returns a histogram of numbers of pages
+accessed over different time intervals for each memcg and node.
+``MAX_NR_GENS`` decides the number of bins for each histogram. The
+histograms are noncumulative.
+::
+
+    memcg  memcg_id  memcg_path
+       node  node_id
+           min_gen_nr  age_in_ms  nr_anon_pages  nr_file_pages
+           ...
+           max_gen_nr  age_in_ms  nr_anon_pages  nr_file_pages
+
+Each bin contains an estimated number of pages that have been accessed
+within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages
+and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of
+the former is the largest and that of the latter is the smallest.
+
+Users can write the following command to ``lru_gen`` to create a new
+generation ``max_gen_nr+1``:
+
+    ``+ memcg_id node_id max_gen_nr [can_swap [force_scan]]``
+
+``can_swap`` defaults to the swap setting and, if it is set to ``1``,
+it forces the scan of anon pages when swap is off, and vice versa.
+``force_scan`` defaults to ``1`` and, if it is set to ``0``, it
+employs heuristics to reduce the overhead, which is likely to reduce
+the coverage as well.
+
+A typical use case is that a job scheduler runs this command at a
+certain time interval to create new generations, and it ranks the
+servers it manages based on the sizes of their cold pages defined by
+this time interval.
+
+Proactive reclaim
+-----------------
+Proactive reclaim induces page reclaim when there is no memory
+pressure. It usually targets cold pages only. E.g., when a new job
+comes in, the job scheduler wants to proactively reclaim cold pages on
+the server it selected, to improve the chance of successfully landing
+this new job.
+
+Users can write the following command to ``lru_gen`` to evict
+generations less than or equal to ``min_gen_nr``.
+
+    ``- memcg_id node_id min_gen_nr [swappiness [nr_to_reclaim]]``
+
+``min_gen_nr`` should be less than ``max_gen_nr-1``, since
+``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
+the active list) and therefore cannot be evicted. ``swappiness``
+overrides the default value in ``/proc/sys/vm/swappiness``.
+``nr_to_reclaim`` limits the number of pages to evict.
+
+A typical use case is that a job scheduler runs this command before it
+tries to land a new job on a server. If it fails to materialize enough
+cold pages because of the overestimation, it retries on the next
+server according to the ranking result obtained from the working set
+estimation step. This less forceful approach limits the impacts on the
+existing jobs.
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -267,6 +267,17 @@ domain names are in general different. For a detailed discussion
 see the ``hostname(1)`` man page.


+export_pmu_events (arm64 only)
+==============================
+
+Controls the PMU export bit (PMCR_EL0.X), which enables the exporting of
+events over an IMPLEMENTATION DEFINED PMU event export bus to another device.
+
+0: disables exporting of events (default).
+
+1: enables exporting of events.
+
+
 firmware_config
 ===============

@@ -915,6 +926,17 @@ enabled, otherwise writing to this file will return ``-EBUSY``.
 The default value is 8.


+perf_user_access (arm64 only)
+=================================
+
+Controls user space access for reading perf event counters. When set to 1,
+user space can read performance monitor counter registers directly.
+
+The default value is 0 (access disabled).
+
+See Documentation/arm64/perf.rst for more information.
+
+
 pid_max
 =======

--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -92,7 +92,7 @@ operation if the source belongs to the supported system register space.

 The infrastructure emulates only the following system register space::

-	Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7
+	Op0=3, Op1=0, CRn=0, CRm=0,2,3,4,5,6,7

 (See Table C5-6 'System instruction encodings for non-Debug System
 register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
@@ -290,6 +290,44 @@ infrastructure:
     +------------------------------+---------+---------+
     | RPRES                        | [7-4]   |    y    |
     +------------------------------+---------+---------+
+     | WFXT                         | [3-0]   |    y    |
+     +------------------------------+---------+---------+
+
+  10) MVFR0_EL1 - AArch32 Media and VFP Feature Register 0
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | FPDP                         | [11-8]  |    y    |
+     +------------------------------+---------+---------+
+
+  11) MVFR1_EL1 - AArch32 Media and VFP Feature Register 1
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | SIMDFMAC                     | [31-28] |    y    |
+     +------------------------------+---------+---------+
+     | SIMDSP                       | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | SIMDInt                      | [15-12] |    y    |
+     +------------------------------+---------+---------+
+     | SIMDLS                       | [11-8]  |    y    |
+     +------------------------------+---------+---------+
+
+  12) ID_ISAR5_EL1 - AArch32 Instruction Set Attribute Register 5
+
+     +------------------------------+---------+---------+
+     | Name                         |  bits   | visible |
+     +------------------------------+---------+---------+
+     | CRC32                        | [19-16] |    y    |
+     +------------------------------+---------+---------+
+     | SHA2                         | [15-12] |    y    |
+     +------------------------------+---------+---------+
+     | SHA1                         | [11-8]  |    y    |
+     +------------------------------+---------+---------+
+     | AES                          | [7-4]   |    y    |
+     +------------------------------+---------+---------+


 Appendix I: Example
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -259,6 +259,48 @@ HWCAP2_RPRES

    Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.

+HWCAP2_MTE3
+
+    Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
+    by Documentation/arm64/memory-tagging-extension.rst.
+
+HWCAP2_SME
+
+    Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
+    by Documentation/arm64/sme.rst.
+
+HWCAP2_SME_I16I64
+
+    Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
+
+HWCAP2_SME_F64F64
+
+    Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
+
+HWCAP2_SME_I8I32
+
+    Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
+
+HWCAP2_SME_F16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
+
+HWCAP2_SME_B16F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
+
+HWCAP2_SME_F32F32
+
+    Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
+
+HWCAP2_SME_FA64
+
+    Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
+
+HWCAP2_WFXT
+
+    Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
+
 4. Unused AT_HWCAP bits
 -----------------------

--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -21,6 +21,7 @@ ARM64 Architecture
    perf
    pointer-authentication
    silicon-errata
+    sme
    sve
    tagged-address-abi
    tagged-pointers
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -76,6 +76,9 @@ configurable behaviours:
  with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
  address is unknown).

+- *Asymmetric* - Reads are handled as for synchronous mode while writes
+  are handled as for asynchronous mode.
+
 The user can select the above modes, per thread, using the
 ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
 contains any number of the following values in the ``PR_MTE_TCF_MASK``
@@ -139,18 +142,25 @@ tag checking mode as the CPU's preferred tag checking mode.

 The preferred tag checking mode for each CPU is controlled by
 ``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
-privileged user may write the value ``async`` or ``sync``.  The default
-preferred mode for each CPU is ``async``.
+privileged user may write the value ``async``, ``sync`` or ``asymm``.  The
+default preferred mode for each CPU is ``async``.

 To allow a program to potentially run in the CPU's preferred tag
 checking mode, the user program may set multiple tag check fault mode
 bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
-flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
-mode is in the task's set of provided tag checking modes (this will
-always be the case at present because the kernel only supports two
-tag checking modes, but future kernels may support more modes), that
-mode will be selected. Otherwise, one of the modes in the task's mode
-set will be selected in a currently unspecified manner.
+flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
+modes are requested then asymmetric mode may also be selected by the
+kernel. If the CPU's preferred tag checking mode is in the task's set
+of provided tag checking modes, that mode will be selected. Otherwise,
+one of the modes in the task's mode will be selected by the kernel
+from the task's mode set using the preference order:
+
+	1. Asynchronous
+	2. Asymmetric
+	3. Synchronous
+
+Note that there is no way for userspace to request multiple modes and
+also disable asymmetric mode.

 Initial process state
 ---------------------
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -102,12 +102,26 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2457168        | ARM64_ERRATUM_2457168       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A710     | #2224489        | ARM64_ERRATUM_2224489       |
+----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-N1     | #1349291        | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-N1     | #1542419        | ARM64_ERRATUM_1542419       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Neoverse-N2     | #2139208        | ARM64_ERRATUM_2139208       |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Neoverse-N2     | #2067961        | ARM64_ERRATUM_2067961       |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Neoverse-N2     | #2253138        | ARM64_ERRATUM_2253138       |
+----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
--- a/Documentation/arm64/sme.rst
+++ b/Documentation/arm64/sme.rst
@@ -0,0 +1,428 @@
+===================================================
+Scalable Matrix Extension support for AArch64 Linux
+===================================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Matrix Extension (SME).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.  It should be read in conjunction with the SVE
+documentation in sve.rst which provides details on the Streaming SVE mode
+included in SME.
+
+This document does not aim to describe the SME architecture or programmer's
+model.  To aid understanding, a minimal description of relevant programmer's
+model features for SME is included in Appendix A.
+
+
+1.  General
+-----------
+
+* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
+  register state and TPIDR2_EL0 are tracked per thread.
+
+* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
+  AT_HWCAP2 entry.  Presence of this flag implies the presence of the SME
+  instructions and registers, and the Linux-specific system interfaces
+  described in this document.  SME is reported in /proc/cpuinfo as "sme".
+
+* Support for the execution of SME instructions in userspace can also be
+  detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
+  instruction, and checking that the value of the SME field is nonzero. [3]
+
+  It does not guarantee the presence of the system interfaces described in the
+  following sections: software that needs to verify that those interfaces are
+  present must check for HWCAP2_SME instead.
+
+* There are a number of optional SME features, presence of these is reported
+  through AT_HWCAP2 through:
+
+	HWCAP2_SME_I16I64
+	HWCAP2_SME_F64F64
+	HWCAP2_SME_I8I32
+	HWCAP2_SME_F16F32
+	HWCAP2_SME_B16F32
+	HWCAP2_SME_F32F32
+	HWCAP2_SME_FA64
+
+  This list may be extended over time as the SME architecture evolves.
+
+  These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
+  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
+  cpu-feature-registers.txt for details.
+
+* Debuggers should restrict themselves to interacting with the target via the
+  NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets.  The recommended way
+  of detecting support for these regsets is to connect to a target process
+  first and then attempt a
+
+	ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
+
+* Whenever ZA register values are exchanged in memory between userspace and
+  the kernel, the register value is encoded in memory as a series of horizontal
+  vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
+  used for SVE vectors.
+
+* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
+  in which case it is set to 0.
+
+2.  Vector lengths
+------------------
+
+SME defines a second vector length similar to the SVE vector length which is
+controls the size of the streaming mode SVE vectors and the ZA matrix array.
+The ZA matrix is square with each side having as many bytes as a streaming
+mode SVE vector.
+
+
+3.  Sharing of streaming and non-streaming mode SVE state
+---------------------------------------------------------
+
+It is implementation defined which if any parts of the SVE state are shared
+between streaming and non-streaming modes.  When switching between modes
+via software interfaces such as ptrace if no register content is provided as
+part of switching no state will be assumed to be shared and everything will
+be zeroed.
+
+
+4.  System call behaviour
+-------------------------
+
+* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
+  ZA matrix are preserved.
+
+* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
+  as per the standard SVE ABI.
+
+* Neither the SVE registers nor ZA are used to pass arguments to or receive
+  results from any syscall.
+
+* On process creation (eg, clone()) the newly created process will have
+  PSTATE.SM cleared.
+
+* All other SME state of a thread, including the currently configured vector
+  length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
+  length (if any), is preserved across all syscalls, subject to the specific
+  exceptions for execve() described in section 6.
+
+
+5.  Signal handling
+-------------------
+
+* Signal handlers are invoked with streaming mode and ZA disabled.
+
+* A new signal frame record za_context encodes the ZA register contents on
+  signal delivery. [1]
+
+* The signal frame record for ZA always contains basic metadata, in particular
+  the thread's vector length (in za_context.vl).
+
+* The ZA matrix may or may not be included in the record, depending on
+  the value of PSTATE.ZA.  The registers are present if and only if:
+  za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
+  in which case PSTATE.ZA == 1.
+
+* If matrix data is present, the remainder of the record has a vl-dependent
+  size and layout.  Macros ZA_SIG_* are defined [1] to facilitate access to
+  them.
+
+* The matrix is stored as a series of horizontal vectors in the same format as
+  is used for SVE vectors.
+
+* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
+  space is allocated on the stack, an extra_context record is written in
+  __reserved[] referencing this space.  za_context is then written in the
+  extra space.  Refer to [1] for further details about this mechanism.
+
+
+5.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no za_context record in the signal frame, or if the record is
+  present but contains no register data as described in the previous section,
+  then ZA is disabled.
+
+* If za_context is present in the signal frame and contains matrix data then
+  PSTATE.ZA is set to 1 and ZA is populated with the specified data.
+
+* The vector length cannot be changed via signal return.  If za_context.vl in
+  the signal frame does not match the current vector length, the signal return
+  attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+6.  prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SME vector
+length:
+
+prctl(PR_SME_SET_VL, unsigned long arg)
+
+    Sets the vector length of the calling thread and related flags, where
+    arg == vl | flags.  Other threads of the calling process are unaffected.
+
+    vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+    flags:
+
+	PR_SME_VL_INHERIT
+
+	    Inherit the current vector length across execve().  Otherwise, the
+	    vector length is reset to the system default at execve().  (See
+	    Section 9.)
+
+	PR_SME_SET_VL_ONEXEC
+
+	    Defer the requested vector length change until the next execve()
+	    performed by this thread.
+
+	    The effect is equivalent to implicit execution of the following
+	    call immediately after the next execve() (if any) by the thread:
+
+		prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
+
+	    This allows launching of a new program with a different vector
+	    length, while avoiding runtime side effects in the caller.
+
+	    Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
+	    immediately.
+
+
+    Return value: a nonnegative on success, or a negative value on error:
+	EINVAL: SME not supported, invalid vector length requested, or
+	    invalid flags.
+
+
+    On success:
+
+    * Either the calling thread's vector length or the deferred vector length
+      to be applied at the next execve() by the thread (dependent on whether
+      PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
+      supported by the system that is less than or equal to vl.  If vl ==
+      SVE_VL_MAX, the value set will be the largest value supported by the
+      system.
+
+    * Any previously outstanding deferred vector length change in the calling
+      thread is cancelled.
+
+    * The returned value describes the resulting configuration, encoded as for
+      PR_SME_GET_VL.  The vector length reported in this value is the new
+      current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
+      present in arg; otherwise, the reported vector length is the deferred
+      vector length that will be applied at the next execve() by the calling
+      thread.
+
+    * Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      unspecified, including both streaming and non-streaming SVE state.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+    * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
+      Calling PR_SME_SET_VL with vl equal to the thread's current vector
+      length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+      does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SME_GET_VL)
+
+    Gets the vector length of the calling thread.
+
+    The following flag may be OR-ed into the result:
+
+	PR_SME_VL_INHERIT
+
+	    Vector length will be inherited across execve().
+
+    There is no way to determine whether there is an outstanding deferred
+    vector length change (which would only normally be the case between a
+    fork() or vfork() and the corresponding execve() in typical use).
+
+    To extract the vector length from the result, bitwise and it with
+    PR_SME_VL_LEN_MASK.
+
+    Return value: a nonnegative value on success, or a negative value on error:
+	EINVAL: SME not supported.
+
+
+7.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
+  state via PTRACE_GETREGSET and  PTRACE_SETREGSET, this is documented in
+  sve.rst.
+
+* A new regset NT_ARM_ZA is defined for ZA state for access to ZA state via
+  PTRACE_GETREGSET and PTRACE_SETREGSET.
+
+  Refer to [2] for definitions.
+
+The regset data starts with struct user_za_header, containing:
+
+    size
+
+	Size of the complete regset, in bytes.
+	This depends on vl and possibly on other things in the future.
+
+	If a call to PTRACE_GETREGSET requests less data than the value of
+	size, the caller can allocate a larger buffer and retry in order to
+	read the complete regset.
+
+    max_size
+
+	Maximum size in bytes that the regset can grow to for the target
+	thread.  The regset won't grow bigger than this even if the target
+	thread changes its vector length etc.
+
+    vl
+
+	Target thread's current streaming vector length, in bytes.
+
+    max_vl
+
+	Maximum possible streaming vector length for the target thread.
+
+    flags
+
+	Zero or more of the following flags, which have the same
+	meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+	    SME_PT_VL_INHERIT
+
+	    SME_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+  those documented for PR_SME_SET_VL.
+
+  The caller must make a further GETREGSET call if it needs to know what VL is
+  actually set by SETREGSET, unless is it known in advance that the requested
+  VL is supported.
+
+* The size and layout of the payload depends on the header fields.  The
+  SME_PT_ZA_*() macros are provided to facilitate access to the data.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+  case the vector length and flags are changed and PSTATE.ZA is set to 0
+  (along with any consequences of those changes).  If a payload is provided
+  then PSTATE.ZA will be set to 1.
+
+* For SETREGSET, if the requested VL is not supported, the effect will be the
+  same as if the payload were omitted, except that an EIO error is reported.
+  No attempt is made to translate the payload data to the correct layout
+  for the vector length actually set.  It is up to the caller to translate the
+  payload layout for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8.  ELF coredump extensions
+---------------------------
+
+* NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.
+
+* A NT_ARM_ZA note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
+  when the coredump was generated.
+
+
+9.  System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+  mechanism is provided for administrators, distro maintainers and developers
+  to set the default vector length for userspace processes:
+
+/proc/sys/abi/sme_default_vector_length
+
+    Writing the text representation of an integer to this file sets the system
+    default vector length to the specified value, unless the value is greater
+    than the maximum vector length supported by the system in which case the
+    default vector length is set to that maximum.
+
+    The result can be determined by reopening the file and reading its
+    contents.
+
+    At boot, the default vector length is initially set to 32 or the maximum
+    supported vector length, whichever is smaller and supported.  This
+    determines the initial vector length of the init process (PID 1).
+
+    Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+  the system default vector length, unless
+
+    * PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
+      calling thread, or
+
+    * a deferred vector length change is pending, established via the
+      PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+  of any existing process or thread that does not make an execve() call.
+
+
+Appendix A.  SME programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1.  Registers
+---------------
+
+In A64 state, SME adds the following:
+
+* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
+  features are available.  When supported EL0 software may enter and leave
+  streaming mode at any time.
+
+  For best system performance it is strongly encouraged for software to enable
+  streaming mode only when it is actively being used.
+
+* A new vector length controlling the size of ZA and the Z registers when in
+  streaming mode, separately to the vector length used for SVE when not in
+  streaming mode.  There is no requirement that either the currently selected
+  vector length or the set of vector lengths supported for the two modes in
+  a given system have any relationship.  The streaming mode vector length
+  is referred to as SVL.
+
+* A new ZA matrix register.  This is a square matrix of SVLxSVL bits.  Most
+  operations on ZA require that streaming mode be enabled but ZA can be
+  enabled without streaming mode in order to load, save and retain data.
+
+  For best system performance it is strongly encouraged for software to enable
+  ZA only when it is actively being used.
+
+* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
+  SMSTOP instructions or by access to the SVCR system register:
+
+  * PSTATE.ZA, if this is 1 then the ZA matrix is accessible and has valid
+    data while if it is 0 then ZA can not be accessed.  When PSTATE.ZA is
+    changed from 0 to 1 all bits in ZA are cleared.
+
+  * PSTATE.SM, if this is 1 then the PE is in streaming mode.  When the value
+    of PSTATE.SM is changed then it is implementation defined if the subset
+    of the floating point register bits valid in both modes may be retained.
+    Any other bits will be cleared.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+    AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+    AArch64 Linux ptrace ABI definitions
+
+[3] Documentation/arm64/cpu-feature-registers.rst
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arm64/sve.rst
@@ -7,7 +7,9 @@ Author: Dave Martin <Dave.Martin@arm.com>
 Date:   4 August 2017

 This document outlines briefly the interface provided to userspace by Linux in
-order to support use of the ARM Scalable Vector Extension (SVE).
+order to support use of the ARM Scalable Vector Extension (SVE), including
+interactions with Streaming SVE mode added by the Scalable Matrix Extension
+(SME).

 This is an outline of the most important features and issues only and not
 intended to be exhaustive.
@@ -23,6 +25,10 @@ model features for SVE is included in Appendix A.
 * SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
  tracked per-thread.

+* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
+  in the system, when it is not supported and these interfaces are used to
+  access streaming mode FFR is read and written as zero.
+
 * The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
  AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
  instructions and registers, and the Linux-specific system interfaces
@@ -53,10 +59,19 @@ model features for SVE is included in Appendix A.
  which userspace can read using an MRS instruction.  See elf_hwcaps.txt and
  cpu-feature-registers.txt for details.

+* On hardware that supports the SME extensions, HWCAP2_SME will also be
+  reported in the AT_HWCAP2 aux vector entry.  Among other things SME adds
+  streaming mode which provides a subset of the SVE feature set using a
+  separate SME vector length and the same Z/V registers.  See sme.rst
+  for more details.
+
 * Debuggers should restrict themselves to interacting with the target via the
  NT_ARM_SVE regset.  The recommended way of detecting support for this regset
  is to connect to a target process first and then attempt a
-  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).  Note that when SME is
+  present and streaming SVE mode is in use the FPSIMD subset of registers
+  will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
+  in the target.

 * Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
  between userspace and the kernel, the register value is encoded in memory in
@@ -126,6 +141,11 @@ the SVE instruction set architecture.
  are only present in fpsimd_context.  For convenience, the content of V0..V31
  is duplicated between sve_context and fpsimd_context.

+* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM which
+  if set indicates that the thread is in streaming mode and the vector length
+  and register data (if present) describe the streaming SVE data and vector
+  length.
+
 * The signal frame record for SVE always contains basic metadata, in particular
  the thread's vector length (in sve_context.vl).

@@ -170,6 +190,11 @@ When returning from a signal handler:
  the signal frame does not match the current vector length, the signal return
  attempt is treated as illegal, resulting in a forced SIGSEGV.

+* It is permitted to enter or leave streaming mode by setting or clearing
+  the SVE_SIG_FLAG_SM flag but applications should take care to ensure that
+  when doing so sve_context.vl and any register data are appropriate for the
+  vector length in the new mode.
+

 6.  prctl extensions
 --------------------
@@ -255,7 +280,7 @@ prctl(PR_SVE_GET_VL)
    vector length change (which would only normally be the case between a
    fork() or vfork() and the corresponding execve() in typical use).

-    To extract the vector length from the result, and it with
+    To extract the vector length from the result, bitwise and it with
    PR_SVE_VL_LEN_MASK.

    Return value: a nonnegative value on success, or a negative value on error:
@@ -265,8 +290,14 @@ prctl(PR_SVE_GET_VL)
 7.  ptrace extensions
 ---------------------

-* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
-  PTRACE_SETREGSET.
+* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
+  PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
+  streaming mode SVE registers and NT_ARM_SVE describes the
+  non-streaming mode SVE registers.
+
+  In this description a register set is referred to as being "live" when
+  the target is in the appropriate streaming or non-streaming mode and is
+  using data beyond the subset shared with the FPSIMD Vn registers.

  Refer to [2] for definitions.

@@ -297,7 +328,7 @@ The regset data starts with struct user_sve_header, containing:

    flags

-	either
+	at most one of

 	    SVE_PT_REGS_FPSIMD

@@ -331,6 +362,10 @@ The regset data starts with struct user_sve_header, containing:

 	    SVE_PT_VL_ONEXEC (SETREGSET only).

+	If neither FPSIMD nor SVE flags are provided then no register
+	payload is available, this is only possible when SME is implemented.
+
+
 * The effects of changing the vector length and/or flags are equivalent to
  those documented for PR_SVE_SET_VL.

@@ -346,6 +381,13 @@ The regset data starts with struct user_sve_header, containing:
  case only the vector length and flags are changed (along with any
  consequences of those changes).

+* In systems supporting SME when in streaming mode a GETREGSET for
+  NT_REG_SVE will return only the user_sve_header with no register data,
+  similarly a GETREGSET for NT_REG_SSVE will not return any register data
+  when not in streaming mode.
+
+* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
+
 * For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
  requested VL is not supported, the effect will be the same as if the
  payload were omitted, except that an EIO error is reported.  No
@@ -355,17 +397,25 @@ The regset data starts with struct user_sve_header, containing:
  unspecified.  It is up to the caller to translate the payload layout
  for the actual VL and retry.

+* Where SME is implemented it is not possible to GETREGSET the register
+  state for normal SVE when in streaming mode, nor the streaming mode
+  register state when in normal mode, regardless of the implementation defined
+  behaviour of the hardware for sharing data between the two modes.
+
+* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
+  streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
+  if the target was not in streaming mode.
+
 * The effect of writing a partial, incomplete payload is unspecified.


 8.  ELF coredump extensions
 ---------------------------

-* A NT_ARM_SVE note will be added to each coredump for each thread of the
-  dumped process.  The contents will be equivalent to the data that would have
-  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
-  when the coredump was generated.
-
+* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
+  each thread of the dumped process.  The contents will be equivalent to the
+  data that would have been read if a PTRACE_GETREGSET of the corresponding
+  type were executed for each thread when the coredump was generated.

 9.  System runtime configuration
 --------------------------------
--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@@ -1,5 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0

+.. _inline_encryption:
+
 =================
 Inline Encryption
 =================
@@ -7,230 +9,269 @@ Inline Encryption
 Background
 ==========

-Inline encryption hardware sits logically between memory and the disk, and can
-en/decrypt data as it goes in/out of the disk. Inline encryption hardware has a
-fixed number of "keyslots" - slots into which encryption contexts (i.e. the
-encryption key, encryption algorithm, data unit size) can be programmed by the
-kernel at any time. Each request sent to the disk can be tagged with the index
-of a keyslot (and also a data unit number to act as an encryption tweak), and
-the inline encryption hardware will en/decrypt the data in the request with the
-encryption context programmed into that keyslot. This is very different from
-full disk encryption solutions like self encrypting drives/TCG OPAL/ATA
-Security standards, since with inline encryption, any block on disk could be
-encrypted with any encryption context the kernel chooses.
+Inline encryption hardware sits logically between memory and disk, and can
+en/decrypt data as it goes in/out of the disk.  For each I/O request, software
+can control exactly how the inline encryption hardware will en/decrypt the data
+in terms of key, algorithm, data unit size (the granularity of en/decryption),
+and data unit number (a value that determines the initialization vector(s)).

+Some inline encryption hardware accepts all encryption parameters including raw
+keys directly in low-level I/O requests.  However, most inline encryption
+hardware instead has a fixed number of "keyslots" and requires that the key,
+algorithm, and data unit size first be programmed into a keyslot.  Each
+low-level I/O request then just contains a keyslot index and data unit number.
+
+Note that inline encryption hardware is very different from traditional crypto
+accelerators, which are supported through the kernel crypto API.  Traditional
+crypto accelerators operate on memory regions, whereas inline encryption
+hardware operates on I/O requests.  Thus, inline encryption hardware needs to be
+managed by the block layer, not the kernel crypto API.
+
+Inline encryption hardware is also very different from "self-encrypting drives",
+such as those based on the TCG Opal or ATA Security standards.  Self-encrypting
+drives don't provide fine-grained control of encryption and provide no way to
+verify the correctness of the resulting ciphertext.  Inline encryption hardware
+provides fine-grained control of encryption, including the choice of key and
+initialization vector for each sector, and can be tested for correctness.

 Objective
 =========

-We want to support inline encryption (IE) in the kernel.
-To allow for testing, we also want a crypto API fallback when actual
-IE hardware is absent. We also want IE to work with layered devices
-like dm and loopback (i.e. we want to be able to use the IE hardware
-of the underlying devices if present, or else fall back to crypto API
-en/decryption).
-
+We want to support inline encryption in the kernel.  To make testing easier, we
+also want support for falling back to the kernel crypto API when actual inline
+encryption hardware is absent.  We also want inline encryption to work with
+layered devices like device-mapper and loopback (i.e. we want to be able to use
+the inline encryption hardware of the underlying devices if present, or else
+fall back to crypto API en/decryption).

 Constraints and notes
 =====================

- IE hardware has a limited number of "keyslots" that can be programmed
-  with an encryption context (key, algorithm, data unit size, etc.) at any time.
-  One can specify a keyslot in a data request made to the device, and the
-  device will en/decrypt the data using the encryption context programmed into
-  that specified keyslot. When possible, we want to make multiple requests with
-  the same encryption context share the same keyslot.
+- We need a way for upper layers (e.g. filesystems) to specify an encryption
+  context to use for en/decrypting a bio, and device drivers (e.g. UFSHCD) need
+  to be able to use that encryption context when they process the request.
+  Encryption contexts also introduce constraints on bio merging; the block layer
+  needs to be aware of these constraints.

- We need a way for upper layers like filesystems to specify an encryption
-  context to use for en/decrypting a struct bio, and a device driver (like UFS)
-  needs to be able to use that encryption context when it processes the bio.
+- Different inline encryption hardware has different supported algorithms,
+  supported data unit sizes, maximum data unit numbers, etc.  We call these
+  properties the "crypto capabilities".  We need a way for device drivers to
+  advertise crypto capabilities to upper layers in a generic way.

- We need a way for device drivers to expose their inline encryption
-  capabilities in a unified way to the upper layers.
+- Inline encryption hardware usually (but not always) requires that keys be
+  programmed into keyslots before being used.  Since programming keyslots may be
+  slow and there may not be very many keyslots, we shouldn't just program the
+  key for every I/O request, but rather keep track of which keys are in the
+  keyslots and reuse an already-programmed keyslot when possible.

+- Upper layers typically define a specific end-of-life for crypto keys, e.g.
+  when an encrypted directory is locked or when a crypto mapping is torn down.
+  At these times, keys are wiped from memory.  We must provide a way for upper
+  layers to also evict keys from any keyslots they are present in.

-Design
-======
+- When possible, device-mapper devices must be able to pass through the inline
+  encryption support of their underlying devices.  However, it doesn't make
+  sense for device-mapper devices to have keyslots themselves.

-We add a struct bio_crypt_ctx to struct bio that can
-represent an encryption context, because we need to be able to pass this
-encryption context from the upper layers (like the fs layer) to the
-device driver to act upon.
+Basic design
+============

-While IE hardware works on the notion of keyslots, the FS layer has no
-knowledge of keyslots - it simply wants to specify an encryption context to
-use while en/decrypting a bio.
+We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
+how it will be used.  This includes the type of the key (standard or
+hardware-wrapped); the actual bytes of the key; the size of the key; the
+algorithm and data unit size the key will be used with; and the number of bytes
+needed to represent the maximum data unit number the key will be used with.

-We introduce a keyslot manager (KSM) that handles the translation from
-encryption contexts specified by the FS to keyslots on the IE hardware.
-This KSM also serves as the way IE hardware can expose its capabilities to
-upper layers. The generic mode of operation is: each device driver that wants
-to support IE will construct a KSM and set it up in its struct request_queue.
-Upper layers that want to use IE on this device can then use this KSM in
-the device's struct request_queue to translate an encryption context into
-a keyslot. The presence of the KSM in the request queue shall be used to mean
-that the device supports IE.
+We introduce ``struct bio_crypt_ctx`` to represent an encryption context.  It
+contains a data unit number and a pointer to a blk_crypto_key.  We add pointers
+to a bio_crypt_ctx to ``struct bio`` and ``struct request``; this allows users
+of the block layer (e.g. filesystems) to provide an encryption context when
+creating a bio and have it be passed down the stack for processing by the block
+layer and device drivers.  Note that the encryption context doesn't explicitly
+say whether to encrypt or decrypt, as that is implicit from the direction of the
+bio; WRITE means encrypt, and READ means decrypt.

-The KSM uses refcounts to track which keyslots are idle (either they have no
-encryption context programmed, or there are no in-flight struct bios
-referencing that keyslot). When a new encryption context needs a keyslot, it
-tries to find a keyslot that has already been programmed with the same
-encryption context, and if there is no such keyslot, it evicts the least
-recently used idle keyslot and programs the new encryption context into that
-one. If no idle keyslots are available, then the caller will sleep until there
-is at least one.
+We also introduce ``struct blk_crypto_profile`` to contain all generic inline
+encryption-related state for a particular inline encryption device.  The
+blk_crypto_profile serves as the way that drivers for inline encryption hardware
+advertise their crypto capabilities and provide certain functions (e.g.,
+functions to program and evict keys) to upper layers.  Each device driver that
+wants to support inline encryption will construct a blk_crypto_profile, then
+associate it with the disk's request_queue.

+The blk_crypto_profile also manages the hardware's keyslots, when applicable.
+This happens in the block layer, so that users of the block layer can just
+specify encryption contexts and don't need to know about keyslots at all, nor do
+device drivers need to care about most details of keyslot management.

-blk-mq changes, other block layer changes and blk-crypto-fallback
-=================================================================
+Specifically, for each keyslot, the block layer (via the blk_crypto_profile)
+keeps track of which blk_crypto_key that keyslot contains (if any), and how many
+in-flight I/O requests are using it.  When the block layer creates a
+``struct request`` for a bio that has an encryption context, it grabs a keyslot
+that already contains the key if possible.  Otherwise it waits for an idle
+keyslot (a keyslot that isn't in-use by any I/O), then programs the key into the
+least-recently-used idle keyslot using the function the device driver provided.
+In both cases, the resulting keyslot is stored in the ``crypt_keyslot`` field of
+the request, where it is then accessible to device drivers and is released after
+the request completes.

-We add a pointer to a ``bi_crypt_context`` and ``keyslot`` to
-struct request. These will be referred to as the ``crypto fields``
-for the request. This ``keyslot`` is the keyslot into which the
-``bi_crypt_context`` has been programmed in the KSM of the ``request_queue``
-that this request is being sent to.
+``struct request`` also contains a pointer to the original bio_crypt_ctx.
+Requests can be built from multiple bios, and the block layer must take the
+encryption context into account when trying to merge bios and requests.  For two
+bios/requests to be merged, they must have compatible encryption contexts: both
+unencrypted, or both encrypted with the same key and contiguous data unit
+numbers.  Only the encryption context for the first bio in a request is
+retained, since the remaining bios have been verified to be merge-compatible
+with the first bio.

-We introduce ``block/blk-crypto-fallback.c``, which allows upper layers to remain
-blissfully unaware of whether or not real inline encryption hardware is present
-underneath. When a bio is submitted with a target ``request_queue`` that doesn't
-support the encryption context specified with the bio, the block layer will
-en/decrypt the bio with the blk-crypto-fallback.
+To make it possible for inline encryption to work with request_queue based
+layered devices, when a request is cloned, its encryption context is cloned as
+well.  When the cloned request is submitted, it is then processed as usual; this
+includes getting a keyslot from the clone's target device if needed.

-If the bio is a ``WRITE`` bio, a bounce bio is allocated, and the data in the bio
-is encrypted stored in the bounce bio - blk-mq will then proceed to process the
-bounce bio as if it were not encrypted at all (except when blk-integrity is
-concerned). ``blk-crypto-fallback`` sets the bounce bio's ``bi_end_io`` to an
-internal function that cleans up the bounce bio and ends the original bio.
+blk-crypto-fallback
+===================

-If the bio is a ``READ`` bio, the bio's ``bi_end_io`` (and also ``bi_private``)
-is saved and overwritten by ``blk-crypto-fallback`` to
-``bio_crypto_fallback_decrypt_bio``.  The bio's ``bi_crypt_context`` is also
-overwritten with ``NULL``, so that to the rest of the stack, the bio looks
-as if it was a regular bio that never had an encryption context specified.
-``bio_crypto_fallback_decrypt_bio`` will decrypt the bio, restore the original
-``bi_end_io`` (and also ``bi_private``) and end the bio again.
+It is desirable for the inline encryption support of upper layers (e.g.
+filesystems) to be testable without real inline encryption hardware, and
+likewise for the block layer's keyslot management logic.  It is also desirable
+to allow upper layers to just always use inline encryption rather than have to
+implement encryption in multiple ways.

-Regardless of whether real inline encryption hardware is used or the
+Therefore, we also introduce *blk-crypto-fallback*, which is an implementation
+of inline encryption using the kernel crypto API.  blk-crypto-fallback is built
+into the block layer, so it works on any block device without any special setup.
+Essentially, when a bio with an encryption context is submitted to a
+block_device that doesn't support that encryption context, the block layer will
+handle en/decryption of the bio using blk-crypto-fallback.
+
+For encryption, the data cannot be encrypted in-place, as callers usually rely
+on it being unmodified.  Instead, blk-crypto-fallback allocates bounce pages,
+fills a new bio with those bounce pages, encrypts the data into those bounce
+pages, and submits that "bounce" bio.  When the bounce bio completes,
+blk-crypto-fallback completes the original bio.  If the original bio is too
+large, multiple bounce bios may be required; see the code for details.
+
+For decryption, blk-crypto-fallback "wraps" the bio's completion callback
+(``bi_complete``) and private data (``bi_private``) with its own, unsets the
+bio's encryption context, then submits the bio.  If the read completes
+successfully, blk-crypto-fallback restores the bio's original completion
+callback and private data, then decrypts the bio's data in-place using the
+kernel crypto API.  Decryption happens from a workqueue, as it may sleep.
+Afterwards, blk-crypto-fallback completes the bio.
+
+In both cases, the bios that blk-crypto-fallback submits no longer have an
+encryption context.  Therefore, lower layers only see standard unencrypted I/O.
+
+blk-crypto-fallback also defines its own blk_crypto_profile and has its own
+"keyslots"; its keyslots contain ``struct crypto_skcipher`` objects.  The reason
+for this is twofold.  First, it allows the keyslot management logic to be tested
+without actual inline encryption hardware.  Second, similar to actual inline
+encryption hardware, the crypto API doesn't accept keys directly in requests but
+rather requires that keys be set ahead of time, and setting keys can be
+expensive; moreover, allocating a crypto_skcipher can't happen on the I/O path
+at all due to the locks it takes.  Therefore, the concept of keyslots still
+makes sense for blk-crypto-fallback.
+
+Note that regardless of whether real inline encryption hardware or
 blk-crypto-fallback is used, the ciphertext written to disk (and hence the
-on-disk format of data) will be the same (assuming the hardware's implementation
-of the algorithm being used adheres to spec and functions correctly).
-
-If a ``request queue``'s inline encryption hardware claimed to support the
-encryption context specified with a bio, then it will not be handled by the
-``blk-crypto-fallback``. We will eventually reach a point in blk-mq when a
-struct request needs to be allocated for that bio. At that point,
-blk-mq tries to program the encryption context into the ``request_queue``'s
-keyslot_manager, and obtain a keyslot, which it stores in its newly added
-``keyslot`` field. This keyslot is released when the request is completed.
-
-When the first bio is added to a request, ``blk_crypto_rq_bio_prep`` is called,
-which sets the request's ``crypt_ctx`` to a copy of the bio's
-``bi_crypt_context``. bio_crypt_do_front_merge is called whenever a subsequent
-bio is merged to the front of the request, which updates the ``crypt_ctx`` of
-the request so that it matches the newly merged bio's ``bi_crypt_context``. In particular, the request keeps a copy of the ``bi_crypt_context`` of the first
-bio in its bio-list (blk-mq needs to be careful to maintain this invariant
-during bio and request merges).
-
-To make it possible for inline encryption to work with request queue based
-layered devices, when a request is cloned, its ``crypto fields`` are cloned as
-well. When the cloned request is submitted, blk-mq programs the
-``bi_crypt_context`` of the request into the clone's request_queue's keyslot
-manager, and stores the returned keyslot in the clone's ``keyslot``.
+on-disk format of data) will be the same (assuming that both the inline
+encryption hardware's implementation and the kernel crypto API's implementation
+of the algorithm being used adhere to spec and function correctly).

+blk-crypto-fallback is optional and is controlled by the
+``CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK`` kernel configuration option.

 API presented to users of the block layer
 =========================================

-``struct blk_crypto_key`` represents a crypto key (the raw key, size of the
-key, the crypto algorithm to use, the data unit size to use, and the number of
-bytes required to represent data unit numbers that will be specified with the
-``bi_crypt_context``).
+``blk_crypto_config_supported()`` allows users to check ahead of time whether
+inline encryption with particular crypto settings will work on a particular
+block_device -- either via hardware or via blk-crypto-fallback.  This function
+takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits
+the actual bytes of the key and instead just contains the algorithm, data unit
+size, etc.  This function can be useful if blk-crypto-fallback is disabled.

-``blk_crypto_init_key`` allows upper layers to initialize such a
-``blk_crypto_key``.
+``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key.

-``bio_crypt_set_ctx`` should be called on any bio that a user of
-the block layer wants en/decrypted via inline encryption (or the
-blk-crypto-fallback, if hardware support isn't available for the desired
-crypto configuration). This function takes the ``blk_crypto_key`` and the
-data unit number (DUN) to use when en/decrypting the bio.
+Users must call ``blk_crypto_start_using_key()`` before actually starting to use
+a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()``
+was called earlier).  This is needed to initialize blk-crypto-fallback if it
+will be needed.  This must not be called from the data path, as this may have to
+allocate resources, which may deadlock in that case.

-``blk_crypto_config_supported`` allows upper layers to query whether or not the
-an encryption context passed to request queue can be handled by blk-crypto
-(either by real inline encryption hardware, or by the blk-crypto-fallback).
-This is useful e.g. when blk-crypto-fallback is disabled, and the upper layer
-wants to use an algorithm that may not supported by hardware - this function
-lets the upper layer know ahead of time that the algorithm isn't supported,
-and the upper layer can fallback to something else if appropriate.
+Next, to attach an encryption context to a bio, users should call
+``bio_crypt_set_ctx()``.  This function allocates a bio_crypt_ctx and attaches
+it to a bio, given the blk_crypto_key and the data unit number that will be used
+for en/decryption.  Users don't need to worry about freeing the bio_crypt_ctx
+later, as that happens automatically when the bio is freed or reset.

-``blk_crypto_start_using_key`` - Upper layers must call this function on
-``blk_crypto_key`` and a ``request_queue`` before using the key with any bio
-headed for that ``request_queue``. This function ensures that either the
-hardware supports the key's crypto settings, or the crypto API fallback has
-transforms for the needed mode allocated and ready to go. Note that this
-function may allocate an ``skcipher``, and must not be called from the data
-path, since allocating ``skciphers`` from the data path can deadlock.
+Finally, when done using inline encryption with a blk_crypto_key on a
+block_device, users must call ``blk_crypto_evict_key()``.  This ensures that
+the key is evicted from all keyslots it may be programmed into and unlinked from
+any kernel data structures it may be linked into.

-``blk_crypto_evict_key`` *must* be called by upper layers before a
-``blk_crypto_key`` is freed. Further, it *must* only be called only once
-there are no more in-flight requests that use that ``blk_crypto_key``.
-``blk_crypto_evict_key`` will ensure that a key is removed from any keyslots in
-inline encryption hardware that the key might have been programmed into (or the blk-crypto-fallback).
+In summary, for users of the block layer, the lifecycle of a blk_crypto_key is
+as follows:
+
+1. ``blk_crypto_config_supported()`` (optional)
+2. ``blk_crypto_init_key()``
+3. ``blk_crypto_start_using_key()``
+4. ``bio_crypt_set_ctx()`` (potentially many times)
+5. ``blk_crypto_evict_key()`` (after all I/O has completed)
+6. Zeroize the blk_crypto_key (this has no dedicated function)
+
+If a blk_crypto_key is being used on multiple block_devices, then
+``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``,
+and ``blk_crypto_evict_key()`` must be called on each block_device.

 API presented to device drivers
 ===============================

-A :c:type:``struct blk_keyslot_manager`` should be set up by device drivers in
-the ``request_queue`` of the device. The device driver needs to call
-``blk_ksm_init`` (or its resource-managed variant ``devm_blk_ksm_init``) on the
-``blk_keyslot_manager``, while specifying the number of keyslots supported by
-the hardware.
+A device driver that wants to support inline encryption must set up a
+blk_crypto_profile in the request_queue of its device.  To do this, it first
+must call ``blk_crypto_profile_init()`` (or its resource-managed variant
+``devm_blk_crypto_profile_init()``), providing the number of keyslots.

-The device driver also needs to tell the KSM how to actually manipulate the
-IE hardware in the device to do things like programming the crypto key into
-the IE hardware into a particular keyslot. All this is achieved through the
-struct blk_ksm_ll_ops field in the KSM that the device driver
-must fill up after initing the ``blk_keyslot_manager``.
+Next, it must advertise its crypto capabilities by setting fields in the
+blk_crypto_profile, e.g. ``modes_supported`` and ``max_dun_bytes_supported``.

-The KSM also handles runtime power management for the device when applicable
-(e.g. when it wants to program a crypto key into the IE hardware, the device
-must be runtime powered on) - so the device driver must also set the ``dev``
-field in the ksm to point to the `struct device` for the KSM to use for runtime
-power management.
+It then must set function pointers in the ``ll_ops`` field of the
+blk_crypto_profile to tell upper layers how to control the inline encryption
+hardware, e.g. how to program and evict keyslots.  Most drivers will need to
+implement ``keyslot_program`` and ``keyslot_evict``.  For details, see the
+comments for ``struct blk_crypto_ll_ops``.

-``blk_ksm_reprogram_all_keys`` can be called by device drivers if the device
-needs each and every of its keyslots to be reprogrammed with the key it
-"should have" at the point in time when the function is called. This is useful
-e.g. if a device loses all its keys on runtime power down/up.
+Once the driver registers a blk_crypto_profile with a request_queue, I/O
+requests the driver receives via that queue may have an encryption context.  All
+encryption contexts will be compatible with the crypto capabilities declared in
+the blk_crypto_profile, so drivers don't need to worry about handling
+unsupported requests.  Also, if a nonzero number of keyslots was declared in the
+blk_crypto_profile, then all I/O requests that have an encryption context will
+also have a keyslot which was already programmed with the appropriate key.

-If the driver used ``blk_ksm_init`` instead of ``devm_blk_ksm_init``, then
-``blk_ksm_destroy`` should be called to free up all resources used by a
-``blk_keyslot_manager`` once it is no longer needed.
+If the driver implements runtime suspend and its blk_crypto_ll_ops don't work
+while the device is runtime-suspended, then the driver must also set the ``dev``
+field of the blk_crypto_profile to point to the ``struct device`` that will be
+resumed before any of the low-level operations are called.
+
+If there are situations where the inline encryption hardware loses the contents
+of its keyslots, e.g. device resets, the driver must handle reprogramming the
+keyslots.  To do this, the driver may call ``blk_crypto_reprogram_all_keys()``.
+
+Finally, if the driver used ``blk_crypto_profile_init()`` instead of
+``devm_blk_crypto_profile_init()``, then it is responsible for calling
+``blk_crypto_profile_destroy()`` when the crypto profile is no longer needed.

 Layered Devices
 ===============

-Request queue based layered devices like dm-rq that wish to support IE need to
-create their own keyslot manager for their request queue, and expose whatever
-functionality they choose. When a layered device wants to pass a clone of that
-request to another ``request_queue``, blk-crypto will initialize and prepare the
-clone as necessary - see ``blk_crypto_insert_cloned_request`` in
-``blk-crypto.c``.
-
-
-Future Optimizations for layered devices
-========================================
-
-Creating a keyslot manager for a layered device uses up memory for each
-keyslot, and in general, a layered device merely passes the request on to a
-"child" device, so the keyslots in the layered device itself are completely
-unused, and don't need any refcounting or keyslot programming. We can instead
-define a new type of KSM; the "passthrough KSM", that layered devices can use
-to advertise an unlimited number of keyslots, and support for any encryption
-algorithms they choose, while not actually using any memory for each keyslot.
-Another use case for the "passthrough KSM" is for IE devices that do not have a
-limited number of keyslots.
-
+Request queue based layered devices like dm-rq that wish to support inline
+encryption need to create their own blk_crypto_profile for their request_queue,
+and expose whatever functionality they choose. When a layered device wants to
+pass a clone of that request to another request_queue, blk-crypto will
+initialize and prepare the clone as necessary; see
+``blk_crypto_insert_cloned_request()``.

 Interaction between inline encryption and blk integrity
 =======================================================
@@ -257,7 +298,220 @@ Because there isn't any real hardware yet, it seems prudent to assume that
 hardware implementations might not implement both features together correctly,
 and disallow the combination for now. Whenever a device supports integrity, the
 kernel will pretend that the device does not support hardware inline encryption
-(by essentially setting the keyslot manager in the request_queue of the device
-to NULL). When the crypto API fallback is enabled, this means that all bios with
-and encryption context will use the fallback, and IO will complete as usual.
-When the fallback is disabled, a bio with an encryption context will be failed.
+(by setting the blk_crypto_profile in the request_queue of the device to NULL).
+When the crypto API fallback is enabled, this means that all bios with and
+encryption context will use the fallback, and IO will complete as usual.  When
+the fallback is disabled, a bio with an encryption context will be failed.
+
+.. _hardware_wrapped_keys:
+
+Hardware-wrapped keys
+=====================
+
+Motivation and threat model
+---------------------------
+
+Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
+relies on the raw encryption key(s) being present in kernel memory so that the
+encryption can be performed.  This traditionally isn't seen as a problem because
+the key(s) won't be present during an offline attack, which is the main type of
+attack that storage encryption is intended to protect from.
+
+However, there is an increasing desire to also protect users' data from other
+types of attacks (to the extent possible), including:
+
+- Cold boot attacks, where an attacker with physical access to a system suddenly
+  powers it off, then immediately dumps the system memory to extract recently
+  in-use encryption keys, then uses these keys to decrypt user data on-disk.
+
+- Online attacks where the attacker is able to read kernel memory without fully
+  compromising the system, followed by an offline attack where any extracted
+  keys can be used to decrypt user data on-disk.  An example of such an online
+  attack would be if the attacker is able to run some code on the system that
+  exploits a Meltdown-like vulnerability but is unable to escalate privileges.
+
+- Online attacks where the attacker fully compromises the system, but their data
+  exfiltration is significantly time-limited and/or bandwidth-limited, so in
+  order to completely exfiltrate the data they need to extract the encryption
+  keys to use in a later offline attack.
+
+Hardware-wrapped keys are a feature of inline encryption hardware that is
+designed to protect users' data from the above attacks (to the extent possible),
+without introducing limitations such as a maximum number of keys.
+
+Note that it is impossible to **fully** protect users' data from these attacks.
+Even in the attacks where the attacker "just" gets read access to kernel memory,
+they can still extract any user data that is present in memory, including
+plaintext pagecache pages of encrypted files.  The focus here is just on
+protecting the encryption keys, as those instantly give access to **all** user
+data in any following offline attack, rather than just some of it (where which
+data is included in that "some" might not be controlled by the attacker).
+
+Solution overview
+-----------------
+
+Inline encryption hardware typically has "keyslots" into which software can
+program keys for the hardware to use; the contents of keyslots typically can't
+be read back by software.  As such, the above security goals could be achieved
+if the kernel simply erased its copy of the key(s) after programming them into
+keyslot(s) and thereafter only referred to them via keyslot number.
+
+However, that naive approach runs into the problem that it limits the number of
+unlocked keys to the number of keyslots, which typically is a small number.  In
+cases where there is only one encryption key system-wide (e.g., a full-disk
+encryption key), that can be tolerable.  However, in general there can be many
+logged-in users with many different keys, and/or many running applications with
+application-specific encrypted storage areas.  This is especially true if
+file-based encryption (e.g. fscrypt) is being used.
+
+Thus, it is important for the kernel to still have a way to "remind" the
+hardware about a key, without actually having the raw key itself.  This would
+ensure that the number of hardware keyslots only limits the number of active I/O
+requests, not other things such as the number of logged-in users, the number of
+running apps, or the number of encrypted storage areas that apps can create.
+
+Somewhat less importantly, it is also desirable that the raw keys are never
+visible to software at all, even while being initially unlocked.  This would
+ensure that a read-only compromise of system memory will never allow a key to be
+extracted to be used off-system, even if it occurs when a key is being unlocked.
+
+To solve all these problems, some vendors of inline encryption hardware have
+made their hardware support *hardware-wrapped keys*.  Hardware-wrapped keys
+are encrypted keys that can only be unwrapped (decrypted) and used by hardware
+-- either by the inline encryption hardware itself, or by a dedicated hardware
+block that can directly provision keys to the inline encryption hardware.
+
+(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
+to add some clarity in cases where there could be other types of wrapped keys,
+such as in file-based encryption.  Key wrapping is a commonly used technique.)
+
+The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
+that is never exposed to software; it is either a persistent key (a "long-term
+wrapping key") or a per-boot key (an "ephemeral wrapping key").  The long-term
+wrapped form of the key is what is initially unlocked, but it is erased from
+memory as soon as it is converted into an ephemerally-wrapped key.  In-use
+hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
+
+As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
+the hardware also includes a level of indirection; it doesn't use the unwrapped
+key directly for inline encryption, but rather derives both an inline encryption
+key and a "software secret" from it.  Software can use the "software secret" for
+tasks that can't use the inline encryption hardware, such as filenames
+encryption.  The software secret is not protected from memory compromise.
+
+Key hierarchy
+-------------
+
+Here is the key hierarchy for a hardware-wrapped key::
+
+                       Hardware-wrapped key
+                                |
+                                |
+                          <Hardware KDF>
+                                |
+                  -----------------------------
+                  |                           |
+        Inline encryption key           Software secret
+
+The components are:
+
+- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
+  Function), in ephemerally-wrapped form.  The key wrapping algorithm is a
+  hardware implementation detail that doesn't impact kernel operation, but a
+  strong authenticated encryption algorithm such as AES-256-GCM is recommended.
+
+- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
+  derive subkeys after unwrapping the wrapped key.  The hardware's choice of KDF
+  doesn't impact kernel operation, but it does need to be known for testing
+  purposes, and it's also assumed to have at least a 256-bit security strength.
+  All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
+  with a particular choice of labels and contexts; new hardware should use this
+  already-vetted KDF.
+
+- *Inline encryption key*: a derived key which the hardware directly provisions
+  to a keyslot of the inline encryption hardware, without exposing it to
+  software.  In all known hardware, this will always be an AES-256-XTS key.
+  However, in principle other encryption algorithms could be supported too.
+  Hardware must derive distinct subkeys for each supported encryption algorithm.
+
+- *Software secret*: a derived key which the hardware returns to software so
+  that software can use it for cryptographic tasks that can't use inline
+  encryption.  This value is cryptographically isolated from the inline
+  encryption key, i.e. knowing one doesn't reveal the other.  (The KDF ensures
+  this.)  Currently, the software secret is always 32 bytes and thus is suitable
+  for cryptographic applications that require up to a 256-bit security strength.
+  Some use cases (e.g. full-disk encryption) won't require the software secret.
+
+Example: in the case of fscrypt, the fscrypt master key (the key that protects a
+particular set of encrypted directories) is made hardware-wrapped.  The inline
+encryption key is used as the file contents encryption key, while the software
+secret (rather than the master key directly) is used to key fscrypt's KDF
+(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
+
+Note that currently this design assumes a single inline encryption key per
+hardware-wrapped key, without any further key derivation.  Thus, in the case of
+fscrypt, currently hardware-wrapped keys are only compatible with the "inline
+encryption optimized" settings, which use one file contents encryption key per
+encryption policy rather than one per file.  This design could be extended to
+make the hardware derive per-file keys using per-file nonces passed down the
+storage stack, and in fact some hardware already supports this; future work is
+planned to remove this limitation by adding the corresponding kernel support.
+
+Kernel support
+--------------
+
+The inline encryption support of the kernel's block layer ("blk-crypto") has
+been extended to support hardware-wrapped keys as an alternative to standard
+keys, when hardware support is available.  This works in the following way:
+
+- A ``key_types_supported`` field is added to the crypto capabilities in
+  ``struct blk_crypto_profile``.  This allows device drivers to declare that
+  they support standard keys, hardware-wrapped keys, or both.
+
+- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
+  alternative to a standard key; a ``key_type`` field is added to
+  ``struct blk_crypto_config`` to distinguish between the different key types.
+  This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
+  key in a way very similar to using a standard key.
+
+- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added.  Device drivers
+  that support hardware-wrapped keys must implement this method.  Users of
+  blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method.
+
+- The programming and eviction of hardware-wrapped keys happens via
+  ``blk_crypto_ll_ops::keyslot_program`` and
+  ``blk_crypto_ll_ops::keyslot_evict``, just like it does for standard keys.  If
+  a driver supports hardware-wrapped keys, then it must handle hardware-wrapped
+  keys being passed to these methods.
+
+blk-crypto-fallback doesn't support hardware-wrapped keys.  Therefore,
+hardware-wrapped keys can only be used with actual inline encryption hardware.
+
+Currently, the kernel only works with hardware-wrapped keys in
+ephemerally-wrapped form.  No generic kernel interfaces are provided for
+generating or importing hardware-wrapped keys in the first place, or converting
+them to ephemerally-wrapped form.  In Android, SoC vendors are required to
+support these operations in their KeyMint implementation (a hardware abstraction
+layer in userspace); for details, see the `Android documentation
+<https://source.android.com/security/encryption/hw-wrapped-keys>`_.
+
+Testability
+-----------
+
+Both the hardware KDF and the inline encryption itself are well-defined
+algorithms that don't depend on any secrets other than the unwrapped key.
+Therefore, if the unwrapped key is known to software, these algorithms can be
+reproduced in software in order to verify the ciphertext that is written to disk
+by the inline encryption hardware.
+
+However, the unwrapped key will only be known to software for testing if the
+"import" functionality is used.  Proper testing is not possible in the
+"generate" case where the hardware generates the key itself.  The correct
+operation of the "generate" mode thus relies on the security and correctness of
+the hardware RNG and its use to generate the key, as well as the testing of the
+"import" mode as that should cover all parts other than the key generation.
+
+For an example of a test that verifies the ciphertext written to disk in the
+"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
+`Android's vts_kernel_encryption_test
+<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/master/encryption/>`_.
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -4,39 +4,76 @@ The Kernel Address Sanitizer (KASAN)
 Overview
 --------

-KernelAddressSANitizer (KASAN) is a dynamic memory safety error detector
-designed to find out-of-bound and use-after-free bugs. KASAN has three modes:
+Kernel Address Sanitizer (KASAN) is a dynamic memory safety error detector
+designed to find out-of-bounds and use-after-free bugs.

-1. generic KASAN (similar to userspace ASan),
-2. software tag-based KASAN (similar to userspace HWASan),
-3. hardware tag-based KASAN (based on hardware memory tagging).
+KASAN has three modes:

-Generic KASAN is mainly used for debugging due to a large memory overhead.
-Software tag-based KASAN can be used for dogfood testing as it has a lower
-memory overhead that allows using it with real workloads. Hardware tag-based
-KASAN comes with low memory and performance overheads and, therefore, can be
-used in production. Either as an in-field memory bug detector or as a security
-mitigation.
+1. Generic KASAN
+2. Software Tag-Based KASAN
+3. Hardware Tag-Based KASAN

-Software KASAN modes (#1 and #2) use compile-time instrumentation to insert
-validity checks before every memory access and, therefore, require a compiler
-version that supports that.
+Generic KASAN, enabled with CONFIG_KASAN_GENERIC, is the mode intended for
+debugging, similar to userspace ASan. This mode is supported on many CPU
+architectures, but it has significant performance and memory overheads.

-Generic KASAN is supported in GCC and Clang. With GCC, it requires version
-8.3.0 or later. Any supported Clang version is compatible, but detection of
-out-of-bounds accesses for global variables is only supported since Clang 11.
+Software Tag-Based KASAN or SW_TAGS KASAN, enabled with CONFIG_KASAN_SW_TAGS,
+can be used for both debugging and dogfood testing, similar to userspace HWASan.
+This mode is only supported for arm64, but its moderate memory overhead allows
+using it for testing on memory-restricted devices with real workloads.

-Software tag-based KASAN mode is only supported in Clang.
+Hardware Tag-Based KASAN or HW_TAGS KASAN, enabled with CONFIG_KASAN_HW_TAGS,
+is the mode intended to be used as an in-field memory bug detector or as a
+security mitigation. This mode only works on arm64 CPUs that support MTE
+(Memory Tagging Extension), but it has low memory and performance overheads and
+thus can be used in production.

-The hardware KASAN mode (#3) relies on hardware to perform the checks but
-still requires a compiler version that supports memory tagging instructions.
-This mode is supported in GCC 10+ and Clang 11+.
+For details about the memory and performance impact of each KASAN mode, see the
+descriptions of the corresponding Kconfig options.

-Both software KASAN modes work with SLUB and SLAB memory allocators,
-while the hardware tag-based KASAN currently only supports SLUB.
+The Generic and the Software Tag-Based modes are commonly referred to as the
+software modes. The Software Tag-Based and the Hardware Tag-Based modes are
+referred to as the tag-based modes.

-Currently, generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390,
-and riscv architectures, and tag-based KASAN modes are supported only for arm64.
+Support
+-------
+
+Architectures
+~~~~~~~~~~~~~
+
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
+xtensa, and the tag-based KASAN modes are supported only on arm64.
+
+Compilers
+~~~~~~~~~
+
+Software KASAN modes use compile-time instrumentation to insert validity checks
+before every memory access and thus require a compiler version that provides
+support for that. The Hardware Tag-Based mode relies on hardware to perform
+these checks but still requires a compiler version that supports the memory
+tagging instructions.
+
+Generic KASAN requires GCC version 8.3.0 or later
+or any Clang version supported by the kernel.
+
+Software Tag-Based KASAN requires GCC 11+
+or any Clang version supported by the kernel.
+
+Hardware Tag-Based KASAN requires GCC 10+ or Clang 12+.
+
+Memory types
+~~~~~~~~~~~~
+
+Generic KASAN supports finding bugs in all of slab, page_alloc, vmap, vmalloc,
+stack, and global memory.
+
+Software Tag-Based KASAN supports slab, page_alloc, vmalloc, and stack memory.
+
+Hardware Tag-Based KASAN supports slab, page_alloc, and non-executable vmalloc
+memory.
+
+For slab, both software KASAN modes support SLUB and SLAB allocators, while
+Hardware Tag-Based KASAN only supports SLUB.

 Usage
 -----
@@ -45,18 +82,81 @@ To enable KASAN, configure the kernel with::

 	  CONFIG_KASAN=y

-and choose between ``CONFIG_KASAN_GENERIC`` (to enable generic KASAN),
-``CONFIG_KASAN_SW_TAGS`` (to enable software tag-based KASAN), and
-``CONFIG_KASAN_HW_TAGS`` (to enable hardware tag-based KASAN).
+and choose between ``CONFIG_KASAN_GENERIC`` (to enable Generic KASAN),
+``CONFIG_KASAN_SW_TAGS`` (to enable Software Tag-Based KASAN), and
+``CONFIG_KASAN_HW_TAGS`` (to enable Hardware Tag-Based KASAN).

-For software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
+For the software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
 ``CONFIG_KASAN_INLINE``. Outline and inline are compiler instrumentation types.
-The former produces a smaller binary while the latter is 1.1-2 times faster.
+The former produces a smaller binary while the latter is up to 2 times faster.

 To include alloc and free stack traces of affected slab objects into reports,
 enable ``CONFIG_STACKTRACE``. To include alloc and free stack traces of affected
 physical pages, enable ``CONFIG_PAGE_OWNER`` and boot with ``page_owner=on``.

+Boot parameters
+~~~~~~~~~~~~~~~
+
+KASAN is affected by the generic ``panic_on_warn`` command line parameter.
+When it is enabled, KASAN panics the kernel after printing a bug report.
+
+By default, KASAN prints a bug report only for the first invalid memory access.
+With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
+effectively disables ``panic_on_warn`` for KASAN reports.
+
+Alternatively, independent of ``panic_on_warn``, the ``kasan.fault=`` boot
+parameter can be used to control panic and reporting behaviour:
+
+- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
+  report or also panic the kernel (default: ``report``). The panic happens even
+  if ``kasan_multi_shot`` is enabled.
+
+Software and Hardware Tag-Based KASAN modes (see the section about various
+modes below) support altering stack trace collection behavior:
+
+- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
+  traces collection (default: ``on``).
+- ``kasan.stack_ring_size=<number of entries>`` specifies the number of entries
+  in the stack ring (default: ``32768``).
+
+Hardware Tag-Based KASAN mode is intended for use in production as a security
+mitigation. Therefore, it supports additional boot parameters that allow
+disabling KASAN altogether or controlling its features:
+
+- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
+
+- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN
+  is configured in synchronous, asynchronous or asymmetric mode of
+  execution (default: ``sync``).
+  Synchronous mode: a bad access is detected immediately when a tag
+  check fault occurs.
+  Asynchronous mode: a bad access detection is delayed. When a tag check
+  fault occurs, the information is stored in hardware (in the TFSR_EL1
+  register for arm64). The kernel periodically checks the hardware and
+  only reports tag faults during these checks.
+  Asymmetric mode: a bad access is detected synchronously on reads and
+  asynchronously on writes.
+
+- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
+  allocations (default: ``on``).
+
+- ``kasan.page_alloc.sample=<sampling interval>`` makes KASAN tag only every
+  Nth page_alloc allocation with the order equal or greater than
+  ``kasan.page_alloc.sample.order``, where N is the value of the ``sample``
+  parameter (default: ``1``, or tag every such allocation).
+  This parameter is intended to mitigate the performance overhead introduced
+  by KASAN.
+  Note that enabling this parameter makes Hardware Tag-Based KASAN skip checks
+  of allocations chosen by sampling and thus miss bad accesses to these
+  allocations. Use the default value for accurate bug detection.
+
+- ``kasan.page_alloc.sample.order=<minimum page order>`` specifies the minimum
+  order of allocations that are affected by sampling (default: ``3``).
+  Only applies when ``kasan.page_alloc.sample`` is set to a value greater
+  than ``1``.
+  This parameter is intended to allow sampling only large page_alloc
+  allocations, which is the biggest source of the performance overhead.
+
 Error reports
 ~~~~~~~~~~~~~

@@ -146,7 +246,7 @@ is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the
 memory state section of the report shows the state of one of the memory
 granules that surround the accessed address.

-For generic KASAN, the size of each memory granule is 8. The state of each
+For Generic KASAN, the size of each memory granule is 8. The state of each
 granule is encoded in one shadow byte. Those 8 bytes can be accessible,
 partially accessible, freed, or be a part of a redzone. KASAN uses the following
 encoding for each shadow byte: 00 means that all 8 bytes of the corresponding
@@ -171,41 +271,6 @@ traces point to places in code that interacted with the object but that are not
 directly present in the bad access stack trace. Currently, this includes
 call_rcu() and workqueue queuing.

-Boot parameters
-~~~~~~~~~~~~~~~
-
-KASAN is affected by the generic ``panic_on_warn`` command line parameter.
-When it is enabled, KASAN panics the kernel after printing a bug report.
-
-By default, KASAN prints a bug report only for the first invalid memory access.
-With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
-effectively disables ``panic_on_warn`` for KASAN reports.
-
-Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot
-parameter can be used to control panic and reporting behaviour:
-
- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
-  report or also panic the kernel (default: ``report``). The panic happens even
-  if ``kasan_multi_shot`` is enabled.
-
-Hardware tag-based KASAN mode (see the section about various modes below) is
-intended for use in production as a security mitigation. Therefore, it supports
-additional boot parameters that allow disabling KASAN or controlling features:
-
- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
-
- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in
-  synchronous or asynchronous mode of execution (default: ``sync``).
-  Synchronous mode: a bad access is detected immediately when a tag
-  check fault occurs.
-  Asynchronous mode: a bad access detection is delayed. When a tag check
-  fault occurs, the information is stored in hardware (in the TFSR_EL1
-  register for arm64). The kernel periodically checks the hardware and
-  only reports tag faults during these checks.
-
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
-  traces collection (default: ``on``).
-
 Implementation details
 ----------------------

@@ -244,49 +309,46 @@ outline-instrumented kernel.
 Generic KASAN is the only mode that delays the reuse of freed objects via
 quarantine (see mm/kasan/quarantine.c for implementation).

-Software tag-based KASAN
+Software Tag-Based KASAN
 ~~~~~~~~~~~~~~~~~~~~~~~~

-Software tag-based KASAN uses a software memory tagging approach to checking
+Software Tag-Based KASAN uses a software memory tagging approach to checking
 access validity. It is currently only implemented for the arm64 architecture.

-Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
+Software Tag-Based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
 to store a pointer tag in the top byte of kernel pointers. It uses shadow memory
 to store memory tags associated with each 16-byte memory cell (therefore, it
 dedicates 1/16th of the kernel memory for shadow memory).

-On each memory allocation, software tag-based KASAN generates a random tag, tags
+On each memory allocation, Software Tag-Based KASAN generates a random tag, tags
 the allocated memory with this tag, and embeds the same tag into the returned
 pointer.

-Software tag-based KASAN uses compile-time instrumentation to insert checks
+Software Tag-Based KASAN uses compile-time instrumentation to insert checks
 before each memory access. These checks make sure that the tag of the memory
 that is being accessed is equal to the tag of the pointer that is used to access
-this memory. In case of a tag mismatch, software tag-based KASAN prints a bug
+this memory. In case of a tag mismatch, Software Tag-Based KASAN prints a bug
 report.

-Software tag-based KASAN also has two instrumentation modes (outline, which
+Software Tag-Based KASAN also has two instrumentation modes (outline, which
 emits callbacks to check memory accesses; and inline, which performs the shadow
 memory checks inline). With outline instrumentation mode, a bug report is
 printed from the function that performs the access check. With inline
 instrumentation, a ``brk`` instruction is emitted by the compiler, and a
 dedicated ``brk`` handler is used to print bug reports.

-Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
+Software Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
 pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
 reserved to tag freed memory regions.

-Software tag-based KASAN currently only supports tagging of slab and page_alloc
-memory.
-
-Hardware tag-based KASAN
+Hardware Tag-Based KASAN
 ~~~~~~~~~~~~~~~~~~~~~~~~

-Hardware tag-based KASAN is similar to the software mode in concept but uses
+Hardware Tag-Based KASAN is similar to the software mode in concept but uses
 hardware memory tagging support instead of compiler instrumentation and
 shadow memory.

-Hardware tag-based KASAN is currently only implemented for arm64 architecture
+Hardware Tag-Based KASAN is currently only implemented for arm64 architecture
 and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5
 Instruction Set Architecture and Top Byte Ignore (TBI).

@@ -296,26 +358,25 @@ access, hardware makes sure that the tag of the memory that is being accessed is
 equal to the tag of the pointer that is used to access this memory. In case of a
 tag mismatch, a fault is generated, and a report is printed.

-Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
+Hardware Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
 pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
 reserved to tag freed memory regions.

-Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
-memory.
-
-If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
+If the hardware does not support MTE (pre ARMv8.5), Hardware Tag-Based KASAN
 will not be enabled. In this case, all KASAN boot parameters are ignored.

 Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
 enabled. Even when ``kasan.mode=off`` is provided or when the hardware does not
 support MTE (but supports TBI).

-Hardware tag-based KASAN only reports the first found bug. After that, MTE tag
+Hardware Tag-Based KASAN only reports the first found bug. After that, MTE tag
 checking gets disabled.

 Shadow memory
 -------------

+The contents of this section are only applicable to software KASAN modes.
+
 The kernel maps memory in several different parts of the address space.
 The range of kernel virtual addresses is large: there is not enough real
 memory to support a real shadow region for every address that could be
@@ -346,7 +407,7 @@ CONFIG_KASAN_VMALLOC

 With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
 cost of greater memory usage. Currently, this is supported on x86,
-riscv, s390, and powerpc.
+arm64, riscv, s390, and powerpc.

 This works by hooking into vmalloc and vmap and dynamically
 allocating real shadow memory to back the mappings.
@@ -406,19 +467,18 @@ generic ``noinstr`` one.
 Note that disabling compiler instrumentation (either on a per-file or a
 per-function basis) makes KASAN ignore the accesses that happen directly in
 that code for software KASAN modes. It does not help when the accesses happen
-indirectly (through calls to instrumented functions) or with the hardware
-tag-based mode that does not use compiler instrumentation.
+indirectly (through calls to instrumented functions) or with Hardware
+Tag-Based KASAN, which does not use compiler instrumentation.

 For software KASAN modes, to disable KASAN reports in a part of the kernel code
 for the current task, annotate this part of the code with a
 ``kasan_disable_current()``/``kasan_enable_current()`` section. This also
 disables the reports for indirect accesses that happen through function calls.

-For tag-based KASAN modes (include the hardware one), to disable access
-checking, use ``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that
-temporarily disabling access checking via ``page_kasan_tag_reset()`` requires
-saving and restoring the per-page KASAN tag via
-``page_kasan_tag``/``page_kasan_tag_set``.
+For tag-based KASAN modes, to disable access checking, use
+``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that temporarily
+disabling access checking via ``page_kasan_tag_reset()`` requires saving and
+restoring the per-page KASAN tag via ``page_kasan_tag``/``page_kasan_tag_set``.

 Tests
 ~~~~~
--- a/Documentation/device-mapper/dm-bow.txt
+++ b/Documentation/device-mapper/dm-bow.txt
@@ -0,0 +1,99 @@
+dm_bow (backup on write)
+========================
+
+dm_bow is a device mapper driver that uses the free space on a device to back up
+data that is overwritten. The changes can then be committed by a simple state
+change, or rolled back by removing the dm_bow device and running a command line
+utility over the underlying device.
+
+dm_bow has three states, set by writing ‘1’ or ‘2’ to /sys/block/dm-?/bow/state.
+It is only possible to go from state 0 (initial state) to state 1, and then from
+state 1 to state 2.
+
+State 0: dm_bow collects all trims to the device and assumes that these mark
+free space on the overlying file system that can be safely used. Typically the
+mount code would create the dm_bow device, mount the file system, call the
+FITRIM ioctl on the file system then switch to state 1. These trims are not
+propagated to the underlying device.
+
+State 1: All writes to the device cause the underlying data to be backed up to
+the free (trimmed) area as needed in such a way as they can be restored.
+However, the writes, with one exception, then happen exactly as they would
+without dm_bow, so the device is always in a good final state. The exception is
+that sector 0 is used to keep a log of the latest changes, both to indicate that
+we are in this state and to allow rollback. See below for all details. If there
+isn't enough free space, writes are failed with -ENOSPC.
+
+State 2: The transition to state 2 triggers replacing the special sector 0 with
+the normal sector 0, and the freeing of all state information. dm_bow then
+becomes a pass-through driver, allowing the device to continue to be used with
+minimal performance impact.
+
+Usage
+=====
+dm-bow takes one command line parameter, the name of the underlying device.
+
+dm-bow will typically be used in the following way. dm-bow will be loaded with a
+suitable underlying device and the resultant device will be mounted. A file
+system trim will be issued via the FITRIM ioctl, then the device will be
+switched to state 1. The file system will now be used as normal. At some point,
+the changes can either be committed by switching to state 2, or rolled back by
+unmounting the file system, removing the dm-bow device and running the command
+line utility. Note that rebooting the device will be equivalent to unmounting
+and removing, but the command line utility must still be run
+
+Details of operation in state 1
+===============================
+
+dm_bow maintains a type for all sectors. A sector can be any of:
+
+SECTOR0
+SECTOR0_CURRENT
+UNCHANGED
+FREE
+CHANGED
+BACKUP
+
+SECTOR0 is the first sector on the device, and is used to hold the log of
+changes. This is the one exception.
+
+SECTOR0_CURRENT is a sector picked from the FREE sectors, and is where reads and
+writes from the true sector zero are redirected to. Note that like any backup
+sector, if the sector is written to directly, it must be moved again.
+
+UNCHANGED means that the sector has not been changed since we entered state 1.
+Thus if it is written to or trimmed, the contents must first be backed up.
+
+FREE means that the sector was trimmed in state 0 and has not yet been written
+to or used for backup. On being written to, a FREE sector is changed to CHANGED.
+
+CHANGED means that the sector has been modified, and can be further modified
+without further backup.
+
+BACKUP means that this is a free sector being used as a backup. On being written
+to, the contents must first be backed up again.
+
+All backup operations are logged to the first sector. The log sector has the
+format:
+--------------------------------------------------------
+| Magic | Count | Sequence | Log entry | Log entry | …
+--------------------------------------------------------
+
+Magic is a magic number. Count is the number of log entries. Sequence is 0
+initially. A log entry is
+
+-----------------------------------
+| Source | Dest | Size | Checksum |
+-----------------------------------
+
+When SECTOR0 is full, the log sector is backed up and another empty log sector
+created with sequence number one higher. The first entry in any log entry with
+sequence > 0 therefore must be the log of the backing up of the previous log
+sector. Note that sequence is not strictly needed, but is a useful sanity check
+and potentially limits the time spent trying to restore a corrupted snapshot.
+
+On entering state 1, dm_bow has a list of free sectors. All other sectors are
+unchanged. Sector0_current is selected from the free sectors and the contents of
+sector 0 are copied there. The sector 0 is backed up, which triggers the first
+log entry to be written.
+
--- a/Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml
+++ b/Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/misc/qemu,vcpu-stall-detector.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: VCPU stall detector
+
+description:
+  This binding describes a CPU stall detector mechanism for virtual CPUs
+  which is accessed through MMIO.
+
+maintainers:
+  - Sebastian Ene <sebastianene@google.com>
+
+properties:
+  compatible:
+    enum:
+      - qemu,vcpu-stall-detector
+
+  reg:
+    maxItems: 1
+
+  clock-frequency:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      The internal clock of the stall detector peripheral measure in Hz used
+      to decrement its internal counter register on each tick.
+      Defaults to 10 if unset.
+    default: 10
+
+  timeout-sec:
+    description: |
+      The stall detector expiration timeout measured in seconds.
+      Defaults to 8 if unset. Please note that it also takes into account the
+      time spent while the VCPU is not running.
+    default: 8
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+    vmwdt@9030000 {
+      compatible = "qemu,vcpu-stall-detector";
+      reg = <0x9030000 0x10000>;
+      clock-frequency = <10>;
+      timeout-sec = <8>;
+    };
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
@@ -119,6 +119,18 @@ properties:
      If present, HS400 command responses are sampled on rising edges.
      If not present, HS400 command responses are sampled on falling edges.

+  mediatek,hs400-ds-dly3:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Gear of the third delay line for DS for input data latch in data
+      pad macro, there are 32 stages from 0 to 31.
+      For different corner IC, the time is different about one step, it is
+      about 100ps.
+      The value is confirmed by doing scan and calibration to find a best
+      value with corner IC and it is valid only for HS400 mode.
+    minimum: 0
+    maximum: 31
+
  mediatek,latch-ck:
    $ref: /schemas/types.yaml#/definitions/uint32
    description:
--- a/Documentation/devicetree/bindings/perf/arm,cmn.yaml
+++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
@@ -12,12 +12,14 @@ maintainers:

 properties:
  compatible:
-    const: arm,cmn-600
+    enum:
+      - arm,cmn-600
+      - arm,ci-700

  reg:
    items:
      - description: Physical address of the base (PERIPHBASE) and
-          size (up to 64MB) of the configuration address space.
+          size of the configuration address space.

  interrupts:
    minItems: 1
@@ -31,14 +33,23 @@ properties:

  arm,root-node:
    $ref: /schemas/types.yaml#/definitions/uint32
-    description: Offset from PERIPHBASE of the configuration
-      discovery node (see TRM definition of ROOTNODEBASE).
+    description: Offset from PERIPHBASE of CMN-600's configuration
+      discovery node (see TRM definition of ROOTNODEBASE). Not
+      relevant for newer CMN/CI products.

 required:
  - compatible
  - reg
  - interrupts
-  - arm,root-node
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: arm,cmn-600
+then:
+  required:
+    - arm,root-node

 additionalProperties: false

--- a/Documentation/devicetree/bindings/reserved-memory/google,open-dice.yaml
+++ b/Documentation/devicetree/bindings/reserved-memory/google,open-dice.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reserved-memory/google,open-dice.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Open Profile for DICE Device Tree Bindings
+
+description: |
+  This binding represents a reserved memory region containing data
+  generated by the Open Profile for DICE protocol.
+
+  See https://pigweed.googlesource.com/open-dice/
+
+maintainers:
+  - David Brazdil <dbrazdil@google.com>
+
+allOf:
+  - $ref: "reserved-memory.yaml"
+
+properties:
+  compatible:
+    const: google,open-dice
+
+  reg:
+    description: page-aligned region of memory containing DICE data
+
+required:
+  - compatible
+  - reg
+  - no-map
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    reserved-memory {
+        #address-cells = <2>;
+        #size-cells = <1>;
+
+        dice: dice@12340000 {
+            compatible = "google,open-dice";
+            reg = <0x00 0x12340000 0x2000>;
+            no-map;
+        };
+    };
--- a/Documentation/filesystems/OWNERS
+++ b/Documentation/filesystems/OWNERS
@@ -0,0 +1 @@
+per-file f2fs**=file:/fs/f2fs/OWNERS
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -19,9 +19,10 @@ It is designed as a better filesystem solution for the following scenarios:
   immutable and bit-for-bit identical to the official golden image for
   their releases due to security and other considerations and

- - hope to save some extra storage space with guaranteed end-to-end performance
-   by using reduced metadata and transparent file compression, especially
-   for those embedded devices with limited memory (ex, smartphone);
+ - hope to minimize extra storage space with guaranteed end-to-end performance
+   by using compact layout, transparent file compression and direct access,
+   especially for those embedded devices with limited memory and high-density
+   hosts with numerous containers;

 Here is the main features of EROFS:

@@ -51,7 +52,9 @@ Here is the main features of EROFS:
 - Support POSIX.1e ACLs by using xattrs;

 - Support transparent data compression as an option:
-   LZ4 algorithm with the fixed-sized output compression for high performance.
+   LZ4 algorithm with the fixed-sized output compression for high performance;
+
+ - Multiple device support for multi-layer container images.

 The following git tree provides the file system user-space tools under
 development (ex, formatting tool mkfs.erofs):
@@ -87,8 +90,17 @@ cache_strategy=%s      Select a strategy for cached decompression from now on:
 dax={always,never}     Use direct access (no page cache).  See
                       Documentation/filesystems/dax.rst.
 dax                    A legacy option which is an alias for ``dax=always``.
+device=%s              Specify a path to an extra device to be used together.
 ===================    =========================================================

+Sysfs Entries
+=============
+
+Information about mounted erofs file systems can be found in /sys/fs/erofs.
+Each mounted filesystem will have a directory in /sys/fs/erofs based on its
+device name (i.e., /sys/fs/erofs/sda).
+(see also Documentation/ABI/testing/sysfs-fs-erofs)
+
 On-disk details
 ===============

--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -25,10 +25,14 @@ a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).

 - git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git

-For reporting bugs and sending patches, please use the following mailing list:
+For sending patches, please use the following mailing list:

 - linux-f2fs-devel@lists.sourceforge.net

+For reporting bugs, please use the following f2fs bug tracker link:
+
+- https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
+
 Background and Design issues
 ============================

@@ -154,6 +158,8 @@ nobarrier		 This option can be used if underlying storage guarantees
 			 If this option is set, no cache_flush commands are issued
 			 but f2fs still guarantees the write ordering of all the
 			 data writes.
+barrier		 If this option is set, cache_flush commands are allowed to be
+			 issued.
 fastboot		 This option is used when a system wants to reduce mount
 			 time as much as possible, even though normal performance
 			 can be sacrificed.
@@ -198,10 +204,30 @@ fault_type=%d		 Support configuring fault injection type, should be
 			 FAULT_WRITE_IO		  0x000004000
 			 FAULT_SLAB_ALLOC	  0x000008000
 			 FAULT_DQUOT_INIT	  0x000010000
+			 FAULT_LOCK_OP		  0x000020000
+			 FAULT_BLKADDR		  0x000040000
 			 ===================	  ===========
 mode=%s			 Control block allocation mode which supports "adaptive"
 			 and "lfs". In "lfs" mode, there should be no random
 			 writes towards main area.
+			 "fragment:segment" and "fragment:block" are newly added here.
+			 These are developer options for experiments to simulate filesystem
+			 fragmentation/after-GC situation itself. The developers use these
+			 modes to understand filesystem fragmentation/after-GC condition well,
+			 and eventually get some insights to handle them better.
+			 In "fragment:segment", f2fs allocates a new segment in ramdom
+			 position. With this, we can simulate the after-GC condition.
+			 In "fragment:block", we can scatter block allocation with
+			 "max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
+			 We added some randomness to both chunk and hole size to make
+			 it close to realistic IO pattern. So, in this mode, f2fs will allocate
+			 1..<max_fragment_chunk> blocks in a chunk and make a hole in the
+			 length of 1..<max_fragment_hole> by turns. With this, the newly
+			 allocated blocks will be scattered throughout the whole partition.
+			 Note that "fragment:block" implicitly enables "fragment:segment"
+			 option for more randomness.
+			 Please, use these options for your experiments and we strongly
+			 recommend to re-format the filesystem after using these options.
 io_bits=%u		 Set the bit size of write IO requests. It should be set
 			 with "mode=lfs".
 usrquota		 Enable plain user disk quota accounting.
@@ -216,12 +242,6 @@ offgrpjquota		 Turn off group journalled quota.
 offprjjquota		 Turn off project journalled quota.
 quota			 Enable plain user disk quota accounting.
 noquota			 Disable all plain disk quota option.
-whint_mode=%s		 Control which write hints are passed down to block
-			 layer. This supports "off", "user-based", and
-			 "fs-based".  In "off" mode (default), f2fs does not pass
-			 down hints. In "user-based" mode, f2fs tries to pass
-			 down hints given by users. And in "fs-based" mode, f2fs
-			 passes down hints with its policy.
 alloc_mode=%s		 Adjust block allocation policy, which supports "reuse"
 			 and "default".
 fsync_mode=%s		 Control the policy of fsync. Currently supports "posix",
@@ -323,6 +343,15 @@ discard_unit=%s		 Control discard unit, the argument can be "block", "segment"
 			 default, it is helpful for large sized SMR or ZNS devices to
 			 reduce memory cost by getting rid of fs metadata supports small
 			 discard.
+memory=%s		 Control memory mode. This supports "normal" and "low" modes.
+			 "low" mode is introduced to support low memory devices.
+			 Because of the nature of low memory devices, in this mode, f2fs
+			 will try to save memory sometimes by sacrificing performance.
+			 "normal" mode is the default mode and same as before.
+age_extent_cache	 Enable an age extent cache based on rb-tree. It records
+			 data block update frequency of the extent per inode, in
+			 order to provide better temperature hints for data block
+			 allocation.
 ======================== ============================================================

 Debugfs Entries
@@ -732,70 +761,6 @@ In order to identify whether the data in the victim segment are valid or not,
 F2FS manages a bitmap. Each bit represents the validity of a block, and the
 bitmap is composed of a bit stream covering whole blocks in main area.

-Write-hint Policy
-----------------
-
-1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
-
-2) whint_mode=user-based. F2FS tries to pass down hints given by
-users.
-
-===================== ======================== ===================
-User                  F2FS                     Block
-===================== ======================== ===================
-N/A                   META                     WRITE_LIFE_NOT_SET
-N/A                   HOT_NODE                 "
-N/A                   WARM_NODE                "
-N/A                   COLD_NODE                "
-ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
-extension list        "                        "
-
-- buffered io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE       "                        "
-WRITE_LIFE_MEDIUM     "                        "
-WRITE_LIFE_LONG       "                        "
-
-- direct io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
-WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
-WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
-===================== ======================== ===================
-
-3) whint_mode=fs-based. F2FS passes down hints with its policy.
-
-===================== ======================== ===================
-User                  F2FS                     Block
-===================== ======================== ===================
-N/A                   META                     WRITE_LIFE_MEDIUM;
-N/A                   HOT_NODE                 WRITE_LIFE_NOT_SET
-N/A                   WARM_NODE                "
-N/A                   COLD_NODE                WRITE_LIFE_NONE
-ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
-extension list        "                        "
-
-- buffered io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
-WRITE_LIFE_NONE       "                        "
-WRITE_LIFE_MEDIUM     "                        "
-WRITE_LIFE_LONG       "                        "
-
-- direct io
-WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
-WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
-WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
-===================== ======================== ===================
-
 Fallocate(2) Policy
 -------------------

--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -77,11 +77,11 @@ Side-channel attacks

 fscrypt is only resistant to side-channel attacks, such as timing or
 electromagnetic attacks, to the extent that the underlying Linux
-Cryptographic API algorithms are.  If a vulnerable algorithm is used,
-such as a table-based implementation of AES, it may be possible for an
-attacker to mount a side channel attack against the online system.
-Side channel attacks may also be mounted against applications
-consuming decrypted data.
+Cryptographic API algorithms or inline encryption hardware are.  If a
+vulnerable algorithm is used, such as a table-based implementation of
+AES, it may be possible for an attacker to mount a side channel attack
+against the online system.  Side channel attacks may also be mounted
+against applications consuming decrypted data.

 Unauthorized file access
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -337,6 +337,8 @@ Currently, the following pairs of encryption modes are supported:
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
 - Adiantum for both contents and filenames
+- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
+- SM4-XTS for contents and SM4-CTS-CBC for filenames (v2 policies only)

 If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.

@@ -357,6 +359,23 @@ To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
 implementations of ChaCha and NHPoly1305 should be enabled, e.g.
 CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.

+AES-256-HCTR2 is another true wide-block encryption mode that is intended for
+use on CPUs with dedicated crypto instructions.  AES-256-HCTR2 has the property
+that a bitflip in the plaintext changes the entire ciphertext.  This property
+makes it desirable for filename encryption since initialization vectors are
+reused within a directory.  For more details on AES-256-HCTR2, see the paper
+"Length-preserving encryption with HCTR2"
+(https://eprint.iacr.org/2021/1441.pdf).  To use AES-256-HCTR2,
+CONFIG_CRYPTO_HCTR2 must be enabled.  Also, fast implementations of XCTR and
+POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
+CRYPTO_AES_ARM64_CE_BLK for ARM64.
+
+SM4 is a Chinese block cipher that is an alternative to AES.  It has
+not seen as much security review as AES, and it only has a 128-bit key
+size.  It may be useful in cases where its use is mandated.
+Otherwise, it should not be used.  For SM4 support to be available, it
+also needs to be enabled in the kernel crypto API.
+
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing
@@ -404,11 +423,11 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
 inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
 Thus, IV reuse is limited to within a single directory.

-With CTS-CBC, the IV reuse means that when the plaintext filenames
-share a common prefix at least as long as the cipher block size (16
-bytes for AES), the corresponding encrypted filenames will also share
-a common prefix.  This is undesirable.  Adiantum does not have this
-weakness, as it is a wide-block encryption mode.
+With CTS-CBC, the IV reuse means that when the plaintext filenames share a
+common prefix at least as long as the cipher block size (16 bytes for AES), the
+corresponding encrypted filenames will also share a common prefix.  This is
+undesirable.  Adiantum and HCTR2 do not have this weakness, as they are
+wide-block encryption modes.

 All supported filenames encryption modes accept any plaintext length
 >= 16 bytes; cipher block alignment is not required.  However,
@@ -1047,8 +1066,8 @@ astute users may notice some differences in behavior:
  may be used to overwrite the source files but isn't guaranteed to be
  effective on all filesystems and storage devices.

- Direct I/O is not supported on encrypted files.  Attempts to use
-  direct I/O on such files will fall back to buffered I/O.
+- Direct I/O is supported on encrypted files only under some
+  circumstances.  For details, see `Direct I/O support`_.

 - The fallocate operations FALLOC_FL_COLLAPSE_RANGE and
  FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will
@@ -1135,6 +1154,71 @@ where applications may later write sensitive data.  It is recommended
 that systems implementing a form of "verified boot" take advantage of
 this by validating all top-level encryption policies prior to access.

+Inline encryption support
+=========================
+
+By default, fscrypt uses the kernel crypto API for all cryptographic
+operations (other than HKDF, which fscrypt partially implements
+itself).  The kernel crypto API supports hardware crypto accelerators,
+but only ones that work in the traditional way where all inputs and
+outputs (e.g. plaintexts and ciphertexts) are in memory.  fscrypt can
+take advantage of such hardware, but the traditional acceleration
+model isn't particularly efficient and fscrypt hasn't been optimized
+for it.
+
+Instead, many newer systems (especially mobile SoCs) have *inline
+encryption hardware* that can encrypt/decrypt data while it is on its
+way to/from the storage device.  Linux supports inline encryption
+through a set of extensions to the block layer called *blk-crypto*.
+blk-crypto allows filesystems to attach encryption contexts to bios
+(I/O requests) to specify how the data will be encrypted or decrypted
+in-line.  For more information about blk-crypto, see
+:ref:`Documentation/block/inline-encryption.rst <inline_encryption>`.
+
+On supported filesystems (currently ext4 and f2fs), fscrypt can use
+blk-crypto instead of the kernel crypto API to encrypt/decrypt file
+contents.  To enable this, set CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y in
+the kernel configuration, and specify the "inlinecrypt" mount option
+when mounting the filesystem.
+
+Note that the "inlinecrypt" mount option just specifies to use inline
+encryption when possible; it doesn't force its use.  fscrypt will
+still fall back to using the kernel crypto API on files where the
+inline encryption hardware doesn't have the needed crypto capabilities
+(e.g. support for the needed encryption algorithm and data unit size)
+and where blk-crypto-fallback is unusable.  (For blk-crypto-fallback
+to be usable, it must be enabled in the kernel configuration with
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y.)
+
+Currently fscrypt always uses the filesystem block size (which is
+usually 4096 bytes) as the data unit size.  Therefore, it can only use
+inline encryption hardware that supports that data unit size.
+
+Inline encryption doesn't affect the ciphertext or other aspects of
+the on-disk format, so users may freely switch back and forth between
+using "inlinecrypt" and not using "inlinecrypt".
+
+Direct I/O support
+==================
+
+For direct I/O on an encrypted file to work, the following conditions
+must be met (in addition to the conditions for direct I/O on an
+unencrypted file):
+
+* The file must be using inline encryption.  Usually this means that
+  the filesystem must be mounted with ``-o inlinecrypt`` and inline
+  encryption hardware must be present.  However, a software fallback
+  is also available.  For details, see `Inline encryption support`_.
+
+* The I/O request must be fully aligned to the filesystem block size.
+  This means that the file position the I/O is targeting, the lengths
+  of all I/O segments, and the memory addresses of all I/O buffers
+  must be multiples of this value.  Note that the filesystem block
+  size may be greater than the logical block size of the block device.
+
+If either of the above conditions is not met, then direct I/O on the
+encrypted file will fall back to buffered I/O.
+
 Implementation details
 ======================

@@ -1184,6 +1268,13 @@ keys`_ and `DIRECT_KEY policies`_.
 Data path changes
 -----------------

+When inline encryption is used, filesystems just need to associate
+encryption contexts with bios to specify how the block layer or the
+inline encryption hardware will encrypt/decrypt the file contents.
+
+When inline encryption isn't used, filesystems must encrypt/decrypt
+the file contents themselves, as described below:
+
 For the read path (->readpage()) of regular files, filesystems can
 read the ciphertext into the page cache and decrypt it in-place.  The
 page lock must be held until decryption has finished, to prevent the
@@ -1197,18 +1288,6 @@ buffer.  Some filesystems, such as UBIFS, already use temporary
 buffers regardless of encryption.  Other filesystems, such as ext4 and
 F2FS, have to allocate bounce pages specially for encryption.

-Fscrypt is also able to use inline encryption hardware instead of the
-kernel crypto API for en/decryption of file contents.  When possible,
-and if directed to do so (by specifying the 'inlinecrypt' mount option
-for an ext4/F2FS filesystem), it adds encryption contexts to bios and
-uses blk-crypto to perform the en/decryption instead of making use of
-the above read/write path changes.  Of course, even if directed to
-make use of inline encryption, fscrypt will only be able to do so if
-either hardware inline encryption support is available for the
-selected encryption algorithm or CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
-is selected.  If neither is the case, fscrypt will fall back to using
-the above mentioned read/write path changes for en/decryption.
-
 Filename hashing and encoding
 -----------------------------

--- a/Documentation/filesystems/incfs.rst
+++ b/Documentation/filesystems/incfs.rst
@@ -0,0 +1,85 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+incfs: A stacked incremental filesystem for Linux
+=================================================
+
+/sys/fs interface
+=================
+
+Please update Documentation/ABI/testing/sysfs-fs-incfs if you update this
+section.
+
+incfs creates the following files in /sys/fs.
+
+Features
+--------
+
+/sys/fs/incremental-fs/features/corefs
+  Reads 'supported'. Always present.
+
+/sys/fs/incremental-fs/features/v2
+  Reads 'supported'. Present if all v2 features of incfs are supported. These
+  are:
+    fs-verity support
+    inotify support
+    ioclts:
+      INCFS_IOC_SET_READ_TIMEOUTS
+      INCFS_IOC_GET_READ_TIMEOUTS
+      INCFS_IOC_GET_BLOCK_COUNT
+      INCFS_IOC_CREATE_MAPPED_FILE
+    .incomplete folder
+    .blocks_written pseudo file
+    report_uid mount option
+
+/sys/fs/incremental-fs/features/zstd
+  Reads 'supported'. Present if zstd compression is supported for data blocks.
+
+/sys/fs/incremental-fs/features/bugfix_throttling
+  Reads 'supported'. Present if the throttling lock bug is fixed
+
+Optional per mount
+------------------
+
+For each incfs mount, the mount option sysfs_name=[name] creates a /sys/fs
+node called:
+
+/sys/fs/incremental-fs/instances/[name]
+
+This will contain the following files:
+
+/sys/fs/incremental-fs/instances/[name]/reads_delayed_min
+  Returns a count of the number of reads that were delayed as a result of the
+  per UID read timeouts min time setting.
+
+/sys/fs/incremental-fs/instances/[name]/reads_delayed_min_us
+  Returns total delay time for all files since first mount as a result of the
+  per UID read timeouts min time setting.
+
+/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending
+  Returns a count of the number of reads that were delayed as a result of
+  waiting for a pending read.
+
+/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending_us
+  Returns total delay time for all files since first mount as a result of
+  waiting for a pending read.
+
+/sys/fs/incremental-fs/instances/[name]/reads_failed_hash_verification
+  Returns number of reads that failed because of hash verification failures.
+
+/sys/fs/incremental-fs/instances/[name]/reads_failed_other
+  Returns number of reads that failed for reasons other than timing out or
+  hash failures.
+
+/sys/fs/incremental-fs/instances/[name]/reads_failed_timed_out
+  Returns number of reads that timed out.
+
+For reads_delayed_*** settings, note that a file can count for both
+reads_delayed_min and reads_delayed_pending if incfs first waits for a pending
+read then has to wait further for the min time. In that case, the time spent
+waiting is split between reads_delayed_pending_us, which is increased by the
+time spent waiting for the pending read, and reads_delayed_min_us, which is
+increased by the remainder of the time spent waiting.
+
+Reads that timed out are not added to the reads_delayed_pending or the
+reads_delayed_pending_us counters.
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -195,7 +195,7 @@ handle it in two different ways:

 1. return EXDEV error: this error is returned by rename(2) when trying to
   move a file or directory across filesystem boundaries.  Hence
-   applications are usually prepared to hande this error (mv(1) for example
+   applications are usually prepared to handle this error (mv(1) for example
   recursively copies the directory tree).  This is the default behavior.

 2. If the "redirect_dir" feature is enabled, then the directory will be
@@ -324,6 +324,30 @@ and
 The resulting access permissions should be the same.  The difference is in
 the time of copy (on-demand vs. up-front).

+### Non overlapping credentials
+
+As noted above, all access to the upper, lower and work directories is the
+recorded mounter's MAC and DAC credentials.  The incoming accesses are
+checked against the caller's credentials.
+
+In the case where caller MAC or DAC credentials do not overlap the mounter, a
+use case available in older versions of the driver, the override_creds mount
+flag can be turned off.  For when the use pattern has caller with legitimate
+credentials where the mounter does not.  For example init may have been the
+mounter, but the caller would have execute or read MAC permissions where
+init would not.  override_creds off means all access, incoming, upper, lower
+or working, will be tested against the caller.
+
+Several unintended side effects will occur though.  The caller without certain
+key capabilities or lower privilege will not always be able to delete files or
+directories, create nodes, or search some restricted directories.  The ability
+to search and read a directory entry is spotty as a result of the cache
+mechanism not re-testing the credentials because of the assumption, a
+privileged caller can fill cache, then a lower privilege can read the directory
+cache.  The uneven security model where cache, upperdir and workdir are opened
+at privilege, but accessed without creating a form of privilege escalation,
+should only be used with strict understanding of the side effects and of the
+security policies.

 Multiple lower layers
 ---------------------
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -426,12 +426,14 @@ with the memory region, as the case would be with BSS (uninitialized data).
 The "pathname" shows the name associated file for this mapping.  If the mapping
 is not associated with a file:

- =======                    ====================================
+ =============              ====================================
 [heap]                     the heap of the program
 [stack]                    the stack of the main process
 [vdso]                     the "virtual dynamic shared object",
                            the kernel system call handler
- =======                    ====================================
+ [anon:<name>]              an anonymous mapping that has been
+                            named by userspace
+ =============              ====================================

 or if empty, the mapping is anonymous.

@@ -971,6 +973,7 @@ You may not have all of these fields.
    SReclaimable:   159856 kB
    SUnreclaim:     124508 kB
    PageTables:      24448 kB
+    SecPageTables:       0 kB
    NFS_Unstable:        0 kB
    Bounce:              0 kB
    WritebackTmp:        0 kB
@@ -1065,6 +1068,9 @@ SUnreclaim
 PageTables
              amount of memory dedicated to the lowest level of page
              tables.
+SecPageTables
+              Memory consumed by secondary page tables, this currently
+              currently includes KVM mmu allocations on x86 and arm64.
 NFS_Unstable
              Always zero. Previous counted pages which had been written to
              the server, but has not been committed to stable storage.
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -77,6 +77,17 @@ HOSTLDLIBS
 ----------
 Additional libraries to link against when building host programs.

+.. _userkbuildflags:
+
+USERCFLAGS
+----------
+Additional options used for $(CC) when compiling userprogs.
+
+USERLDFLAGS
+-----------
+Additional options used for $(LD) when linking userprogs. userprogs are linked
+with CC, so $(USERLDFLAGS) should include "-Wl," prefix as applicable.
+
 KBUILD_KCONFIG
 --------------
 Set the top-level Kconfig file to the value of this environment
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -982,6 +982,8 @@ The syntax is quite similar. The difference is to use "userprogs" instead of

 	When linking bpfilter_umh, it will be passed the extra option -static.

+	From command line, :ref:`USERCFLAGS and USERLDFLAGS <userkbuildflags>` will also be used.
+
 5.4 When userspace programs are actually built
 ----------------------------------------------

--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -21,6 +21,7 @@ This document describes how to build an out-of-tree kernel module.
 	   --- 4.1 Kernel Includes
 	   --- 4.2 Single Subdirectory
 	   --- 4.3 Several Subdirectories
+	   --- 4.4 UAPI Headers Installation
 	=== 5. Module Installation
 	   --- 5.1 INSTALL_MOD_PATH
 	   --- 5.2 INSTALL_MOD_DIR
@@ -131,6 +132,10 @@ executed to make module versioning work.
 		/lib/modules/<kernel_release>/extra/, but a prefix may
 		be added with INSTALL_MOD_PATH (discussed in section 5).

+	headers_install
+		Export headers in a format suitable for userspace. The default
+		location is $PWD/usr. INSTALL_HDR_PATH can change this path.
+
 	clean
 		Remove all generated files in the module directory only.

@@ -406,6 +411,17 @@ according to the following rule:
 	pointing to the directory where the currently executing kbuild
 	file is located.

+4.4 UAPI Headers Installation
+-----------------------------
+
+	External modules may export headers to userspace in a similar
+	fashion to the in-tree counterpart drivers. kbuild supports
+	running headers_install target in an out-of-tree. The location
+	where kbuild searches for headers is $(M)/include/uapi and
+	$(M)/arch/$(SRCARCH)/include/uapi.
+
+	See also Documentation/kbuild/headers_install.rst.
+

 5. Module Installation
 ======================
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1133,6 +1133,19 @@ ip_local_reserved_ports - list of comma separated ranges

 	Default: Empty

+ip_local_unbindable_ports - list of comma separated ranges
+	Specify the ports which are not directly bind()able.
+
+	Usually you would use this to block the use of ports which
+	are invalid due to something outside of the control of the
+	kernel.  For example a port stolen by the nic for serial
+	console, remote power management or debugging.
+
+	There's a relatively high chance you will also want to list
+	these ports in 'ip_local_reserved_ports' to prevent autobinding.
+
+	Default: Empty
+
 ip_unprivileged_port_start - INTEGER
 	This is a per-namespace sysctl.  It defines the first
 	unprivileged port in the network namespace.  Privileged ports
--- a/Documentation/scheduler/sched-energy.rst
+++ b/Documentation/scheduler/sched-energy.rst
@@ -402,7 +402,7 @@ Consequently, the only sane governor to use together with EAS is schedutil,
 because it is the only one providing some degree of consistency between
 frequency requests and energy predictions.

-Using EAS with any other governor than schedutil is not supported.
+Using EAS with any other governor than schedutil is not recommended.


 6.5 Scale-invariant utilization signals
--- a/Documentation/scsi/scsi_eh.rst
+++ b/Documentation/scsi/scsi_eh.rst
@@ -93,16 +93,19 @@ function
 1. invokes optional hostt->eh_timed_out() callback.  Return value can
    be one of

-    - BLK_EH_RESET_TIMER
+    - SCSI_EH_RESET_TIMER
 	This indicates that more time is required to finish the
 	command.  Timer is restarted.  This action is counted as a
 	retry and only allowed scmd->allowed + 1(!) times.  Once the
 	limit is reached, action for BLK_EH_DONE is taken instead.

-    - BLK_EH_DONE
+    - SCSI_EH_NOT_HANDLED
        eh_timed_out() callback did not handle the command.
 	Step #2 is taken.

+    - SCSI_EH_DONE
+        eh_timed_out() completed the command.
+
 2. scsi_abort_command() is invoked to schedule an asynchrous abort.
    Asynchronous abort are not invoked for commands which the
    SCSI_EH_ABORT_SCHEDULED flag is set (this indicates that the command
--- a/Documentation/scsi/ufs.rst
+++ b/Documentation/scsi/ufs.rst
@@ -17,6 +17,8 @@ Universal Flash Storage
     3.2 UTP Transfer requests
     3.3 UFS error handling
     3.4 SCSI Error handling
+   4. BSG Support
+   5. UFS Reference Clock Frequency configuration


 1. Overview
@@ -193,3 +195,16 @@ UFS Specifications can be found at:

 - UFS - http://www.jedec.org/sites/default/files/docs/JESD220.pdf
 - UFSHCI - http://www.jedec.org/sites/default/files/docs/JESD223.pdf
+
+5. UFS Reference Clock Frequency configuration
+==============================================
+
+Devicetree can define a clock named "ref_clk" under the UFS controller node
+to specify the intended reference clock frequency for the UFS storage
+parts. ACPI-based system can specify the frequency using ACPI
+Device-Specific Data property named "ref-clk-freq". In both ways the value
+is interpreted as frequency in Hz and must match one of the values given in
+the UFS specification. UFS subsystem will attempt to read the value when
+executing common controller initialization. If the value is available, UFS
+subsytem will ensure the bRefClkFreq attribute of the UFS storage device is
+set accordingly and will modify it if there is a mismatch.
--- a/Documentation/sound/alsa-configuration.rst
+++ b/Documentation/sound/alsa-configuration.rst
@@ -100,6 +100,15 @@ amidi_map
    MIDI device number maps assigned to the 2st OSS device;
    Default: 1

+Module snd-soc-core
+-------------------
+
+The soc core module. It is used by all ALSA card drivers.
+It takes the following options which have global effects.
+
+prealloc_buffer_size_kbytes
+    Specify prealloc buffer size in kbytes (default: 512).
+
 Common parameters for top sound card modules
 --------------------------------------------

--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -1763,6 +1763,21 @@ using the same key and variable from yet another event::

  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger

+Expressions support the use of addition, subtraction, multiplication and
+division operators (+-\*/).
+
+Note if division by zero cannot be detected at parse time (i.e. the
+divisor is not a constant), the result will be -1.
+
+Numeric constants can also be used directly in an expression::
+
+  # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/1000000 ...' >> event/trigger
+
+or assigned to a variable and referenced in a subsequent expression::
+
+  # echo 'hist:keys=next_pid:us_per_sec=1000000 ...' >> event/trigger
+  # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/$us_per_sec ...' >> event/trigger
+
 2.2.2 Synthetic Events
 ----------------------

--- a/Documentation/usb/gadget-testing.rst
+++ b/Documentation/usb/gadget-testing.rst
@@ -784,6 +784,7 @@ The uvc function provides these attributes in its function directory:
 	streaming_maxpacket maximum packet size this endpoint is capable of
 			    sending or receiving when this configuration is
 			    selected
+	function_name       name of the interface
 	=================== ================================================

 There are also "control" and "streaming" subdirectories, each of which contain
--- a/Documentation/userspace-api/media/v4l/control.rst
+++ b/Documentation/userspace-api/media/v4l/control.rst
@@ -242,8 +242,17 @@ Control IDs
    * - ``V4L2_COLORFX_SET_CBCR``
      - The Cb and Cr chroma components are replaced by fixed coefficients
 	determined by ``V4L2_CID_COLORFX_CBCR`` control.
+    * - ``V4L2_COLORFX_SET_RGB``
+      - The RGB components are replaced by the fixed RGB components determined
+        by ``V4L2_CID_COLORFX_RGB`` control.


+``V4L2_CID_COLORFX_RGB`` ``(integer)``
+    Determines the Red, Green, and Blue coefficients for
+    ``V4L2_COLORFX_SET_RGB`` color effect.
+    Bits [7:0] of the supplied 32 bit value are interpreted as Blue component,
+    bits [15:8] as Green component, bits [23:16] as Red component, and
+    bits [31:24] must be zero.

 ``V4L2_CID_COLORFX_CBCR`` ``(integer)``
    Determines the Cb and Cr coefficients for ``V4L2_COLORFX_SET_CBCR``
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -414,7 +414,7 @@ kvm_run' (see below).
 -----------------

 :Capability: basic
-:Architectures: all except ARM, arm64
+:Architectures: all except arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_regs (out)
 :Returns: 0 on success, -1 on error
@@ -447,7 +447,7 @@ Reads the general purpose registers from the vcpu.
 -----------------

 :Capability: basic
-:Architectures: all except ARM, arm64
+:Architectures: all except arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_regs (in)
 :Returns: 0 on success, -1 on error
@@ -804,7 +804,7 @@ Writes the floating point state to the vcpu.
 -----------------------

 :Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-:Architectures: x86, ARM, arm64, s390
+:Architectures: x86, arm64, s390
 :Type: vm ioctl
 :Parameters: none
 :Returns: 0 on success, -1 on error
@@ -813,7 +813,7 @@ Creates an interrupt controller model in the kernel.
 On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
 future vcpus to have a local APIC.  IRQ routing for GSIs 0-15 is set to both
 PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
-On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
+On arm64, a GICv2 is created. Any other GIC versions require the usage of
 KVM_CREATE_DEVICE, which also supports creating a GICv2.  Using
 KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
 On s390, a dummy irq routing table is created.
@@ -826,7 +826,7 @@ before KVM_CREATE_IRQCHIP can be used.
 -----------------

 :Capability: KVM_CAP_IRQCHIP
-:Architectures: x86, arm, arm64
+:Architectures: x86, arm64
 :Type: vm ioctl
 :Parameters: struct kvm_irq_level
 :Returns: 0 on success, -1 on error
@@ -850,7 +850,7 @@ capability is present (or unless it is not using the in-kernel irqchip,
 of course).


-ARM/arm64 can signal an interrupt either at the CPU level, or at the
+arm64 can signal an interrupt either at the CPU level, or at the
 in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus.  The irq field is interpreted
 like this::
@@ -876,7 +876,7 @@ When KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 is supported, the target vcpu is
 identified as (256 * vcpu2_index + vcpu_index). Otherwise, vcpu2_index
 must be zero.

-Note that on arm/arm64, the KVM_CAP_IRQCHIP capability only conditions
+Note that on arm64, the KVM_CAP_IRQCHIP capability only conditions
 injection of interrupts for the in-kernel irqchip. KVM_IRQ_LINE can always
 be used for a userspace interrupt controller.

@@ -1037,7 +1037,7 @@ such as migration.

 :Capability: KVM_CAP_VCPU_EVENTS
 :Extended by: KVM_CAP_INTR_SHADOW
-:Architectures: x86, arm, arm64
+:Architectures: x86, arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_vcpu_event (out)
 :Returns: 0 on success, -1 on error
@@ -1096,8 +1096,8 @@ The following bits are defined in the flags field:
  fields contain a valid state. This bit will be set whenever
  KVM_CAP_EXCEPTION_PAYLOAD is enabled.

-ARM/ARM64:
-^^^^^^^^^^
+ARM64:
+^^^^^^

 If the guest accesses a device that is being emulated by the host kernel in
 such a way that a real device would generate a physical SError, KVM may make
@@ -1156,7 +1156,7 @@ directly to the virtual CPU).

 :Capability: KVM_CAP_VCPU_EVENTS
 :Extended by: KVM_CAP_INTR_SHADOW
-:Architectures: x86, arm, arm64
+:Architectures: x86, arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_vcpu_event (in)
 :Returns: 0 on success, -1 on error
@@ -1191,8 +1191,8 @@ can be set in the flags field to signal that the
 exception_has_payload, exception_payload, and exception.pending fields
 contain a valid state and shall be written into the VCPU.

-ARM/ARM64:
-^^^^^^^^^^
+ARM64:
+^^^^^^

 User space may need to inject several types of events to the guest.

@@ -1399,7 +1399,7 @@ for vm-wide capabilities.
 ---------------------

 :Capability: KVM_CAP_MP_STATE
-:Architectures: x86, s390, arm, arm64
+:Architectures: x86, s390, arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_mp_state (out)
 :Returns: 0 on success; -1 on error
@@ -1416,7 +1416,7 @@ uniprocessor guests).
 Possible values are:

   ==========================    ===============================================
-   KVM_MP_STATE_RUNNABLE         the vcpu is currently running [x86,arm/arm64]
+   KVM_MP_STATE_RUNNABLE         the vcpu is currently running [x86,arm64]
   KVM_MP_STATE_UNINITIALIZED    the vcpu is an application processor (AP)
                                 which has not yet received an INIT signal [x86]
   KVM_MP_STATE_INIT_RECEIVED    the vcpu has received an INIT signal, and is
@@ -1425,29 +1425,52 @@ Possible values are:
                                 is waiting for an interrupt [x86]
   KVM_MP_STATE_SIPI_RECEIVED    the vcpu has just received a SIPI (vector
                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
-   KVM_MP_STATE_STOPPED          the vcpu is stopped [s390,arm/arm64]
+   KVM_MP_STATE_STOPPED          the vcpu is stopped [s390,arm64]
   KVM_MP_STATE_CHECK_STOP       the vcpu is in a special error state [s390]
   KVM_MP_STATE_OPERATING        the vcpu is operating (running or halted)
                                 [s390]
   KVM_MP_STATE_LOAD             the vcpu is in a special load/startup state
                                 [s390]
+   KVM_MP_STATE_SUSPENDED        the vcpu is in a suspend state and is waiting
+                                 for a wakeup event [arm64]
   ==========================    ===============================================

 On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.

-For arm/arm64:
-^^^^^^^^^^^^^^
+For arm64:
+^^^^^^^^^^

-The only states that are valid are KVM_MP_STATE_STOPPED and
-KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
+If a vCPU is in the KVM_MP_STATE_SUSPENDED state, KVM will emulate the
+architectural execution of a WFI instruction.
+
+If a wakeup event is recognized, KVM will exit to userspace with a
+KVM_SYSTEM_EVENT exit, where the event type is KVM_SYSTEM_EVENT_WAKEUP. If
+userspace wants to honor the wakeup, it must set the vCPU's MP state to
+KVM_MP_STATE_RUNNABLE. If it does not, KVM will continue to await a wakeup
+event in subsequent calls to KVM_RUN.
+
+.. warning::
+
+     If userspace intends to keep the vCPU in a SUSPENDED state, it is
+     strongly recommended that userspace take action to suppress the
+     wakeup event (such as masking an interrupt). Otherwise, subsequent
+     calls to KVM_RUN will immediately exit with a KVM_SYSTEM_EVENT_WAKEUP
+     event and inadvertently waste CPU cycles.
+
+     Additionally, if userspace takes action to suppress a wakeup event,
+     it is strongly recommended that it also restores the vCPU to its
+     original state when the vCPU is made RUNNABLE again. For example,
+     if userspace masked a pending interrupt to suppress the wakeup,
+     the interrupt should be unmasked before returning control to the
+     guest.

 4.39 KVM_SET_MP_STATE
 ---------------------

 :Capability: KVM_CAP_MP_STATE
-:Architectures: x86, s390, arm, arm64
+:Architectures: x86, s390, arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_mp_state (in)
 :Returns: 0 on success; -1 on error
@@ -1459,8 +1482,8 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.

-For arm/arm64:
-^^^^^^^^^^^^^^
+For arm64:
+^^^^^^^^^^

 The only states that are valid are KVM_MP_STATE_STOPPED and
 KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
@@ -1715,14 +1738,14 @@ The flags bitmap is defined as::
 ------------------------

 :Capability: KVM_CAP_IRQ_ROUTING
-:Architectures: x86 s390 arm arm64
+:Architectures: x86 s390 arm64
 :Type: vm ioctl
 :Parameters: struct kvm_irq_routing (in)
 :Returns: 0 on success, -1 on error

 Sets the GSI routing table entries, overwriting any previously set entries.

-On arm/arm64, GSI routing has the following limitation:
+On arm64, GSI routing has the following limitation:

 - GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.

@@ -2526,6 +2549,24 @@ EINVAL.
 After the vcpu's SVE configuration is finalized, further attempts to
 write this register will fail with EPERM.

+arm64 bitmap feature firmware pseudo-registers have the following bit pattern::
+
+  0x6030 0000 0016 <regno:16>
+
+The bitmap feature firmware registers exposes the hypercall services that
+are available for userspace to configure. The set bits corresponds to the
+services that are available for the guests to access. By default, KVM
+sets all the supported bits during VM initialization. The userspace can
+discover the available services via KVM_GET_ONE_REG, and write back the
+bitmap corresponding to the features that it wishes guests to see via
+KVM_SET_ONE_REG.
+
+Note: These registers are immutable once any of the vCPUs of the VM has
+run at least once. A KVM_SET_ONE_REG in such a scenario will return
+a -EBUSY to userspace.
+
+(See Documentation/virt/kvm/arm/hypercalls.rst for more details.)
+

 MIPS registers are mapped using the lower 32 bits.  The upper 16 of that is
 the register group type:
@@ -2636,7 +2677,7 @@ after pausing the vcpu, but before it is resumed.
 -------------------

 :Capability: KVM_CAP_SIGNAL_MSI
-:Architectures: x86 arm arm64
+:Architectures: x86 arm64
 :Type: vm ioctl
 :Parameters: struct kvm_msi (in)
 :Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
@@ -2824,7 +2865,7 @@ into the hash PTE second double word).
 --------------

 :Capability: KVM_CAP_IRQFD
-:Architectures: x86 s390 arm arm64
+:Architectures: x86 s390 arm64
 :Type: vm ioctl
 :Parameters: struct kvm_irqfd (in)
 :Returns: 0 on success, -1 on error
@@ -2850,7 +2891,7 @@ Note that closing the resamplefd is not sufficient to disable the
 irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
 and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.

-On arm/arm64, gsi routing being supported, the following can happen:
+On arm64, gsi routing being supported, the following can happen:

 - in case no routing entry is associated to this gsi, injection fails
 - in case the gsi is associated to an irqchip routing entry,
@@ -3104,7 +3145,7 @@ current state.  "addr" is ignored.
 ----------------------

 :Capability: basic
-:Architectures: arm, arm64
+:Architectures: arm64
 :Type: vcpu ioctl
 :Parameters: struct kvm_vcpu_init (in)
 :Returns: 0 on success; -1 on error
@@ -3202,7 +3243,7 @@ Possible features:
 -----------------------------

 :Capability: basic
-:Architectures: arm, arm64
+:Architectures: arm64
 :Type: vm ioctl
 :Parameters: struct kvm_vcpu_init (out)
 :Returns: 0 on success; -1 on error
@@ -3231,7 +3272,7 @@ VCPU matching underlying host.
 ---------------------

 :Capability: basic
-:Architectures: arm, arm64, mips
+:Architectures: arm64, mips
 :Type: vcpu ioctl
 :Parameters: struct kvm_reg_list (in/out)
 :Returns: 0 on success; -1 on error
@@ -3258,7 +3299,7 @@ KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 -----------------------------------------

 :Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-:Architectures: arm, arm64
+:Architectures: arm64
 :Type: vm ioctl
 :Parameters: struct kvm_arm_device_address (in)
 :Returns: 0 on success, -1 on error
@@ -3285,13 +3326,13 @@ can access emulated or directly exposed devices, which the host kernel needs
 to know about. The id field is an architecture specific identifier for a
 specific device.

-ARM/arm64 divides the id field into two parts, a device id and an
+arm64 divides the id field into two parts, a device id and an
 address type id specific to the individual device::

  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
  field: |        0x00000000      |     device id   |  addr type id  |

-ARM/arm64 currently only require this when using the in-kernel GIC
+arm64 currently only require this when using the in-kernel GIC
 support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
 as the device id.  When setting the base address for the guest's
 mapping of the VGIC virtual CPU and distributor interface, the ioctl
@@ -4505,7 +4546,7 @@ to I/O ports.
 ------------------------------------

 :Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
-:Architectures: x86, arm, arm64, mips
+:Architectures: x86, arm64, mips
 :Type: vm ioctl
 :Parameters: struct kvm_clear_dirty_log (in)
 :Returns: 0 on success, -1 on error
@@ -4617,7 +4658,7 @@ version has the following quirks:
 4.119 KVM_ARM_VCPU_FINALIZE
 ---------------------------

-:Architectures: arm, arm64
+:Architectures: arm64
 :Type: vcpu ioctl
 :Parameters: int feature (in)
 :Returns: 0 on success, -1 on error
@@ -5656,13 +5697,15 @@ should put the acknowledged interrupt vector into the 'epr' field.
  #define KVM_SYSTEM_EVENT_SHUTDOWN       1
  #define KVM_SYSTEM_EVENT_RESET          2
  #define KVM_SYSTEM_EVENT_CRASH          3
+  #define KVM_SYSTEM_EVENT_WAKEUP         4
+  #define KVM_SYSTEM_EVENT_SUSPEND        5
 			__u32 type;
 			__u64 flags;
 		} system_event;

 If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
 a system-level event using some architecture specific mechanism (hypercall
-or some special instruction). In case of ARM/ARM64, this is triggered using
+or some special instruction). In case of ARM64, this is triggered using
 HVC instruction based PSCI call from the vcpu. The 'type' field describes
 the system-level event type. The 'flags' field describes architecture
 specific flags for the system-level event.
@@ -5680,6 +5723,42 @@ Valid values for 'type' are:
   has requested a crash condition maintenance. Userspace can choose
   to ignore the request, or to gather VM memory core dump and/or
   reset/shutdown of the VM.
+ - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and
+   KVM has recognized a wakeup event. Userspace may honor this event by
+   marking the exiting vCPU as runnable, or deny it and call KVM_RUN again.
+ - KVM_SYSTEM_EVENT_SUSPEND -- the guest has requested a suspension of
+   the VM.
+
+For arm/arm64:
+--------------
+
+   KVM_SYSTEM_EVENT_SUSPEND exits are enabled with the
+   KVM_CAP_ARM_SYSTEM_SUSPEND VM capability. If a guest invokes the PSCI
+   SYSTEM_SUSPEND function, KVM will exit to userspace with this event
+   type.
+
+   It is the sole responsibility of userspace to implement the PSCI
+   SYSTEM_SUSPEND call according to ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND".
+   KVM does not change the vCPU's state before exiting to userspace, so
+   the call parameters are left in-place in the vCPU registers.
+
+   Userspace is _required_ to take action for such an exit. It must
+   either:
+
+    - Honor the guest request to suspend the VM. Userspace can request
+      in-kernel emulation of suspension by setting the calling vCPU's
+      state to KVM_MP_STATE_SUSPENDED. Userspace must configure the vCPU's
+      state according to the parameters passed to the PSCI function when
+      the calling vCPU is resumed. See ARM DEN0022D.b 5.19.1 "Intended use"
+      for details on the function parameters.
+
+    - Deny the guest request to suspend the VM. See ARM DEN0022D.b 5.19.2
+      "Caller responsibilities" for possible return values.
+
+Valid flags are:
+
+ - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued
+   a SYSTEM_RESET2 call according to v1.1 of the PSCI specification.

 ::

@@ -5755,7 +5834,7 @@ in send_page or recv a buffer to recv_page).
 			__u64 fault_ipa;
 		} arm_nisv;

-Used on arm and arm64 systems. If a guest accesses memory not in a memslot,
+Used on arm64 systems. If a guest accesses memory not in a memslot,
 KVM will typically return to userspace and ask it to do MMIO emulation on its
 behalf. However, for certain classes of instructions, no instruction decode
 (direction, length of memory access) is provided, and fetching and decoding
@@ -5772,16 +5851,22 @@ did not fall within an I/O window.
 Userspace implementations can query for KVM_CAP_ARM_NISV_TO_USER, and enable
 this capability at VM creation. Once this is done, these types of errors will
 instead return to userspace with KVM_EXIT_ARM_NISV, with the valid bits from
-the HSR (arm) and ESR_EL2 (arm64) in the esr_iss field, and the faulting IPA
-in the fault_ipa field. Userspace can either fix up the access if it's
-actually an I/O access by decoding the instruction from guest memory (if it's
-very brave) and continue executing the guest, or it can decide to suspend,
-dump, or restart the guest.
+the ESR_EL2 in the esr_iss field, and the faulting IPA in the fault_ipa field.
+Userspace can either fix up the access if it's actually an I/O access by
+decoding the instruction from guest memory (if it's very brave) and continue
+executing the guest, or it can decide to suspend, dump, or restart the guest.

 Note that KVM does not skip the faulting instruction as it does for
 KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
 if it decides to decode and emulate the instruction.

+This feature isn't available to protected VMs, as userspace does not
+have access to the state that is required to perform the emulation.
+Instead, a data abort exception is directly injected in the guest.
+Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if
+queried outside of a protected VM context, the feature will not be
+exposed if queried on a protected VM file descriptor.
+
 ::

 		/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
@@ -6464,7 +6549,7 @@ and injected exceptions.

 7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2

-:Architectures: x86, arm, arm64, mips
+:Architectures: x86, arm64, mips
 :Parameters: args[0] whether feature should be enabled or not

 Valid flags are::
@@ -6833,7 +6918,7 @@ reserved.
 8.9 KVM_CAP_ARM_USER_IRQ
 ------------------------

-:Architectures: arm, arm64
+:Architectures: arm64

 This capability, if KVM_CHECK_EXTENSION indicates that it is available, means
 that if userspace creates a VM without an in-kernel interrupt controller, it
@@ -6960,7 +7045,7 @@ HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
 8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
 ----------------------------------

-:Architectures: arm, arm64
+:Architectures: arm64

 This capability indicates that userspace can specify (via the
 KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
@@ -7266,6 +7351,16 @@ of the result of KVM_CHECK_EXTENSION.  KVM will forward to userspace
 the hypercalls whose corresponding bit is in the argument, and return
 ENOSYS for the others.

+8.36 KVM_CAP_ARM_SYSTEM_SUSPEND
+-------------------------------
+
+:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND
+:Architectures: arm64
+:Type: vm
+
+When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
+type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
+
 9. Known KVM API problems
 =========================

--- a/Documentation/virt/kvm/arm/fw-pseudo-registers.rst
+++ b/Documentation/virt/kvm/arm/fw-pseudo-registers.rst
@@ -0,0 +1,138 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+ARM firmware pseudo-registers interface
+=======================================
+
+KVM handles the hypercall services as requested by the guests. New hypercall
+services are regularly made available by the ARM specification or by KVM (as
+vendor services) if they make sense from a virtualization point of view.
+
+This means that a guest booted on two different versions of KVM can observe
+two different "firmware" revisions. This could cause issues if a given guest
+is tied to a particular version of a hypercall service, or if a migration
+causes a different version to be exposed out of the blue to an unsuspecting
+guest.
+
+In order to remedy this situation, KVM exposes a set of "firmware
+pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
+interface. These registers can be saved/restored by userspace, and set
+to a convenient value as required.
+
+The following registers are defined:
+
+* KVM_REG_ARM_PSCI_VERSION:
+
+  KVM implements the PSCI (Power State Coordination Interface)
+  specification in order to provide services such as CPU on/off, reset
+  and power-off to the guest.
+
+  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
+    (and thus has already been initialized)
+  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
+    highest PSCI version implemented by KVM and compatible with v0.2)
+  - Allows any PSCI version implemented by KVM and compatible with
+    v0.2 to be set with SET_ONE_REG
+  - Affects the whole VM (even if the register view is per-vcpu)
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+    Holds the state of the firmware support to mitigate CVE-2017-5715, as
+    offered by KVM to the guest via a HVC call. The workaround is described
+    under SMCCC_ARCH_WORKAROUND_1 in [1].
+
+  Accepted values are:
+
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
+      KVM does not offer
+      firmware support for the workaround. The mitigation status for the
+      guest is unknown.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
+      The workaround HVC call is
+      available to the guest and required for the mitigation.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
+      The workaround HVC call
+      is available to the guest, but it is not needed on this VCPU.
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+    Holds the state of the firmware support to mitigate CVE-2018-3639, as
+    offered by KVM to the guest via a HVC call. The workaround is described
+    under SMCCC_ARCH_WORKAROUND_2 in [1]_.
+
+  Accepted values are:
+
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
+      A workaround is not
+      available. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
+      The workaround state is
+      unknown. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+      The workaround is available,
+      and can be disabled by a vCPU. If
+      KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
+      this vCPU.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
+      The workaround is always active on this vCPU or it is not needed.
+
+
+Bitmap Feature Firmware Registers
+---------------------------------
+
+Contrary to the above registers, the following registers exposes the
+hypercall services in the form of a feature-bitmap to the userspace. This
+bitmap is translated to the services that are available to the guest.
+There is a register defined per service call owner and can be accessed via
+GET/SET_ONE_REG interface.
+
+By default, these registers are set with the upper limit of the features
+that are supported. This way userspace can discover all the usable
+hypercall services via GET_ONE_REG. The user-space can write-back the
+desired bitmap back via SET_ONE_REG. The features for the registers that
+are untouched, probably because userspace isn't aware of them, will be
+exposed as is to the guest.
+
+Note that KVM will not allow the userspace to configure the registers
+anymore once any of the vCPUs has run at least once. Instead, it will
+return a -EBUSY.
+
+The pseudo-firmware bitmap register are as follows:
+
+* KVM_REG_ARM_STD_BMAP:
+    Controls the bitmap of the ARM Standard Secure Service Calls.
+
+  The following bits are accepted:
+
+    Bit-0: KVM_REG_ARM_STD_BIT_TRNG_V1_0:
+      The bit represents the services offered under v1.0 of ARM True Random
+      Number Generator (TRNG) specification, ARM DEN0098.
+
+* KVM_REG_ARM_STD_HYP_BMAP:
+    Controls the bitmap of the ARM Standard Hypervisor Service Calls.
+
+  The following bits are accepted:
+
+    Bit-0: KVM_REG_ARM_STD_HYP_BIT_PV_TIME:
+      The bit represents the Paravirtualized Time service as represented by
+      ARM DEN0057A.
+
+* KVM_REG_ARM_VENDOR_HYP_BMAP:
+    Controls the bitmap of the Vendor specific Hypervisor Service Calls.
+
+  The following bits are accepted:
+
+    Bit-0: KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT
+      The bit represents the ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID
+      and ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID function-ids.
+
+    Bit-1: KVM_REG_ARM_VENDOR_HYP_BIT_PTP:
+      The bit represents the Precision Time Protocol KVM service.
+
+Errors:
+
+    =======  =============================================================
+    -ENOENT   Unknown register accessed.
+    -EBUSY    Attempt a 'write' to the register after the VM has started.
+    -EINVAL   Invalid bitmap written to the register.
+    =======  =============================================================
+
+.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
--- a/Documentation/virt/kvm/arm/hypercalls.rst
+++ b/Documentation/virt/kvm/arm/hypercalls.rst
@@ -0,0 +1,152 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+KVM/arm64-specific hypercalls exposed to guests
+===============================================
+
+This file documents the KVM/arm64-specific hypercalls which may be
+exposed by KVM/arm64 to guest operating systems. These hypercalls are
+issued using the HVC instruction according to version 1.1 of the Arm SMC
+Calling Convention (DEN0028/C):
+
+https://developer.arm.com/docs/den0028/c
+
+All KVM/arm64-specific hypercalls are allocated within the "Vendor
+Specific Hypervisor Service Call" range with a UID of
+``28b46fb6-2ec5-11e9-a9ca-4b564d003a74``. This UID should be queried by the
+guest using the standard "Call UID" function for the service range in
+order to determine that the KVM/arm64-specific hypercalls are available.
+
+``ARM_SMCCC_KVM_FUNC_FEATURES``
+---------------------------------------------
+
+Provides a discovery mechanism for other KVM/arm64 hypercalls.
+
+---------------------+-------------------------------------------------------------+
+| Presence:           | Mandatory for the KVM/arm64 UID                             |
+---------------------+-------------------------------------------------------------+
+| Calling convention: | HVC32                                                       |
+---------------------+----------+--------------------------------------------------+
+| Function ID:        | (uint32) | 0x86000000                                       |
+---------------------+----------+--------------------------------------------------+
+| Arguments:          | None                                                        |
+---------------------+----------+----+---------------------------------------------+
+| Return Values:      | (uint32) | R0 | Bitmap of available function numbers 0-31   |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint32) | R1 | Bitmap of available function numbers 32-63  |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint32) | R2 | Bitmap of available function numbers 64-95  |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint32) | R3 | Bitmap of available function numbers 96-127 |
+---------------------+----------+----+---------------------------------------------+
+
+``ARM_SMCCC_KVM_FUNC_PTP``
+----------------------------------------
+
+See ptp_kvm.rst
+
+``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``
+----------------------------------
+
+Query the memory protection parameters for a protected virtual machine.
+
+---------------------+-------------------------------------------------------------+
+| Presence:           | Optional; protected guests only.                            |
+---------------------+-------------------------------------------------------------+
+| Calling convention: | HVC64                                                       |
+---------------------+----------+--------------------------------------------------+
+| Function ID:        | (uint32) | 0xC6000002                                       |
+---------------------+----------+----+---------------------------------------------+
+| Arguments:          | (uint64) | R1 | Reserved / Must be zero                     |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R2 | Reserved / Must be zero                     |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R3 | Reserved / Must be zero                     |
+---------------------+----------+----+---------------------------------------------+
+| Return Values:      | (int64)  | R0 | ``INVALID_PARAMETER (-3)`` on error, else   |
+|                     |          |    | memory protection granule in bytes          |
+---------------------+----------+----+---------------------------------------------+
+
+``ARM_SMCCC_KVM_FUNC_MEM_SHARE``
+--------------------------------
+
+Share a region of memory with the KVM host, granting it read, write and execute
+permissions. The size of the region is equal to the memory protection granule
+advertised by ``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``.
+
+---------------------+-------------------------------------------------------------+
+| Presence:           | Optional; protected guests only.                            |
+---------------------+-------------------------------------------------------------+
+| Calling convention: | HVC64                                                       |
+---------------------+----------+--------------------------------------------------+
+| Function ID:        | (uint32) | 0xC6000003                                       |
+---------------------+----------+----+---------------------------------------------+
+| Arguments:          | (uint64) | R1 | Base IPA of memory region to share          |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R2 | Reserved / Must be zero                     |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R3 | Reserved / Must be zero                     |
+---------------------+----------+----+---------------------------------------------+
+| Return Values:      | (int64)  | R0 | ``SUCCESS (0)``                             |
+|                     |          |    +---------------------------------------------+
+|                     |          |    | ``INVALID_PARAMETER (-3)``                  |
+---------------------+----------+----+---------------------------------------------+
+
+``ARM_SMCCC_KVM_FUNC_MEM_UNSHARE``
+----------------------------------
+
+Revoke access permission from the KVM host to a memory region previously shared
+with ``ARM_SMCCC_KVM_FUNC_MEM_SHARE``. The size of the region is equal to the
+memory protection granule advertised by ``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``.
+
+---------------------+-------------------------------------------------------------+
+| Presence:           | Optional; protected guests only.                            |
+---------------------+-------------------------------------------------------------+
+| Calling convention: | HVC64                                                       |
+---------------------+----------+--------------------------------------------------+
+| Function ID:        | (uint32) | 0xC6000004                                       |
+---------------------+----------+----+---------------------------------------------+
+| Arguments:          | (uint64) | R1 | Base IPA of memory region to unshare        |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R2 | Reserved / Must be zero                     |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R3 | Reserved / Must be zero                     |
+---------------------+----------+----+---------------------------------------------+
+| Return Values:      | (int64)  | R0 | ``SUCCESS (0)``                             |
+|                     |          |    +---------------------------------------------+
+|                     |          |    | ``INVALID_PARAMETER (-3)``                  |
+---------------------+----------+----+---------------------------------------------+
+
+``ARM_SMCCC_KVM_FUNC_MEM_RELINQUISH``
+--------------------------------------
+
+Cooperatively relinquish ownership of a memory region. The size of the
+region is equal to the memory protection granule advertised by
+``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``. If this hypercall is advertised
+then it is mandatory to call it before freeing memory via, for
+example, virtio balloon. If the caller is a protected VM, it is
+guaranteed that the memory region will be completely cleared before
+becoming visible to another VM.
+
+---------------------+-------------------------------------------------------------+
+| Presence:           | Optional.                                                   |
+---------------------+-------------------------------------------------------------+
+| Calling convention: | HVC64                                                       |
+---------------------+----------+--------------------------------------------------+
+| Function ID:        | (uint32) | 0xC6000009                                       |
+---------------------+----------+----+---------------------------------------------+
+| Arguments:          | (uint64) | R1 | Base IPA of memory region to relinquish     |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R2 | Reserved / Must be zero                     |
+|                     +----------+----+---------------------------------------------+
+|                     | (uint64) | R3 | Reserved / Must be zero                     |
+---------------------+----------+----+---------------------------------------------+
+| Return Values:      | (int64)  | R0 | ``SUCCESS (0)``                             |
+|                     |          |    +---------------------------------------------+
+|                     |          |    | ``INVALID_PARAMETER (-3)``                  |
+---------------------+----------+----+---------------------------------------------+
+
+``ARM_SMCCC_KVM_FUNC_MMIO_GUARD_*``
+-----------------------------------
+
+See mmio-guard.rst
--- a/Documentation/virt/kvm/arm/index.rst
+++ b/Documentation/virt/kvm/arm/index.rst
@@ -7,7 +7,10 @@ ARM
 .. toctree::
   :maxdepth: 2

+   fw-pseudo-registers
   hyp-abi
-   psci
+   hypercalls
+   pkvm
   pvtime
   ptp_kvm
+   mmio-guard
--- a/Documentation/virt/kvm/arm/mmio-guard.rst
+++ b/Documentation/virt/kvm/arm/mmio-guard.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+KVM MMIO guard
+==============
+
+KVM implements device emulation by handling translation faults to any
+IPA range that is not contained in a memory slot. Such a translation
+fault is in most cases passed on to userspace (or in rare cases to the
+host kernel) with the address, size and possibly data of the access
+for emulation.
+
+Should the guest exit with an address that is not one that corresponds
+to an emulatable device, userspace may take measures that are not the
+most graceful as far as the guest is concerned (such as terminating it
+or delivering a fatal exception).
+
+There is also an element of trust: by forwarding the request to
+userspace, the kernel assumes that the guest trusts userspace to do
+the right thing.
+
+The KVM MMIO guard offers a way to mitigate this last point: a guest
+can request that only certain regions of the IPA space are valid as
+MMIO. Only these regions will be handled as an MMIO, and any other
+will result in an exception being delivered to the guest.
+
+This relies on a set of hypercalls defined in the KVM-specific range,
+using the HVC64 calling convention.
+
+* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO
+
+    ==============    ========    ================================
+    Function ID:      (uint32)    0xC6000005
+    Arguments:        r1-r3       Reserved / Must be zero
+    Return Values:    (int64)     NOT_SUPPORTED(-1) on error, or
+                      (uint64)    Protection Granule (PG) size in
+                                  bytes (r0)
+    ==============    ========    ================================
+
+* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL
+
+    ==============    ========    ==============================
+    Function ID:      (uint32)    0xC6000006
+    Arguments:        none
+    Return Values:    (int64)     NOT_SUPPORTED(-1) on error, or
+                                  RET_SUCCESS(0) (r0)
+    ==============    ========    ==============================
+
+* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP
+
+    ==============    ========    ====================================
+    Function ID:      (uint32)    0xC6000007
+    Arguments:        (uint64)    The base of the PG-sized IPA range
+                                  that is allowed to be accessed as
+                                  MMIO. Must be aligned to the PG size
+                                  (r1)
+                      (uint64)    Index in the MAIR_EL1 register
+		                  providing the memory attribute that
+				  is used by the guest (r2)
+    Return Values:    (int64)     NOT_SUPPORTED(-1) on error, or
+                                  RET_SUCCESS(0) (r0)
+    ==============    ========    ====================================
+
+* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP
+
+    ==============    ========    ======================================
+    Function ID:      (uint32)    0xC6000008
+    Arguments:        (uint64)    PG-sized IPA range aligned to the PG
+                                  size which has been previously mapped.
+                                  Must be aligned to the PG size and
+                                  have been previously mapped (r1)
+    Return Values:    (int64)     NOT_SUPPORTED(-1) on error, or
+                                  RET_SUCCESS(0) (r0)
+    ==============    ========    ======================================
--- a/Documentation/virt/kvm/arm/pkvm.rst
+++ b/Documentation/virt/kvm/arm/pkvm.rst
@@ -0,0 +1,96 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Protected virtual machines (pKVM)
+=================================
+
+Introduction
+------------
+
+Protected KVM (pKVM) is a KVM/arm64 extension which uses the two-stage
+translation capability of the Armv8 MMU to isolate guest memory from the host
+system. This allows for the creation of a confidential computing environment
+without relying on whizz-bang features in hardware, but still allowing room for
+complementary technologies such as memory encryption and hardware-backed
+attestation.
+
+The major implementation change brought about by pKVM is that the hypervisor
+code running at EL2 is now largely independent of (and isolated from) the rest
+of the host kernel running at EL1 and therefore additional hypercalls are
+introduced to manage manipulation of guest stage-2 page tables, creation of VM
+data structures and reclamation of memory on teardown. An immediate consequence
+of this change is that the host itself runs with an identity mapping enabled
+at stage-2, providing the hypervisor code with a mechanism to restrict host
+access to an arbitrary physical page.
+
+Enabling pKVM
+-------------
+
+The pKVM hypervisor is enabled by booting the host kernel at EL2 with
+"``kvm-arm.mode=protected``" on the command-line. Once enabled, VMs can be spawned
+in either protected or non-protected state, although the hypervisor is still
+responsible for managing most of the VM metadata in either case.
+
+Limitations
+-----------
+
+Enabling pKVM places some significant limitations on KVM guests, regardless of
+whether they are spawned in protected state. It is therefore recommended only
+to enable pKVM if protected VMs are required, with non-protected state acting
+primarily as a debug and development aid.
+
+If you're still keen, then here is an incomplete list of caveats that apply
+to all VMs running under pKVM:
+
+- Guest memory cannot be file-backed (with the exception of shmem/memfd) and is
+  pinned as it is mapped into the guest. This prevents the host from
+  swapping-out, migrating, merging or generally doing anything useful with the
+  guest pages. It also requires that the VMM has either ``CAP_IPC_LOCK`` or
+  sufficient ``RLIMIT_MEMLOCK`` to account for this pinned memory.
+
+- GICv2 is not supported and therefore GICv3 hardware is required in order
+  to expose a virtual GICv3 to the guest.
+
+- Read-only memslots are unsupported and therefore dirty logging cannot be
+  enabled.
+
+- Memslot configuration is fixed once a VM has started running, with subsequent
+  move or deletion requests being rejected with ``-EPERM``.
+
+- There are probably many others.
+
+Since the host is unable to tear down the hypervisor when pKVM is enabled,
+hibernation (``CONFIG_HIBERNATION``) and kexec (``CONFIG_KEXEC``) will fail
+with ``-EBUSY``.
+
+If you are not happy with these limitations, then please don't enable pKVM :)
+
+VM creation
+-----------
+
+When pKVM is enabled, protected VMs can be created by specifying the
+``KVM_VM_TYPE_ARM_PROTECTED`` flag in the machine type identifier parameter
+passed to ``KVM_CREATE_VM``.
+
+Protected VMs are instantiated according to a fixed vCPU configuration
+described by the ID register definitions in
+``arch/arm64/include/asm/kvm_pkvm.h``. Only a subset of the architectural
+features that may be available to the host are exposed to the guest and the
+capabilities advertised by ``KVM_CHECK_EXTENSION`` are limited accordingly,
+with the vCPU registers being initialised to their architecturally-defined
+values.
+
+Where not defined by the architecture, the registers of a protected vCPU
+are reset to zero with the exception of the PC and X0 which can be set
+either by the ``KVM_SET_ONE_REG`` interface or by a call to PSCI ``CPU_ON``.
+
+VM runtime
+----------
+
+By default, memory pages mapped into a protected guest are inaccessible to the
+host and any attempt by the host to access such a page will result in the
+injection of an abort at EL1 by the hypervisor. For accesses originating from
+EL0, the host will then terminate the current task with a ``SIGSEGV``.
+
+pKVM exposes additional hypercalls to protected guests, primarily for the
+purpose of establishing shared-memory regions with the host for communication
+and I/O. These hypercalls are documented in hypercalls.rst.
--- a/Documentation/virt/kvm/arm/psci.rst
+++ b/Documentation/virt/kvm/arm/psci.rst
@@ -1,77 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=========================================
-Power State Coordination Interface (PSCI)
-=========================================
-
-KVM implements the PSCI (Power State Coordination Interface)
-specification in order to provide services such as CPU on/off, reset
-and power-off to the guest.
-
-The PSCI specification is regularly updated to provide new features,
-and KVM implements these updates if they make sense from a virtualization
-point of view.
-
-This means that a guest booted on two different versions of KVM can
-observe two different "firmware" revisions. This could cause issues if
-a given guest is tied to a particular PSCI revision (unlikely), or if
-a migration causes a different PSCI version to be exposed out of the
-blue to an unsuspecting guest.
-
-In order to remedy this situation, KVM exposes a set of "firmware
-pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
-interface. These registers can be saved/restored by userspace, and set
-to a convenient value if required.
-
-The following register is defined:
-
-* KVM_REG_ARM_PSCI_VERSION:
-
-  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
-    (and thus has already been initialized)
-  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
-    highest PSCI version implemented by KVM and compatible with v0.2)
-  - Allows any PSCI version implemented by KVM and compatible with
-    v0.2 to be set with SET_ONE_REG
-  - Affects the whole VM (even if the register view is per-vcpu)
-
-* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
-    Holds the state of the firmware support to mitigate CVE-2017-5715, as
-    offered by KVM to the guest via a HVC call. The workaround is described
-    under SMCCC_ARCH_WORKAROUND_1 in [1].
-
-  Accepted values are:
-
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
-      KVM does not offer
-      firmware support for the workaround. The mitigation status for the
-      guest is unknown.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
-      The workaround HVC call is
-      available to the guest and required for the mitigation.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
-      The workaround HVC call
-      is available to the guest, but it is not needed on this VCPU.
-
-* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
-    Holds the state of the firmware support to mitigate CVE-2018-3639, as
-    offered by KVM to the guest via a HVC call. The workaround is described
-    under SMCCC_ARCH_WORKAROUND_2 in [1]_.
-
-  Accepted values are:
-
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
-      A workaround is not
-      available. KVM does not offer firmware support for the workaround.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
-      The workaround state is
-      unknown. KVM does not offer firmware support for the workaround.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
-      The workaround is available,
-      and can be disabled by a vCPU. If
-      KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
-      this vCPU.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
-      The workaround is always active on this vCPU or it is not needed.
-
-.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
--- a/Documentation/virt/kvm/arm/ptp_kvm.rst
+++ b/Documentation/virt/kvm/arm/ptp_kvm.rst
@@ -7,19 +7,29 @@ PTP_KVM is used for high precision time sync between host and guests.
 It relies on transferring the wall clock and counter value from the
 host to the guest using a KVM-specific hypercall.

-* ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: 0x86000001
+``ARM_SMCCC_KVM_FUNC_PTP``
+----------------------------------------

-This hypercall uses the SMC32/HVC32 calling convention:
+Retrieve current time information for the specific counter. There are no
+endianness restrictions.

-ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID
-    ==============    ========    =====================================
-    Function ID:      (uint32)    0x86000001
-    Arguments:        (uint32)    KVM_PTP_VIRT_COUNTER(0)
-                                  KVM_PTP_PHYS_COUNTER(1)
-    Return Values:    (int32)     NOT_SUPPORTED(-1) on error, or
-                      (uint32)    Upper 32 bits of wall clock time (r0)
-                      (uint32)    Lower 32 bits of wall clock time (r1)
-                      (uint32)    Upper 32 bits of counter (r2)
-                      (uint32)    Lower 32 bits of counter (r3)
-    Endianness:                   No Restrictions.
-    ==============    ========    =====================================
+---------------------+-------------------------------------------------------+
+| Presence:           | Optional                                              |
+---------------------+-------------------------------------------------------+
+| Calling convention: | HVC32                                                 |
+---------------------+----------+--------------------------------------------+
+| Function ID:        | (uint32) | 0x86000001                                 |
+---------------------+----------+----+---------------------------------------+
+| Arguments:          | (uint32) | R1 | ``KVM_PTP_VIRT_COUNTER (0)``          |
+|                     |          |    +---------------------------------------+
+|                     |          |    | ``KVM_PTP_PHYS_COUNTER (1)``          |
+---------------------+----------+----+---------------------------------------+
+| Return Values:      | (int32)  | R0 | ``NOT_SUPPORTED (-1)`` on error, else |
+|                     |          |    | upper 32 bits of wall clock time      |
+|                     +----------+----+---------------------------------------+
+|                     | (uint32) | R1 | Lower 32 bits of wall clock time      |
+|                     +----------+----+---------------------------------------+
+|                     | (uint32) | R2 | Upper 32 bits of counter              |
+|                     +----------+----+---------------------------------------+
+|                     | (uint32) | R3 | Lower 32 bits of counter              |
+---------------------+----------+----+---------------------------------------+
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -70,7 +70,7 @@ irqchip.
 	 -ENODEV  PMUv3 not supported or GIC not initialized
 	 -ENXIO   PMUv3 not properly configured or in-kernel irqchip not
 	 	  configured as required prior to calling this attribute
-	 -EBUSY   PMUv3 already initialized
+	 -EBUSY   PMUv3 already initialized or a VCPU has already run
 	 -EINVAL  Invalid filter range
 	 =======  ======================================================

@@ -104,11 +104,43 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
 isn't strictly speaking an event. Filtering the cycle counter is possible
 using event 0x11 (CPU_CYCLES).

+1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
+------------------------------------------
+
+:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
+             identifier.
+
+:Returns:
+
+	 =======  ====================================================
+	 -EBUSY   PMUv3 already initialized, a VCPU has already run or
+                  an event filter has already been set
+	 -EFAULT  Error accessing the PMU identifier
+	 -ENXIO   PMU not found
+	 -ENODEV  PMUv3 not supported or GIC not initialized
+	 -ENOMEM  Could not allocate memory
+	 =======  ====================================================
+
+Request that the VCPU uses the specified hardware PMU when creating guest events
+for the purpose of PMU emulation. The PMU identifier can be read from the "type"
+file for the desired PMU instance under /sys/devices (or, equivalent,
+/sys/bus/even_source). This attribute is particularly useful on heterogeneous
+systems where there are at least two CPU PMUs on the system. The PMU that is set
+for one VCPU will be used by all the other VCPUs. It isn't possible to set a PMU
+if a PMU event filter is already present.
+
+Note that KVM will not make any attempts to run the VCPU on the physical CPUs
+associated with the PMU specified by this attribute. This is entirely left to
+userspace. However, attempting to run the VCPU on a physical CPU not supported
+by the PMU will fail and KVM_RUN will return with
+exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting
+hardare_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and
+the cpu field to the processor id.

 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
 =================================

-:Architectures: ARM, ARM64
+:Architectures: ARM64

 2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
 -----------------------------------------------------------------------------
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -42,6 +42,7 @@ descriptions of data structures and algorithms.
   ksm
   memory-model
   mmu_notifier
+   multigen_lru
   numa
   overcommit-accounting
   page_migration
--- a/Documentation/vm/multigen_lru.rst
+++ b/Documentation/vm/multigen_lru.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Multi-Gen LRU
+=============
+The multi-gen LRU is an alternative LRU implementation that optimizes
+page reclaim and improves performance under memory pressure. Page
+reclaim decides the kernel's caching policy and ability to overcommit
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
+
+Design overview
+===============
+Objectives
+----------
+The design objectives are:
+
+* Good representation of access recency
+* Try to profit from spatial locality
+* Fast paths to make obvious choices
+* Simple self-correcting heuristics
+
+The representation of access recency is at the core of all LRU
+implementations. In the multi-gen LRU, each generation represents a
+group of pages with similar access recency. Generations establish a
+(time-based) common frame of reference and therefore help make better
+choices, e.g., between different memcgs on a computer or different
+computers in a data center (for job scheduling).
+
+Exploiting spatial locality improves efficiency when gathering the
+accessed bit. A rmap walk targets a single page and does not try to
+profit from discovering a young PTE. A page table walk can sweep all
+the young PTEs in an address space, but the address space can be too
+sparse to make a profit. The key is to optimize both methods and use
+them in combination.
+
+Fast paths reduce code complexity and runtime overhead. Unmapped pages
+do not require TLB flushes; clean pages do not require writeback.
+These facts are only helpful when other conditions, e.g., access
+recency, are similar. With generations as a common frame of reference,
+additional factors stand out. But obvious choices might not be good
+choices; thus self-correction is necessary.
+
+The benefits of simple self-correcting heuristics are self-evident.
+Again, with generations as a common frame of reference, this becomes
+attainable. Specifically, pages in the same generation can be
+categorized based on additional factors, and a feedback loop can
+statistically compare the refault percentages across those categories
+and infer which of them are better choices.
+
+Assumptions
+-----------
+The protection of hot pages and the selection of cold pages are based
+on page access channels and patterns. There are two access channels:
+
+* Accesses through page tables
+* Accesses through file descriptors
+
+The protection of the former channel is by design stronger because:
+
+1. The uncertainty in determining the access patterns of the former
+   channel is higher due to the approximation of the accessed bit.
+2. The cost of evicting the former channel is higher due to the TLB
+   flushes required and the likelihood of encountering the dirty bit.
+3. The penalty of underprotecting the former channel is higher because
+   applications usually do not prepare themselves for major page
+   faults like they do for blocked I/O. E.g., GUI applications
+   commonly use dedicated I/O threads to avoid blocking rendering
+   threads.
+
+There are also two access patterns:
+
+* Accesses exhibiting temporal locality
+* Accesses not exhibiting temporal locality
+
+For the reasons listed above, the former channel is assumed to follow
+the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is
+present, and the latter channel is assumed to follow the latter
+pattern unless outlying refaults have been observed.
+
+Workflow overview
+=================
+Evictable pages are divided into multiple generations for each
+``lruvec``. The youngest generation number is stored in
+``lrugen->max_seq`` for both anon and file types as they are aged on
+an equal footing. The oldest generation numbers are stored in
+``lrugen->min_seq[]`` separately for anon and file types as clean file
+pages can be evicted regardless of swap constraints. These three
+variables are monotonically increasing.
+
+Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
+bits in order to fit into the gen counter in ``folio->flags``. Each
+truncated generation number is an index to ``lrugen->lists[]``. The
+sliding window technique is used to track at least ``MIN_NR_GENS`` and
+at most ``MAX_NR_GENS`` generations. The gen counter stores a value
+within ``[1, MAX_NR_GENS]`` while a page is on one of
+``lrugen->lists[]``; otherwise it stores zero.
+
+Each generation is divided into multiple tiers. A page accessed ``N``
+times through file descriptors is in tier ``order_base_2(N)``. Unlike
+generations, tiers do not have dedicated ``lrugen->lists[]``. In
+contrast to moving across generations, which requires the LRU lock,
+moving across tiers only involves atomic operations on
+``folio->flags`` and therefore has a negligible cost. A feedback loop
+modeled after the PID controller monitors refaults over all the tiers
+from anon and file types and decides which tiers from which types to
+evict or protect.
+
+There are two conceptually independent procedures: the aging and the
+eviction. They form a closed-loop system, i.e., the page reclaim.
+
+Aging
+-----
+The aging produces young generations. Given an ``lruvec``, it
+increments ``max_seq`` when ``max_seq-min_seq+1`` approaches
+``MIN_NR_GENS``. The aging promotes hot pages to the youngest
+generation when it finds them accessed through page tables; the
+demotion of cold pages happens consequently when it increments
+``max_seq``. The aging uses page table walks and rmap walks to find
+young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list``
+and calls ``walk_page_range()`` with each ``mm_struct`` on this list
+to scan PTEs, and after each iteration, it increments ``max_seq``. For
+the latter, when the eviction walks the rmap and finds a young PTE,
+the aging scans the adjacent PTEs. For both, on finding a young PTE,
+the aging clears the accessed bit and updates the gen counter of the
+page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
+
+Eviction
+--------
+The eviction consumes old generations. Given an ``lruvec``, it
+increments ``min_seq`` when ``lrugen->lists[]`` indexed by
+``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
+evict from, it first compares ``min_seq[]`` to select the older type.
+If both types are equally old, it selects the one whose first tier has
+a lower refault percentage. The first tier contains single-use
+unmapped clean pages, which are the best bet. The eviction sorts a
+page according to its gen counter if the aging has found this page
+accessed through page tables and updated its gen counter. It also
+moves a page to the next generation, i.e., ``min_seq+1``, if this page
+was accessed multiple times through file descriptors and the feedback
+loop has detected outlying refaults from the tier this page is in. To
+this end, the feedback loop uses the first tier as the baseline, for
+the reason stated earlier.
+
+Summary
+-------
+The multi-gen LRU can be disassembled into the following parts:
+
+* Generations
+* Rmap walks
+* Page table walks
+* Bloom filters
+* PID controller
+
+The aging and the eviction form a producer-consumer model;
+specifically, the latter drives the former by the sliding window over
+generations. Within the aging, rmap walks drive page table walks by
+inserting hot densely populated page tables to the Bloom filters.
+Within the eviction, the PID controller uses refaults as the feedback
+to select types to evict and tiers to protect.
--- a/3
+++ b/3
@@ -30,3 +30,6 @@ source "lib/Kconfig"
 source "lib/Kconfig.debug"

 source "Documentation/Kconfig"
+
+# ANDROID: Set KCONFIG_EXT_PREFIX to decend into an external project.
+source "$(KCONFIG_EXT_PREFIX)Kconfig.ext"
--- a/Kconfig.ext
+++ b/Kconfig.ext
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+# This file is intentionally empty. It's used as a placeholder for when
+# KCONFIG_EXT_PREFIX isn't defined.
--- a/29
+++ b/29
@@ -2424,7 +2424,7 @@ F:	drivers/pci/controller/dwc/pcie-qcom.c
 F:	drivers/phy/qualcomm/
 F:	drivers/power/*/msm*
 F:	drivers/reset/reset-qcom-*
-F:	drivers/scsi/ufs/ufs-qcom*
+F:	drivers/ufs/host/ufs-qcom*
 F:	drivers/spi/spi-geni-qcom.c
 F:	drivers/spi/spi-qcom-qspi.c
 F:	drivers/spi/spi-qup.c
@@ -7176,6 +7176,7 @@ M:	Chao Yu <chao@kernel.org>
 L:	linux-f2fs-devel@lists.sourceforge.net
 S:	Maintained
 W:	https://f2fs.wiki.kernel.org/
+B:	https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
 F:	Documentation/ABI/testing/sysfs-fs-f2fs
 F:	Documentation/filesystems/f2fs.rst
@@ -9224,6 +9225,13 @@ F:	Documentation/hwmon/ina2xx.rst
 F:	drivers/hwmon/ina2xx.c
 F:	include/linux/platform_data/ina2xx.h

+INCREMENTAL FILE SYSTEM
+M:	Paul Lawrence <paullawrence@google.com>
+L:	linux-unionfs@vger.kernel.org
+S:	Supported
+F:	fs/incfs/
+F:	tools/testing/selftests/filesystems/incfs/
+
 INDUSTRY PACK SUBSYSTEM (IPACK)
 M:	Samuel Iglesias Gonsalvez <siglesias@igalia.com>
 M:	Jens Taprogge <jens.taprogge@taprogge.org>
@@ -10136,7 +10144,6 @@ F:	arch/*/include/asm/*kasan.h
 F:	arch/*/mm/kasan_init*
 F:	include/linux/kasan*.h
 F:	lib/Kconfig.kasan
-F:	lib/test_kasan*.c
 F:	mm/kasan/
 F:	scripts/Makefile.kasan

@@ -10305,8 +10312,10 @@ M:	Marc Zyngier <maz@kernel.org>
 R:	James Morse <james.morse@arm.com>
 R:	Alexandru Elisei <alexandru.elisei@arm.com>
 R:	Suzuki K Poulose <suzuki.poulose@arm.com>
+R:	Oliver Upton <oliver.upton@linux.dev>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	kvmarm@lists.cs.columbia.edu (moderated for non-subscribers)
+L:	kvmarm@lists.linux.dev
+L:	kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
 F:	arch/arm64/include/asm/kvm*
@@ -13322,6 +13331,12 @@ W:	http://www.netlab.is.tsukuba.ac.jp/~yokota/izumi/ninja/
 F:	Documentation/scsi/NinjaSCSI.rst
 F:	drivers/scsi/nsp32*

+NINTENDO HID DRIVER
+M:	Daniel J. Ogorchock <djogorchock@gmail.com>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/hid/hid-nintendo*
+
 NIOS2 ARCHITECTURE
 M:	Dinh Nguyen <dinguyen@kernel.org>
 S:	Maintained
@@ -16757,6 +16772,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git
 F:	Documentation/devicetree/bindings/scsi/
 F:	drivers/scsi/
+F:	drivers/ufs/
 F:	include/scsi/

 SCSI TAPE DRIVER
@@ -19312,23 +19328,24 @@ F:	include/linux/visorbus.h
 UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER
 R:	Alim Akhtar <alim.akhtar@samsung.com>
 R:	Avri Altman <avri.altman@wdc.com>
+R:	Bart Van Assche <bvanassche@acm.org>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	Documentation/scsi/ufs.rst
-F:	drivers/scsi/ufs/
+F:	drivers/ufs/core/

 UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER DWC HOOKS
 M:	Pedro Sousa <pedrom.sousa@synopsys.com>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
-F:	drivers/scsi/ufs/*dwc*
+F:	drivers/ufs/host/*dwc*

 UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER MEDIATEK HOOKS
 M:	Stanley Chu <stanley.chu@mediatek.com>
 L:	linux-scsi@vger.kernel.org
 L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	drivers/scsi/ufs/ufs-mediatek*
+F:	drivers/ufs/host/ufs-mediatek*

 UNSORTED BLOCK IMAGES (UBI)
 M:	Richard Weinberger <richard@nod.at>
--- a/133
+++ b/133
@@ -136,6 +136,24 @@ endif

 export KBUILD_EXTMOD

+# ANDROID: set up mixed-build support. mixed-build allows device kernel modules
+# to be compiled against a GKI kernel. This approach still uses the headers and
+# Kbuild from device kernel, so care must be taken to ensure that those headers match.
+ifdef KBUILD_MIXED_TREE
+# Need vmlinux.symvers for modpost and System.map for depmod, check whether they exist in KBUILD_MIXED_TREE
+required_mixed_files=vmlinux.symvers System.map
+$(if $(filter-out $(words $(required_mixed_files)), \
+		$(words $(wildcard $(add-prefix $(KBUILD_MIXED_TREE)/,$(required_mixed_files))))),,\
+	$(error KBUILD_MIXED_TREE=$(KBUILD_MIXED_TREE) doesn't contain $(required_mixed_files)))
+endif
+
+mixed-build-prefix = $(if $(KBUILD_MIXED_TREE),$(KBUILD_MIXED_TREE)/)
+export KBUILD_MIXED_TREE
+# This is a hack for kleaf to set mixed-build-prefix within the execution of a make rule, e.g.
+# within __modinst_pre.
+# TODO(b/205893923): Revert this hack once it is properly handled.
+export mixed-build-prefix
+
 # Kbuild will save output files in the current working directory.
 # This does not need to match to the root of the kernel source tree.
 #
@@ -432,11 +450,12 @@ HOSTCXX	= g++
 endif
 HOSTPKG_CONFIG	= pkg-config

-export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
-			      -O2 -fomit-frame-pointer -std=gnu89
-export KBUILD_USERLDFLAGS :=
+KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
+			 -O2 -fomit-frame-pointer -std=gnu89
+KBUILD_USERCFLAGS  := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
+KBUILD_USERLDFLAGS := $(USERLDFLAGS)

-KBUILD_HOSTCFLAGS   := $(KBUILD_USERCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
+KBUILD_HOSTCFLAGS   := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
 KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
 KBUILD_HOSTLDFLAGS  := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
 KBUILD_HOSTLDLIBS   := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
@@ -477,7 +496,7 @@ KGZIP		= gzip
 KBZIP2		= bzip2
 KLZOP		= lzop
 LZMA		= lzma
-LZ4		= lz4c
+LZ4		= lz4
 XZ		= xz
 ZSTD		= zstd

@@ -531,6 +550,7 @@ export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AW
 export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
 export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
 export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
+export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS

 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
 export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
@@ -672,11 +692,13 @@ drivers-y	+= virt/
 libs-y		:= lib/
 endif # KBUILD_EXTMOD

+ifndef KBUILD_MIXED_TREE
 # The all: target is the default when no target is given on the
 # command line.
 # This allow a user to issue only 'make' to build a kernel including modules
 # Defaults to vmlinux, but the arch makefile usually adds further targets
 all: vmlinux
+endif

 CFLAGS_GCOV	:= -fprofile-arcs -ftest-coverage
 ifdef CONFIG_CC_IS_GCC
@@ -955,7 +977,13 @@ KBUILD_LDFLAGS	+= --thinlto-cache-dir=$(extmod_prefix).thinlto-cache
 else
 CC_FLAGS_LTO	:= -flto
 endif
+
+ifeq ($(SRCARCH),x86)
+# Workaround for compiler / linker bug
 CC_FLAGS_LTO	+= -fvisibility=hidden
+else
+CC_FLAGS_LTO	+= -fvisibility=default
+endif

 # Limit inlining across translation units to reduce binary size
 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
@@ -1150,6 +1178,40 @@ export extmod_prefix = $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)
 export MODORDER := $(extmod_prefix)modules.order
 export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps

+# ---------------------------------------------------------------------------
+# Kernel headers
+
+PHONY += headers
+
+#Default location for installed headers
+ifeq ($(KBUILD_EXTMOD),)
+PHONY += archheaders archscripts
+hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
+headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
+else
+hdr-prefix = $(KBUILD_EXTMOD)/
+hdr-inst := -f $(srctree)/scripts/Makefile.headersinst dst=$(KBUILD_EXTMOD)/usr/include objtree=$(objtree)/$(KBUILD_EXTMOD) obj
+endif
+
+export INSTALL_HDR_PATH = $(objtree)/$(hdr-prefix)usr
+
+quiet_cmd_headers_install = INSTALL $(INSTALL_HDR_PATH)/include
+      cmd_headers_install = \
+	mkdir -p $(INSTALL_HDR_PATH); \
+	rsync -mrl --include='*/' --include='*\.h' --exclude='*' \
+	$(hdr-prefix)usr/include $(INSTALL_HDR_PATH);
+
+PHONY += headers_install
+headers_install: headers
+	$(call cmd,headers_install)
+
+headers:
+ifeq ($(KBUILD_EXTMOD),)
+	$(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML))
+endif
+	$(Q)$(MAKE) $(hdr-inst)=$(hdr-prefix)include/uapi
+	$(Q)$(MAKE) $(hdr-inst)=$(hdr-prefix)arch/$(SRCARCH)/include/uapi
+
 ifeq ($(KBUILD_EXTMOD),)
 core-y			+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/
 core-$(CONFIG_BLOCK)	+= block/
@@ -1215,8 +1277,10 @@ cmd_link-vmlinux =                                                 \
 	$(CONFIG_SHELL) $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)";    \
 	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)

+ifndef KBUILD_MIXED_TREE
 vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) FORCE
 	+$(call if_changed_dep,link-vmlinux)
+endif

 targets := vmlinux

@@ -1225,7 +1289,8 @@ targets := vmlinux
 $(sort $(vmlinux-deps) $(subdir-modorder)): descend ;

 filechk_kernel.release = \
-	echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
+	echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion \
+		$(srctree) $(BRANCH) $(KMI_GENERATION))"

 # Store (new) KERNELRELEASE string in include/config/kernel.release
 include/config/kernel.release: FORCE
@@ -1315,32 +1380,6 @@ headerdep:
 	$(Q)find $(srctree)/include/ -name '*.h' | xargs --max-args 1 \
 	$(srctree)/scripts/headerdep.pl -I$(srctree)/include

-# ---------------------------------------------------------------------------
-# Kernel headers
-
-#Default location for installed headers
-export INSTALL_HDR_PATH = $(objtree)/usr
-
-quiet_cmd_headers_install = INSTALL $(INSTALL_HDR_PATH)/include
-      cmd_headers_install = \
-	mkdir -p $(INSTALL_HDR_PATH); \
-	rsync -mrl --include='*/' --include='*\.h' --exclude='*' \
-	usr/include $(INSTALL_HDR_PATH)
-
-PHONY += headers_install
-headers_install: headers
-	$(call cmd,headers_install)
-
-PHONY += archheaders archscripts
-
-hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
-
-PHONY += headers
-headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
-	$(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML))
-	$(Q)$(MAKE) $(hdr-inst)=include/uapi
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi
-
 # Deprecated. It is no-op now.
 PHONY += headers_check
 headers_check:
@@ -1426,7 +1465,9 @@ kselftest-merge:
 # Devicetree files

 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/boot/dts/),)
-dtstree := arch/$(SRCARCH)/boot/dts
+# ANDROID: allow this to be overridden by the build environment. This allows
+# one to compile a device tree that is located out-of-tree.
+dtstree ?= arch/$(SRCARCH)/boot/dts
 endif

 ifneq ($(dtstree),)
@@ -1492,7 +1533,9 @@ endif
 # using awk while concatenating to the final file.

 PHONY += modules
-modules: $(if $(KBUILD_BUILTIN),vmlinux) modules_check modules_prepare
+# if KBUILD_BUILTIN && !KBUILD_MIXED_TREE, depend on vmlinux
+modules: $(if $(KBUILD_BUILTIN), $(if $(KBUILD_MIXED_TREE),,vmlinux))
+modules: modules_check modules_prepare

 cmd_modules_order = $(AWK) '!x[$$0]++' $(real-prereqs) > $@

@@ -1537,8 +1580,8 @@ __modinst_pre:
 		ln -s $(CURDIR) $(MODLIB)/build ; \
 	fi
 	@sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
-	@cp -f modules.builtin $(MODLIB)/
-	@cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
+	@cp -f $(mixed-build-prefix)modules.builtin $(MODLIB)/
+	@cp -f $(or $(mixed-build-prefix),$(objtree)/)modules.builtin.modinfo $(MODLIB)/

 endif # CONFIG_MODULES

@@ -1799,6 +1842,8 @@ help:
 	@echo  ''
 	@echo  '  modules         - default target, build the module(s)'
 	@echo  '  modules_install - install the module'
+	@echo  '  headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'
+	@echo  '                    (default: $(abspath $(INSTALL_HDR_PATH)))'
 	@echo  '  clean           - remove generated files in module directory only'
 	@echo  ''

@@ -1823,7 +1868,7 @@ modules_check: $(MODORDER)

 quiet_cmd_depmod = DEPMOD  $(MODLIB)
      cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
-                   $(KERNELRELEASE)
+                   $(KERNELRELEASE) $(mixed-build-prefix)

 modules_install:
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
@@ -1860,7 +1905,8 @@ ifdef single-build

 # .ko is special because modpost is needed
 single-ko := $(sort $(filter %.ko, $(MAKECMDGOALS)))
-single-no-ko := $(sort $(patsubst %.ko,%.mod, $(MAKECMDGOALS)))
+single-no-ko := $(filter-out $(single-ko), $(MAKECMDGOALS)) \
+		$(foreach x, o mod, $(patsubst %.ko, %.$x, $(single-ko)))

 $(single-ko): single_modpost
 	@:
@@ -1902,7 +1948,7 @@ descend: $(build-dirs)
 $(build-dirs): prepare
 	$(Q)$(MAKE) $(build)=$@ \
 	single-build=$(if $(filter-out $@/, $(filter $@/%, $(KBUILD_SINGLE_TARGETS))),1) \
-	need-builtin=1 need-modorder=1
+	$(if $(KBUILD_MIXED_TREE),,need-builtin=1) need-modorder=1

 clean-dirs := $(addprefix _clean_, $(clean-dirs))
 PHONY += $(clean-dirs) clean
@@ -1911,12 +1957,14 @@ $(clean-dirs):

 clean: $(clean-dirs)
 	$(call cmd,rmfiles)
-	@find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
+	@find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) \
+		$(if $(filter-out arch/$(SRCARCH)/boot/dts, $(dtstree)), $(dtstree)) \
+		$(RCS_FIND_IGNORE) \
 		\( -name '*.[aios]' -o -name '*.ko' -o -name '.*.cmd' \
 		-o -name '*.ko.*' \
 		-o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
 		-o -name '*.dwo' -o -name '*.lst' \
-		-o -name '*.su' -o -name '*.mod' \
+		-o -name '*.su' -o -name '*.mod' -o -name '*.usyms' \
 		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
 		-o -name '*.lex.c' -o -name '*.tab.[ch]' \
 		-o -name '*.asn1.[ch]' \
@@ -2007,7 +2055,8 @@ checkstack:
 	$(PERL) $(srctree)/scripts/checkstack.pl $(CHECKSTACK_ARCH)

 kernelrelease:
-	@echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
+	@echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion \
+		$(srctree) $(BRANCH) $(KMI_GENERATION))"

 kernelversion:
 	@echo $(KERNELVERSION)
--- a/2
+++ b/2
@@ -0,0 +1,2 @@
+# include OWNERS from the authoritative android-mainline branch
+include kernel/common:android-mainline:/OWNERS
--- a/android/OWNERS
+++ b/android/OWNERS
@@ -0,0 +1,8 @@
+# If we ever add another OWNERS above this directory, it's likely to be
+# more permissive, so don't inherit from it
+set noparent
+include kernel/common:android-mainline:/OWNERS_DrNo
+
+# Downstream boards maintained directly in this manifest branch
+per-file abi_gki_aarch64_cuttlefish = adelva@google.com, rammuthiah@google.com
+per-file abi_gki_aarch64_goldfish = rkir@google.com
--- a/android/abi_gki_aarch64
+++ b/android/abi_gki_aarch64
@@ -0,0 +1,4 @@
+[abi_symbol_list]
+# commonly used symbols
+  module_layout
+  __put_task_struct
--- a/android/abi_gki_aarch64_db845c
+++ b/android/abi_gki_aarch64_db845c
--- a/android/abi_gki_aarch64_exynos
+++ b/android/abi_gki_aarch64_exynos
--- a/android/abi_gki_aarch64_pixel
+++ b/android/abi_gki_aarch64_pixel
--- a/android/abi_gki_aarch64_virtual_device
+++ b/android/abi_gki_aarch64_virtual_device
--- a/android/abi_gki_protected_exports
+++ b/android/abi_gki_protected_exports
@@ -0,0 +1,517 @@
+__cfg80211_alloc_event_skb
+__cfg80211_alloc_reply_skb
+__cfg80211_radar_event
+__cfg80211_send_event_skb
+__hci_cmd_send
+__hci_cmd_sync
+__hci_cmd_sync_ev
+__nfc_alloc_vendor_cmd_reply_skb
+alloc_can_err_skb
+alloc_can_skb
+alloc_candev_mqs
+alloc_canfd_skb
+arc4_crypt
+arc4_setkey
+baswap
+bridge_tunnel_header
+bt_accept_dequeue
+bt_accept_enqueue
+bt_accept_unlink
+bt_debugfs
+bt_err
+bt_err_ratelimited
+bt_info
+bt_procfs_cleanup
+bt_procfs_init
+bt_sock_ioctl
+bt_sock_link
+bt_sock_poll
+bt_sock_reclassify_lock
+bt_sock_recvmsg
+bt_sock_register
+bt_sock_stream_recvmsg
+bt_sock_unlink
+bt_sock_unregister
+bt_sock_wait_ready
+bt_sock_wait_state
+bt_to_errno
+bt_warn
+bt_warn_ratelimited
+btbcm_check_bdaddr
+btbcm_finalize
+btbcm_initialize
+btbcm_patchram
+btbcm_read_pcm_int_params
+btbcm_set_bdaddr
+btbcm_setup_apple
+btbcm_setup_patchram
+btbcm_write_pcm_int_params
+can_bus_off
+can_change_mtu
+can_change_state
+can_fd_dlc2len
+can_fd_len2dlc
+can_free_echo_skb
+can_get_echo_skb
+can_get_state_str
+can_proto_register
+can_proto_unregister
+can_put_echo_skb
+can_rx_offload_add_fifo
+can_rx_offload_add_manual
+can_rx_offload_add_timestamp
+can_rx_offload_del
+can_rx_offload_enable
+can_rx_offload_get_echo_skb
+can_rx_offload_irq_finish
+can_rx_offload_irq_offload_fifo
+can_rx_offload_irq_offload_timestamp
+can_rx_offload_queue_sorted
+can_rx_offload_queue_tail
+can_rx_offload_threaded_irq_finish
+can_rx_register
+can_rx_unregister
+can_send
+can_skb_get_frame_len
+can_sock_destruct
+cfg80211_any_usable_channels
+cfg80211_assoc_comeback
+cfg80211_assoc_failure
+cfg80211_auth_timeout
+cfg80211_background_cac_abort
+cfg80211_bss_color_notify
+cfg80211_bss_flush
+cfg80211_bss_iter
+cfg80211_cac_event
+cfg80211_calculate_bitrate
+cfg80211_ch_switch_notify
+cfg80211_ch_switch_started_notify
+cfg80211_chandef_compatible
+cfg80211_chandef_create
+cfg80211_chandef_dfs_required
+cfg80211_chandef_usable
+cfg80211_chandef_valid
+cfg80211_check_combinations
+cfg80211_check_station_change
+cfg80211_classify8021d
+cfg80211_conn_failed
+cfg80211_connect_done
+cfg80211_control_port_tx_status
+cfg80211_cqm_beacon_loss_notify
+cfg80211_cqm_pktloss_notify
+cfg80211_cqm_rssi_notify
+cfg80211_cqm_txe_notify
+cfg80211_crit_proto_stopped
+cfg80211_del_sta_sinfo
+cfg80211_disconnected
+cfg80211_external_auth_request
+cfg80211_find_elem_match
+cfg80211_find_vendor_elem
+cfg80211_free_nan_func
+cfg80211_ft_event
+cfg80211_get_bss
+cfg80211_get_drvinfo
+cfg80211_get_iftype_ext_capa
+cfg80211_get_p2p_attr
+cfg80211_get_station
+cfg80211_gtk_rekey_notify
+cfg80211_ibss_joined
+cfg80211_iftype_allowed
+cfg80211_inform_bss_data
+cfg80211_inform_bss_frame_data
+cfg80211_is_element_inherited
+cfg80211_iter_combinations
+cfg80211_merge_profile
+cfg80211_mgmt_tx_status_ext
+cfg80211_michael_mic_failure
+cfg80211_nan_func_terminated
+cfg80211_nan_match
+cfg80211_new_sta
+cfg80211_notify_new_peer_candidate
+cfg80211_pmksa_candidate_notify
+cfg80211_pmsr_complete
+cfg80211_pmsr_report
+cfg80211_port_authorized
+cfg80211_probe_status
+cfg80211_put_bss
+cfg80211_ready_on_channel
+cfg80211_ref_bss
+cfg80211_reg_can_beacon
+cfg80211_reg_can_beacon_relax
+cfg80211_register_netdevice
+cfg80211_remain_on_channel_expired
+cfg80211_report_obss_beacon_khz
+cfg80211_report_wowlan_wakeup
+cfg80211_roamed
+cfg80211_rx_assoc_resp
+cfg80211_rx_control_port
+cfg80211_rx_mgmt_ext
+cfg80211_rx_mlme_mgmt
+cfg80211_rx_spurious_frame
+cfg80211_rx_unexpected_4addr_frame
+cfg80211_rx_unprot_mlme_mgmt
+cfg80211_scan_done
+cfg80211_sched_scan_results
+cfg80211_sched_scan_stopped
+cfg80211_sched_scan_stopped_locked
+cfg80211_send_layer2_update
+cfg80211_shutdown_all_interfaces
+cfg80211_sinfo_alloc_tid_stats
+cfg80211_sta_opmode_change_notify
+cfg80211_stop_iface
+cfg80211_tdls_oper_request
+cfg80211_tx_mgmt_expired
+cfg80211_tx_mlme_mgmt
+cfg80211_unlink_bss
+cfg80211_unregister_wdev
+cfg80211_update_owe_info_event
+cfg80211_vendor_cmd_get_sender
+cfg80211_vendor_cmd_reply
+close_candev
+free_candev
+freq_reg_info
+get_wiphy_regdom
+h4_recv_buf
+hci_alloc_dev_priv
+hci_cmd_sync
+hci_conn_check_secure
+hci_conn_security
+hci_conn_switch_role
+hci_free_dev
+hci_get_route
+hci_mgmt_chan_register
+hci_mgmt_chan_unregister
+hci_recv_diag
+hci_recv_frame
+hci_register_cb
+hci_register_dev
+hci_release_dev
+hci_reset_dev
+hci_resume_dev
+hci_set_fw_info
+hci_set_hw_info
+hci_suspend_dev
+hci_uart_register_device
+hci_uart_tx_wakeup
+hci_uart_unregister_device
+hci_unregister_cb
+hci_unregister_dev
+hidp_hid_driver
+ieee80211_alloc_hw_nm
+ieee80211_amsdu_to_8023s
+ieee80211_ap_probereq_get
+ieee80211_ave_rssi
+ieee80211_beacon_cntdwn_is_complete
+ieee80211_beacon_get_template
+ieee80211_beacon_get_tim
+ieee80211_beacon_loss
+ieee80211_beacon_set_cntdwn
+ieee80211_beacon_update_cntdwn
+ieee80211_bss_get_elem
+ieee80211_calc_rx_airtime
+ieee80211_calc_tx_airtime
+ieee80211_chandef_to_operating_class
+ieee80211_channel_to_freq_khz
+ieee80211_chswitch_done
+ieee80211_color_change_finish
+ieee80211_connection_loss
+ieee80211_cqm_beacon_loss_notify
+ieee80211_cqm_rssi_notify
+ieee80211_csa_finish
+ieee80211_ctstoself_duration
+ieee80211_ctstoself_get
+ieee80211_data_to_8023_exthdr
+ieee80211_disable_rssi_reports
+ieee80211_disconnect
+ieee80211_enable_rssi_reports
+ieee80211_find_sta
+ieee80211_find_sta_by_ifaddr
+ieee80211_free_hw
+ieee80211_free_txskb
+ieee80211_freq_khz_to_channel
+ieee80211_generic_frame_duration
+ieee80211_get_bssid
+ieee80211_get_buffered_bc
+ieee80211_get_channel_khz
+ieee80211_get_fils_discovery_tmpl
+ieee80211_get_hdrlen_from_skb
+ieee80211_get_key_rx_seq
+ieee80211_get_mesh_hdrlen
+ieee80211_get_num_supported_channels
+ieee80211_get_response_rate
+ieee80211_get_tkip_p1k_iv
+ieee80211_get_tkip_p2k
+ieee80211_get_tkip_rx_p1k
+ieee80211_get_tx_rates
+ieee80211_get_unsol_bcast_probe_resp_tmpl
+ieee80211_get_vht_max_nss
+ieee80211_gtk_rekey_add
+ieee80211_gtk_rekey_notify
+ieee80211_hdrlen
+ieee80211_ie_split_ric
+ieee80211_iter_chan_contexts_atomic
+ieee80211_iter_keys
+ieee80211_iter_keys_rcu
+ieee80211_iterate_active_interfaces_atomic
+ieee80211_iterate_active_interfaces_mtx
+ieee80211_iterate_interfaces
+ieee80211_iterate_stations_atomic
+ieee80211_key_mic_failure
+ieee80211_key_replay
+ieee80211_manage_rx_ba_offl
+ieee80211_mandatory_rates
+ieee80211_mark_rx_ba_filtered_frames
+ieee80211_nan_func_match
+ieee80211_nan_func_terminated
+ieee80211_next_txq
+ieee80211_nullfunc_get
+ieee80211_operating_class_to_band
+ieee80211_parse_p2p_noa
+ieee80211_probereq_get
+ieee80211_proberesp_get
+ieee80211_pspoll_get
+ieee80211_queue_delayed_work
+ieee80211_queue_stopped
+ieee80211_queue_work
+ieee80211_radar_detected
+ieee80211_radiotap_iterator_init
+ieee80211_radiotap_iterator_next
+ieee80211_rate_control_register
+ieee80211_rate_control_unregister
+ieee80211_ready_on_channel
+ieee80211_register_hw
+ieee80211_remain_on_channel_expired
+ieee80211_remove_key
+ieee80211_report_low_ack
+ieee80211_report_wowlan_wakeup
+ieee80211_request_smps
+ieee80211_reserve_tid
+ieee80211_restart_hw
+ieee80211_resume_disconnect
+ieee80211_return_txq
+ieee80211_rts_duration
+ieee80211_rts_get
+ieee80211_rx_ba_timer_expired
+ieee80211_rx_irqsafe
+ieee80211_rx_list
+ieee80211_rx_napi
+ieee80211_s1g_channel_width
+ieee80211_scan_completed
+ieee80211_sched_scan_results
+ieee80211_sched_scan_stopped
+ieee80211_schedule_txq
+ieee80211_send_bar
+ieee80211_send_eosp_nullfunc
+ieee80211_set_key_rx_seq
+ieee80211_sta_block_awake
+ieee80211_sta_eosp
+ieee80211_sta_ps_transition
+ieee80211_sta_pspoll
+ieee80211_sta_register_airtime
+ieee80211_sta_set_buffered
+ieee80211_sta_uapsd_trigger
+ieee80211_start_tx_ba_cb_irqsafe
+ieee80211_start_tx_ba_session
+ieee80211_stop_queue
+ieee80211_stop_queues
+ieee80211_stop_rx_ba_session
+ieee80211_stop_tx_ba_cb_irqsafe
+ieee80211_stop_tx_ba_session
+ieee80211_tdls_oper_request
+ieee80211_tkip_add_iv
+ieee80211_tx_dequeue
+ieee80211_tx_prepare_skb
+ieee80211_tx_rate_update
+ieee80211_tx_status
+ieee80211_tx_status_8023
+ieee80211_tx_status_ext
+ieee80211_tx_status_irqsafe
+ieee80211_txq_airtime_check
+ieee80211_txq_get_depth
+ieee80211_txq_may_transmit
+ieee80211_txq_schedule_start
+ieee80211_unregister_hw
+ieee80211_unreserve_tid
+ieee80211_update_mu_groups
+ieee80211_update_p2p_noa
+ieee80211_vif_to_wdev
+ieee80211_wake_queue
+ieee80211_wake_queues
+ieee802154_alloc_hw
+ieee802154_free_hw
+ieee802154_hdr_peek
+ieee802154_hdr_peek_addrs
+ieee802154_hdr_pull
+ieee802154_hdr_push
+ieee802154_max_payload
+ieee802154_register_hw
+ieee802154_rx_irqsafe
+ieee802154_stop_queue
+ieee802154_unregister_hw
+ieee802154_wake_queue
+ieee802154_xmit_complete
+ieeee80211_obss_color_collision_notify
+l2cap_add_psm
+l2cap_chan_close
+l2cap_chan_connect
+l2cap_chan_create
+l2cap_chan_del
+l2cap_chan_list
+l2cap_chan_put
+l2cap_chan_send
+l2cap_chan_set_defaults
+l2cap_conn_get
+l2cap_conn_put
+l2cap_is_socket
+l2cap_register_user
+l2cap_unregister_user
+l2tp_recv_common
+l2tp_session_create
+l2tp_session_dec_refcount
+l2tp_session_delete
+l2tp_session_get
+l2tp_session_get_by_ifname
+l2tp_session_get_nth
+l2tp_session_inc_refcount
+l2tp_session_register
+l2tp_session_set_header_len
+l2tp_sk_to_tunnel
+l2tp_tunnel_create
+l2tp_tunnel_dec_refcount
+l2tp_tunnel_delete
+l2tp_tunnel_get
+l2tp_tunnel_get_nth
+l2tp_tunnel_get_session
+l2tp_tunnel_inc_refcount
+l2tp_tunnel_register
+l2tp_udp_encap_recv
+l2tp_xmit_skb
+lowpan_header_compress
+lowpan_header_decompress
+lowpan_nhc_add
+lowpan_nhc_del
+lowpan_register_netdev
+lowpan_register_netdevice
+lowpan_unregister_netdev
+lowpan_unregister_netdevice
+nfc_add_se
+nfc_alloc_recv_skb
+nfc_allocate_device
+nfc_class
+nfc_dep_link_is_up
+nfc_driver_failure
+nfc_find_se
+nfc_fw_download_done
+nfc_get_local_general_bytes
+nfc_proto_register
+nfc_proto_unregister
+nfc_register_device
+nfc_remove_se
+nfc_se_connectivity
+nfc_se_transaction
+nfc_send_to_raw_sock
+nfc_set_remote_general_bytes
+nfc_target_lost
+nfc_targets_found
+nfc_tm_activated
+nfc_tm_data_received
+nfc_tm_deactivated
+nfc_unregister_device
+nfc_vendor_cmd_reply
+of_can_transceiver
+open_candev
+ppp_channel_index
+ppp_dev_name
+ppp_input
+ppp_input_error
+ppp_output_wakeup
+ppp_register_channel
+ppp_register_compressor
+ppp_register_net_channel
+ppp_unit_number
+ppp_unregister_channel
+ppp_unregister_compressor
+pppox_compat_ioctl
+pppox_ioctl
+pppox_unbind_sock
+qca_read_soc_version
+qca_send_pre_shutdown_cmd
+qca_set_bdaddr
+qca_set_bdaddr_rome
+qca_uart_setup
+rate_control_set_rates
+reg_initiator_name
+reg_query_regdb_wmm
+register_candev
+register_pppox_proto
+regulatory_hint
+regulatory_pre_cac_allowed
+regulatory_set_wiphy_regd
+regulatory_set_wiphy_regd_sync
+rfc1042_header
+rfkill_alloc
+rfkill_blocked
+rfkill_destroy
+rfkill_find_type
+rfkill_get_led_trigger_name
+rfkill_init_sw_state
+rfkill_pause_polling
+rfkill_register
+rfkill_resume_polling
+rfkill_set_hw_state_reason
+rfkill_set_led_trigger_name
+rfkill_set_states
+rfkill_set_sw_state
+rfkill_unregister
+safe_candev_priv
+slhc_compress
+slhc_free
+slhc_init
+slhc_remember
+slhc_toss
+slhc_uncompress
+tipc_dump_done
+tipc_dump_start
+tipc_nl_sk_walk
+tipc_sk_fill_sock_diag
+unregister_candev
+unregister_pppox_proto
+usb_serial_claim_interface
+usb_serial_deregister_drivers
+usb_serial_generic_chars_in_buffer
+usb_serial_generic_close
+usb_serial_generic_get_icount
+usb_serial_generic_open
+usb_serial_generic_process_read_urb
+usb_serial_generic_read_bulk_callback
+usb_serial_generic_resume
+usb_serial_generic_submit_read_urbs
+usb_serial_generic_throttle
+usb_serial_generic_tiocmiwait
+usb_serial_generic_unthrottle
+usb_serial_generic_wait_until_sent
+usb_serial_generic_write
+usb_serial_generic_write_bulk_callback
+usb_serial_generic_write_start
+usb_serial_handle_dcd_change
+usb_serial_port_softint
+usb_serial_register_drivers
+usb_serial_resume
+usb_serial_suspend
+wdev_chandef
+wdev_to_ieee80211_vif
+wiphy_apply_custom_regulatory
+wiphy_free
+wiphy_new_nm
+wiphy_read_of_freq_limits
+wiphy_register
+wiphy_rfkill_set_hw_state_reason
+wiphy_rfkill_start_polling
+wiphy_to_ieee80211_hw
+wiphy_unregister
+wpan_phy_find
+wpan_phy_for_each
+wpan_phy_free
+wpan_phy_new
+wpan_phy_register
+wpan_phy_unregister
--- a/android/abi_gki_rockpi4
+++ b/android/abi_gki_rockpi4
@@ -0,0 +1,4 @@
+[abi_symbol_list]
+# commonly used symbols
+  module_layout
+  __put_task_struct
--- a/android/gki_aarch64_modules
+++ b/android/gki_aarch64_modules
@@ -0,0 +1,48 @@
+mm/zsmalloc.ko
+drivers/block/zram/zram.ko
+drivers/net/can/dev/can-dev.ko
+drivers/net/can/vcan.ko
+drivers/net/can/slcan.ko
+drivers/net/ppp/ppp_generic.ko
+drivers/net/ppp/bsd_comp.ko
+drivers/net/ppp/ppp_deflate.ko
+drivers/net/ppp/ppp_mppe.ko
+drivers/net/ppp/pppox.ko
+drivers/net/ppp/pptp.ko
+drivers/net/slip/slhc.ko
+drivers/usb/class/cdc-acm.ko
+drivers/usb/serial/usbserial.ko
+drivers/usb/serial/ftdi_sio.ko
+drivers/bluetooth/hci_uart.ko
+drivers/bluetooth/btsdio.ko
+drivers/bluetooth/btbcm.ko
+drivers/bluetooth/btqca.ko
+net/8021q/8021q.ko
+net/wireless/cfg80211.ko
+net/can/can.ko
+net/can/can-raw.ko
+net/can/can-bcm.ko
+net/can/can-gw.ko
+net/bluetooth/bluetooth.ko
+net/bluetooth/rfcomm/rfcomm.ko
+net/bluetooth/hidp/hidp.ko
+net/l2tp/l2tp_core.ko
+net/l2tp/l2tp_ppp.ko
+net/mac80211/mac80211.ko
+net/tipc/tipc.ko
+net/tipc/diag.ko
+net/rfkill/rfkill.ko
+net/6lowpan/6lowpan.ko
+net/6lowpan/nhc_dest.ko
+net/6lowpan/nhc_fragment.ko
+net/6lowpan/nhc_hop.ko
+net/6lowpan/nhc_ipv6.ko
+net/6lowpan/nhc_mobility.ko
+net/6lowpan/nhc_routing.ko
+net/6lowpan/nhc_udp.ko
+net/ieee802154/6lowpan/ieee802154_6lowpan.ko
+net/ieee802154/ieee802154.ko
+net/ieee802154/ieee802154_socket.ko
+net/mac802154/mac802154.ko
+net/nfc/nfc.ko
+lib/crypto/libarc4.ko
--- a/android/gki_protected_modules
+++ b/android/gki_protected_modules
@@ -0,0 +1,47 @@
+drivers/bluetooth/btbcm.ko
+drivers/bluetooth/btqca.ko
+drivers/bluetooth/btsdio.ko
+drivers/bluetooth/hci_uart.ko
+drivers/net/can/dev/can-dev.ko
+drivers/net/can/slcan.ko
+drivers/net/can/vcan.ko
+drivers/net/ppp/bsd_comp.ko
+drivers/net/ppp/ppp_deflate.ko
+drivers/net/ppp/ppp_generic.ko
+drivers/net/ppp/ppp_mppe.ko
+drivers/net/ppp/pppox.ko
+drivers/net/ppp/pptp.ko
+drivers/net/slip/slhc.ko
+drivers/usb/class/cdc-acm.ko
+drivers/usb/serial/ftdi_sio.ko
+drivers/usb/serial/usbserial.ko
+lib/crypto/libarc4.ko
+net/6lowpan/6lowpan.ko
+net/6lowpan/nhc_dest.ko
+net/6lowpan/nhc_fragment.ko
+net/6lowpan/nhc_hop.ko
+net/6lowpan/nhc_ipv6.ko
+net/6lowpan/nhc_mobility.ko
+net/6lowpan/nhc_routing.ko
+net/6lowpan/nhc_udp.ko
+net/8021q/8021q.ko
+net/bluetooth/bluetooth.ko
+net/bluetooth/hidp/hidp.ko
+net/bluetooth/rfcomm/rfcomm.ko
+net/can/can.ko
+net/can/can-bcm.ko
+net/can/can-gw.ko
+net/can/can-raw.ko
+net/ieee802154/6lowpan/ieee802154_6lowpan.ko
+net/ieee802154/ieee802154.ko
+net/ieee802154/ieee802154_socket.ko
+net/l2tp/l2tp_core.ko
+net/l2tp/l2tp_ppp.ko
+net/mac80211/mac80211.ko
+net/mac802154/mac802154.ko
+net/nfc/nfc.ko
+net/rfkill/rfkill.ko
+net/tipc/diag.ko
+net/tipc/tipc.ko
+net/wireless/cfg80211.ko
+
--- a/android/gki_system_dlkm_modules
+++ b/android/gki_system_dlkm_modules
@@ -0,0 +1,49 @@
+drivers/block/zram/zram.ko
+drivers/bluetooth/btbcm.ko
+drivers/bluetooth/btqca.ko
+drivers/bluetooth/btsdio.ko
+drivers/bluetooth/hci_uart.ko
+drivers/net/can/dev/can-dev.ko
+drivers/net/can/slcan.ko
+drivers/net/can/vcan.ko
+drivers/net/ppp/bsd_comp.ko
+drivers/net/ppp/ppp_deflate.ko
+drivers/net/ppp/ppp_generic.ko
+drivers/net/ppp/ppp_mppe.ko
+drivers/net/ppp/pppox.ko
+drivers/net/ppp/pptp.ko
+drivers/net/slip/slhc.ko
+drivers/usb/class/cdc-acm.ko
+drivers/usb/serial/ftdi_sio.ko
+drivers/usb/serial/usbserial.ko
+lib/crypto/libarc4.ko
+mm/zsmalloc.ko
+net/6lowpan/6lowpan.ko
+net/6lowpan/nhc_dest.ko
+net/6lowpan/nhc_fragment.ko
+net/6lowpan/nhc_hop.ko
+net/6lowpan/nhc_ipv6.ko
+net/6lowpan/nhc_mobility.ko
+net/6lowpan/nhc_routing.ko
+net/6lowpan/nhc_udp.ko
+net/8021q/8021q.ko
+net/bluetooth/bluetooth.ko
+net/bluetooth/hidp/hidp.ko
+net/bluetooth/rfcomm/rfcomm.ko
+net/can/can.ko
+net/can/can-bcm.ko
+net/can/can-gw.ko
+net/can/can-raw.ko
+net/ieee802154/6lowpan/ieee802154_6lowpan.ko
+net/ieee802154/ieee802154.ko
+net/ieee802154/ieee802154_socket.ko
+net/l2tp/l2tp_core.ko
+net/l2tp/l2tp_ppp.ko
+net/mac80211/mac80211.ko
+net/mac802154/mac802154.ko
+net/nfc/nfc.ko
+net/rfkill/rfkill.ko
+net/tipc/diag.ko
+net/tipc/tipc.ko
+net/wireless/cfg80211.ko
+
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -24,6 +24,13 @@ config KEXEC_ELF
 config HAVE_IMA_KEXEC
 	bool

+config ARCH_HAS_SUBPAGE_FAULTS
+	bool
+	help
+	  Select if the architecture can check permissions at sub-page
+	  granularity (e.g. arm64 MTE). The probe_user_*() functions
+	  must be implemented.
+
 config SET_FS
 	bool

@@ -713,10 +720,7 @@ config ARCH_SUPPORTS_CFI_CLANG
 config CFI_CLANG
 	bool "Use Clang's Control Flow Integrity (CFI)"
 	depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG
-	# Clang >= 12:
-	# - https://bugs.llvm.org/show_bug.cgi?id=46258
-	# - https://bugs.llvm.org/show_bug.cgi?id=47479
-	depends on CLANG_VERSION >= 120000
+	depends on CLANG_VERSION >= 140000
 	select KALLSYMS
 	help
 	  This option enables Clang’s forward-edge Control Flow Integrity
@@ -1238,6 +1242,9 @@ config RELR
 config ARCH_HAS_MEM_ENCRYPT
 	bool

+config ARCH_HAS_MEM_RELINQUISH
+	bool
+
 config ARCH_HAS_CC_PLATFORM
 	bool

@@ -1295,6 +1302,17 @@ config ARCH_HAS_ELFCORE_COMPAT
 config ARCH_HAS_PARANOID_L1D_FLUSH
 	bool

+config ARCH_HAVE_TRACE_MMIO_ACCESS
+	bool
+
+config ARCH_HAS_NONLEAF_PMD_YOUNG
+	bool
+	help
+	  Architectures that select this option are capable of setting the
+	  accessed bit in non-leaf PMD entries when using them as part of linear
+	  address translations. Page table walkers that clear the accessed bit
+	  may use this capability to reduce their search space.
+
 source "kernel/gcov/Kconfig"

 source "scripts/gcc-plugins/Kconfig"
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -54,6 +54,7 @@ config ARM
 	select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_IRQ_IPI if SMP
+	select ARCH_WANTS_IRQ_RAW if GENERIC_IRQ_IPI
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IDLE_POLL_SETUP
--- a/arch/arm/OWNERS
+++ b/arch/arm/OWNERS
@@ -0,0 +1 @@
+include ../arm64/OWNERS
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -77,10 +77,10 @@ CPPFLAGS_vmlinux.lds += -DTEXT_OFFSET="$(TEXT_OFFSET)"
 CPPFLAGS_vmlinux.lds += -DMALLOC_SIZE="$(MALLOC_SIZE)"

 compress-$(CONFIG_KERNEL_GZIP) = gzip
-compress-$(CONFIG_KERNEL_LZO)  = lzo
-compress-$(CONFIG_KERNEL_LZMA) = lzma
-compress-$(CONFIG_KERNEL_XZ)   = xzkern
-compress-$(CONFIG_KERNEL_LZ4)  = lz4
+compress-$(CONFIG_KERNEL_LZO)  = lzo_with_size
+compress-$(CONFIG_KERNEL_LZMA) = lzma_with_size
+compress-$(CONFIG_KERNEL_XZ)   = xzkern_with_size
+compress-$(CONFIG_KERNEL_LZ4)  = lz4_with_size

 libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o

--- a/arch/arm/include/asm/hypervisor.h
+++ b/arch/arm/include/asm/hypervisor.h
@@ -6,5 +6,6 @@

 void kvm_init_hyp_services(void);
 bool kvm_arm_hyp_service_available(u32 func_id);
+void kvm_arm_init_hyp_services(void);

 #endif
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -62,14 +62,8 @@ user_backtrace(struct frame_tail __user *tail,
 void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
 	struct frame_tail __user *tail;

-	if (guest_cbs && guest_cbs->is_in_guest()) {
-		/* We don't support guest os callchain now */
-		return;
-	}
-
 	perf_callchain_store(entry, regs->ARM_pc);

 	if (!current->mm)
@@ -99,44 +93,25 @@ callchain_trace(struct stackframe *fr,
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
 	struct stackframe fr;

-	if (guest_cbs && guest_cbs->is_in_guest()) {
-		/* We don't support guest os callchain now */
-		return;
-	}
-
 	arm_get_current_stackframe(regs, &fr);
 	walk_stackframe(&fr, callchain_trace, entry);
 }

 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
-
-	if (guest_cbs && guest_cbs->is_in_guest())
-		return guest_cbs->get_guest_ip();
-
 	return instruction_pointer(regs);
 }

 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
-	struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
 	int misc = 0;

-	if (guest_cbs && guest_cbs->is_in_guest()) {
-		if (guest_cbs->is_user_mode())
-			misc |= PERF_RECORD_MISC_GUEST_USER;
-		else
-			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
-	} else {
-		if (user_mode(regs))
-			misc |= PERF_RECORD_MISC_USER;
-		else
-			misc |= PERF_RECORD_MISC_KERNEL;
-	}
+	if (user_mode(regs))
+		misc |= PERF_RECORD_MISC_USER;
+	else
+		misc |= PERF_RECORD_MISC_KERNEL;

 	return misc;
 }
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -51,6 +51,10 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/ipi.h>

+EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_raise);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_entry);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_exit);
+
 /*
 * as from 2.5, kernels no longer have an init_tasks structure
 * so we need some other way of telling a new secondary core
@@ -727,7 +731,12 @@ void __init set_smp_ipi_range(int ipi_base, int n)
 		WARN_ON(err);

 		ipi_desc[i] = irq_to_desc(ipi_base + i);
-		irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+
+		if (i != IPI_RESCHEDULE)
+			irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+		else
+			/* The recheduling IPI is special... */
+			irq_set_status_flags(ipi_base + i, IRQ_HIDDEN|IRQ_RAW);
 	}

 	ipi_irq_base = ipi_base;
--- a/arch/arm/mm/kasan_init.c
+++ b/arch/arm/mm/kasan_init.c
@@ -32,7 +32,7 @@ pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
 static __init void *kasan_alloc_block(size_t size)
 {
 	return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
-				      MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE);
+				      MEMBLOCK_ALLOC_NOLEAKTRACE, NUMA_NO_NODE);
 }

 static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
 	select ACPI_SPCR_TABLE if ACPI
 	select ACPI_PPTT if ACPI
 	select ARCH_HAS_DEBUG_WX
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
 	select ARCH_ENABLE_MEMORY_HOTPLUG
@@ -25,9 +26,12 @@ config ARM64
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE
+	select ARCH_HAS_IOREMAP_PHYS_HOOKS
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_KEEPINITRD
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_MEM_ENCRYPT
+	select ARCH_HAS_MEM_RELINQUISH
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PTE_DEVMAP
 	select ARCH_HAS_PTE_SPECIAL
@@ -45,6 +49,7 @@ config ARM64
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
 	select ARCH_HAVE_ELF_PROT
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAVE_TRACE_MMIO_ACCESS
 	select ARCH_INLINE_READ_LOCK if !PREEMPTION
 	select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
 	select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION
@@ -122,6 +127,7 @@ config ARM64
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_IPI
+	select ARCH_WANTS_IRQ_RAW
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
@@ -135,6 +141,7 @@ config ARM64
 	select GENERIC_VDSO_TIME_NS
 	select HANDLE_DOMAIN_IRQ
 	select HARDIRQS_SW_RESEND
+	select HAVE_MOD_ARCH_SPECIFIC if (ARM64_MODULE_PLTS || KVM)
 	select HAVE_MOVE_PMD
 	select HAVE_MOVE_PUD
 	select HAVE_PCI
@@ -185,6 +192,7 @@ config ARM64
 	select HAVE_GCC_PLUGINS
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_KVM
 	select HAVE_NMI
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
@@ -203,7 +211,7 @@ config ARM64
 	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
-	select KASAN_VMALLOC if KASAN_GENERIC
+	select KASAN_VMALLOC if KASAN
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
@@ -221,6 +229,7 @@ config ARM64
 	select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
 	select TRACE_IRQFLAGS_SUPPORT
 	select TRACE_IRQFLAGS_NMI_SUPPORT
+	select ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT
 	help
 	  ARM 64-bit (AArch64) Linux support.

@@ -699,6 +708,130 @@ config ARM64_ERRATUM_1508412

 	  If unsure, say Y.

+config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
+	bool
+
+config ARM64_ERRATUM_2658417
+	bool "Cortex-A510: 2658417: remove BF16 support due to incorrect result"
+	default y
+	help
+	  This option adds the workaround for ARM Cortex-A510 erratum 2658417.
+	  Affected Cortex-A510 (r0p0 to r1p1) may produce the wrong result for
+	  BFMMLA or VMMLA instructions in rare circumstances when a pair of
+	  A510 CPUs are using shared neon hardware. As the sharing is not
+	  discoverable by the kernel, hide the BF16 HWCAP to indicate that
+	  user-space should not be using these instructions.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_2119858
+	bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
+	default y
+	depends on CORESIGHT_TRBE
+	select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
+	help
+	  This option adds the workaround for ARM Cortex-A710 erratum 2119858.
+
+	  Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
+	  data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
+	  the event of a WRAP event.
+
+	  Work around the issue by always making sure we move the TRBPTR_EL1 by
+	  256 bytes before enabling the buffer and filling the first 256 bytes of
+	  the buffer with ETM ignore packets upon disabling.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_2139208
+	bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode"
+	default y
+	depends on CORESIGHT_TRBE
+	select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
+	help
+	  This option adds the workaround for ARM Neoverse-N2 erratum 2139208.
+
+	  Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace
+	  data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
+	  the event of a WRAP event.
+
+	  Work around the issue by always making sure we move the TRBPTR_EL1 by
+	  256 bytes before enabling the buffer and filling the first 256 bytes of
+	  the buffer with ETM ignore packets upon disabling.
+
+	  If unsure, say Y.
+
+config ARM64_WORKAROUND_TSB_FLUSH_FAILURE
+	bool
+
+config ARM64_ERRATUM_2054223
+	bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace"
+	default y
+	select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
+	help
+	  Enable workaround for ARM Cortex-A710 erratum 2054223
+
+	  Affected cores may fail to flush the trace data on a TSB instruction, when
+	  the PE is in trace prohibited state. This will cause losing a few bytes
+	  of the trace cached.
+
+	  Workaround is to issue two TSB consecutively on affected cores.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_2067961
+	bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace"
+	default y
+	select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
+	help
+	  Enable workaround for ARM Neoverse-N2 erratum 2067961
+
+	  Affected cores may fail to flush the trace data on a TSB instruction, when
+	  the PE is in trace prohibited state. This will cause losing a few bytes
+	  of the trace cached.
+
+	  Workaround is to issue two TSB consecutively on affected cores.
+
+	  If unsure, say Y.
+
+config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+	bool
+
+config ARM64_ERRATUM_2253138
+	bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range"
+	depends on CORESIGHT_TRBE
+	default y
+	select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+	help
+	  This option adds the workaround for ARM Neoverse-N2 erratum 2253138.
+
+	  Affected Neoverse-N2 cores might write to an out-of-range address, not reserved
+	  for TRBE. Under some conditions, the TRBE might generate a write to the next
+	  virtually addressed page following the last page of the TRBE address space
+	  (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
+
+	  Work around this in the driver by always making sure that there is a
+	  page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_2224489
+	bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
+	depends on CORESIGHT_TRBE
+	default y
+	select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+	help
+	  This option adds the workaround for ARM Cortex-A710 erratum 2224489.
+
+	  Affected Cortex-A710 cores might write to an out-of-range address, not reserved
+	  for TRBE. Under some conditions, the TRBE might generate a write to the next
+	  virtually addressed page following the last page of the TRBE address space
+	  (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
+
+	  Work around this in the driver by always making sure that there is a
+	  page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_2441009
 	bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
 	default y
@@ -1639,6 +1772,21 @@ config ARM64_TLB_RANGE
 	  The feature introduces new assembly instructions, and they were
 	  support when binutils >= 2.30.

+config ARM64_MPAM
+	bool "Enable support for MPAM"
+	help
+	  Memory Partitioning and Monitoring is an optional extension
+	  that allows the CPUs to mark load and store transactions with
+	  labels for partition-id and performance-monitoring-group.
+	  System components, such as the caches, can use the partition-id
+	  to apply a performance policy. MPAM monitors can use the
+	  partition-id and performance-monitoring-group to measure the
+	  cache occupancy or data throughput.
+
+	  Use of this extension requires CPU support, support in the
+	  memory system components (MSC), and a description from firmware
+	  of where the MSC are in the address space.
+
 endmenu

 menu "ARMv8.5 architectural features"
@@ -1727,6 +1875,7 @@ config ARM64_MTE
 	depends on AS_HAS_LSE_ATOMICS
 	# Required for tag checking in the uaccess routines
 	depends on ARM64_PAN
+	select ARCH_HAS_SUBPAGE_FAULTS
 	select ARCH_USES_HIGH_VMA_FLAGS
 	help
 	  Memory Tagging (part of the ARMv8.5 Extensions) provides
@@ -1798,7 +1947,6 @@ config ARM64_SVE
 config ARM64_MODULE_PLTS
 	bool "Use PLTs to allow module memory to spill over into vmalloc area"
 	depends on MODULES
-	select HAVE_MOD_ARCH_SPECIFIC
 	help
 	  Allocate PLTs when loading modules so that jumps and calls whose
 	  targets are too far away for their relative offsets to be encoded
@@ -1932,6 +2080,12 @@ config CMDLINE_FROM_BOOTLOADER
 	  the boot loader doesn't provide any, the default kernel command
 	  string provided in CMDLINE will be used.

+config CMDLINE_EXTEND
+	bool "Extend bootloader kernel arguments"
+	help
+	  The command-line arguments provided by the boot loader will be
+	  appended to the default kernel command string.
+
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
 	help
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -167,7 +167,6 @@ config ARCH_MEDIATEK
 config ARCH_MESON
 	bool "Amlogic Platforms"
 	select COMMON_CLK
-	select MESON_IRQ_GPIO
 	help
 	  This enables support for the arm64 based Amlogic SoCs
 	  such as the s905, S905X/D, S912, A113X/D or S905X/D2
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -148,7 +148,10 @@ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 boot		:= arch/arm64/boot
 KBUILD_IMAGE	:= $(boot)/Image.gz

+# Don't compile Image in mixed build with "all" target
+ifndef KBUILD_MIXED_TREE
 all:	Image.gz
+endif


 Image: vmlinux
@@ -189,6 +192,11 @@ archclean:
 	$(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso
 	$(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso32

+ifeq ($(CONFIG_KVM),y)
+archscripts:
+	$(Q)$(MAKE) $(build)=arch/arm64/tools gen-hyprel
+endif
+
 ifeq ($(KBUILD_EXTMOD),)
 # We need to generate vdso-offsets.h before compiling certain files in kernel/.
 # In order to do that, we should use the archprepare target, but we can't since
--- a/arch/arm64/Makefile.postlink
+++ b/arch/arm64/Makefile.postlink
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# This file is included by the generic Kbuild makefile to permit the
+# architecture to perform postlink actions on vmlinux and any .ko module file.
+# In this case, we only need it for fips140.ko, which needs some postprocessing
+# for the integrity check mandated by FIPS. This involves making copies of the
+# relocation sections so that the module will have access to them at
+# initialization time, and calculating and injecting a HMAC digest into the
+# module. All other targets are NOPs.
+#
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include scripts/Kbuild.include
+
+CMD_FIPS140_GEN_HMAC = crypto/fips140_gen_hmac
+quiet_cmd_gen_hmac = HMAC    $@
+      cmd_gen_hmac = $(OBJCOPY) $@ \
+	--dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.text\S*')=$@.rela.text \
+	--dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.rodata\S*')=$@.rela.rodata && \
+	$(OBJCOPY) $@ \
+	--add-section=.init.rela.text=$@.rela.text \
+	--add-section=.init.rela.rodata=$@.rela.rodata \
+	--set-section-flags=.init.rela.text=alloc,readonly \
+	--set-section-flags=.init.rela.rodata=alloc,readonly && \
+	$(CMD_FIPS140_GEN_HMAC) $@
+
+# `@true` prevents complaints when there is nothing to be done
+
+vmlinux: FORCE
+	@true
+
+$(objtree)/crypto/fips140.ko: FORCE
+	$(call cmd,gen_hmac)
+
+%.ko: FORCE
+	@true
+
+clean:
+	rm -f $(objtree)/crypto/fips140.ko.rela.*
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
--- a/arch/arm64/OWNERS
+++ b/arch/arm64/OWNERS
@@ -0,0 +1,4 @@
+per-file crypto/**=file:/crypto/OWNERS
+per-file {include,kernel,kvm,lib}/**=mzyngier@google.com,willdeacon@google.com
+per-file mm/**=file:/mm/OWNERS
+per-file net/**=file:/net/OWNERS
--- a/arch/arm64/boot/dts/amlogic/Makefile
+++ b/arch/arm64/boot/dts/amlogic/Makefile
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_ARCH_MESON) += meson-axg-s400.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12a-sei510.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12a-sei510-android.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12a-u200.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12a-x96-max.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gsking-x.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gtking.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gtking-pro.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3-android.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-s922x-khadas-vim3.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-odroid-n2.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-g12b-odroid-n2-plus.dtb
@@ -50,7 +52,9 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxm-vega-s96.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-gxm-wetek-core2.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-bananapi-m5.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l-android.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-c4.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-hc4.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-sm1-sei610.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-sm1-sei610-android.dtb
 dtb-$(CONFIG_ARCH_MESON) += meson-a1-ad401.dtb
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510-android.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510-android.dts
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2020 BayLibre SAS. All rights reserved.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+  compatible = "seirobotics,sei510", "amlogic,g12a";
+  model = "SEI Robotics SEI510";
+  fragment@101 {
+        target-path = "/";
+
+        __overlay__ {
+                reserved-memory {
+                        #address-cells = <2>;
+                        #size-cells = <2>;
+                        ramoops@d000000 {
+                                compatible = "ramoops";
+                                reg = <0x0 0x0d000000 0x0 0x00100000>;
+                                record-size = <0x8000>;
+                                console-size = <0x8000>;
+                                ftrace-size = <0x0>;
+                                pmsg-size = <0x8000>;
+                        };
+                };
+
+                adc_keys {
+                        button-onoff {
+                                linux,code = <BTN_0>;
+                        };
+                };
+
+                cvbs-connector {
+                        status = "disabled";
+                };
+        };
+  };
+};
+
+&vddao_3v3_t {
+	gpio-open-drain;
+};
+
+&uart_A {
+	bluetooth {
+        interrupt-parent = <&gpio_intc>;
+        interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "host-wakeup";
+    };
+};
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3-android.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d-khadas-vim3-android.dts
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 BayLibre SAS. All rights reserved.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/phy/phy.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+  compatible = "khadas,vim3", "amlogic,a311d", "amlogic,g12b";
+  model = "Khadas VIM3";
+  fragment@101 {
+        target-path = "/";
+        __overlay__ {
+                reserved-memory {
+                        #address-cells = <2>;
+                        #size-cells = <2>;
+                        ramoops@d000000 {
+                                compatible = "ramoops";
+                                reg = <0x0 0x0d000000 0x0 0x00100000>;
+                                record-size = <0x8000>;
+                                console-size = <0x8000>;
+                                ftrace-size = <0x0>;
+                                pmsg-size = <0x8000>;
+                        };
+                };
+
+        };
+  };
+};
+
+&vcc_5v {
+	gpio-open-drain;
+};
+
+&uart_C {
+        status = "okay";
+        pinctrl-0 = <&uart_c_pins>;
+        pinctrl-names = "default";
+};
+
+&emmc_pwrseq{
+	status = "okay";
+};
+
+&sd_emmc_a {
+     /* WiFi firmware requires power to be kept while in suspend */
+    keep-power-in-suspend;
+};
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l-android.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l-android.dts
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 BayLibre SAS. All rights reserved.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/phy/phy.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+  compatible = "khadas,vim3l", "amlogic,sm1";
+  model = "Khadas VIM3L";
+  fragment@101 {
+        target-path = "/";
+        __overlay__ {
+                reserved-memory {
+                        #address-cells = <2>;
+                        #size-cells = <2>;
+                        ramoops@d000000 {
+                                compatible = "ramoops";
+                                reg = <0x0 0x0d000000 0x0 0x00100000>;
+                                record-size = <0x8000>;
+                                console-size = <0x8000>;
+                                ftrace-size = <0x0>;
+                                pmsg-size = <0x8000>;
+                        };
+                };
+        };
+  };
+};
+
+&vcc_5v {
+    gpio-open-drain;
+};
+
+&uart_A {
+	bluetooth {
+        interrupt-parent = <&gpio_intc>;
+        interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "host-wakeup";
+    };
+};
+
+&uart_C {
+        status = "disabled";
+        pinctrl-0 = <&uart_c_pins>;
+        pinctrl-names = "default";
+};
+
+&emmc_pwrseq{
+	status = "okay";
+};
+
+&sd_emmc_a {
+     /* WiFi firmware requires power to be kept while in suspend */
+    keep-power-in-suspend;
+};
+
+&spicc1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&spicc1_pins>;
+	cs-gpios = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	spidev@0 {
+		compatible = "rohm,dh2228fv";
+		reg = <0>;
+		spi-max-frequency = <500000>;
+		status = "okay";
+	};
+
+	neonkey@0 {
+		compatible = "nanohub";
+		reg = <0>;
+		spi-max-frequency = <500000>;
+
+		sensorhub,nreset-gpio = <&gpio GPIOA_0 0>;
+		sensorhub,boot0-gpio = <&gpio GPIOA_3 0>;   /* Fake */
+		sensorhub,wakeup-gpio = <&gpio GPIOA_2 0>;  /* A2 -> PB9 */
+		sensorhub,irq1-gpio = <&gpio GPIOA_1 0>;    /* A1 -> PB5 */
+		interrupt-parent = <&gpio_intc>;
+		interrupts = <62 IRQ_TYPE_EDGE_RISING>;     /* A1 */
+		/* sensorhub,spi-cs-gpio = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>; Optional */
+		sensorhub,bl-addr = <0x08000000>;
+		sensorhub,kernel-addr = <0x0800C000>;
+		sensorhub,shared-addr = <0x08040000>;
+		sensorhub,flash-banks = <0 0x08000000 0x04000>,
+					<3 0x0800C000 0x04000>,
+					<4 0x08010000 0x10000>,
+					<5 0x08020000 0x20000>,
+					<6 0x08040000 0x20000>,
+					<7 0x08060000 0x20000>;
+		sensorhub,num-flash-banks = <6>;
+		status = "disabled";
+	};
+
+	argonkey@0 {
+		compatible = "nanohub";
+		reg = <0>;
+		spi-max-frequency = <500000>;
+		spi-cpol;
+
+		sensorhub,nreset-gpio = <&gpio GPIOA_0 0>;
+		sensorhub,boot0-gpio = <&gpio GPIOA_3 0>;
+		sensorhub,wakeup-gpio = <&gpio GPIOA_1 0>;  /* A1 -> PA0 */
+		sensorhub,irq1-gpio = <&gpio GPIOA_2 0>;    /* A2 -> PA1 */
+		interrupt-parent = <&gpio_intc>;
+		interrupts = <63 IRQ_TYPE_EDGE_RISING>;     /* A2 */
+		sensorhub,bl-addr = <0x08000000>;
+		sensorhub,kernel-addr = <0x0800C000>;
+		sensorhub,num-flash-banks = <4>;
+		sensorhub,flash-banks =	<0 0x08000000 0x04000>,
+					<3 0x0800C000 0x04000>,
+					<4 0x08010000 0x10000>,
+					<5 0x08020000 0x20000>;
+		sensorhub,shared-addr = <0x08040000>;
+		sensorhub,num-shared-flash-banks = <6>;
+		sensorhub,shared-flash-banks = <6 0x08040000 0x20000>,
+					<7 0x08060000 0x20000>,
+					<8 0x08080000 0x20000>,
+					<9 0x080A0000 0x20000>,
+					<10 0x080C0000 0x20000>,
+					<11 0x080E0000 0x20000>;
+		status = "disabled";
+	};
+};
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610-android.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610-android.dts
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2020 BayLibre SAS. All rights reserved.
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/gpio/meson-g12a-gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+  compatible = "seirobotics,sei610", "amlogic,sm1";
+  model = "SEI Robotics SEI610";
+  fragment@101 {
+	target-path = "/";
+	__overlay__ {
+
+		reserved-memory {
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ramoops@d000000 {
+				compatible = "ramoops";
+				reg = <0x0 0x0d000000 0x0 0x00100000>;
+				record-size = <0x8000>;
+				console-size = <0x8000>;
+				ftrace-size = <0x0>;
+				pmsg-size = <0x8000>;
+			};
+		};
+	};
+   };
+};
+
+&vddao_3v3_t {
+	gpio-open-drain;
+};
+
+&emmc_pwrseq {
+	status = "okay";
+};
+
+&sd_emmc_a {
+	/* WiFi firmware requires power to be kept while in suspend */
+	keep-power-in-suspend;
+};
+
+&uart_C {
+        status = "disabled";
+        pinctrl-0 = <&uart_c_pins>;
+        pinctrl-names = "default";
+};
+
+&spicc0 {
+        status = "disabled";
+        pinctrl-names = "default";
+        pinctrl-0 = <&spicc0_x_pins>;
+        cs-gpios = <&gpio GPIOX_10 GPIO_ACTIVE_LOW>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        spidev@0 {
+            compatible = "rohm,dh2228fv";
+            reg = <0>;
+            spi-max-frequency = <500000>;
+            status = "disabled";
+        };
+};
--- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
+++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
@@ -16,6 +16,8 @@
 / {
 	model = "Qualcomm Technologies, Inc. Robotics RB5";
 	compatible = "qcom,qrb5165-rb5", "qcom,sm8250";
+	qcom,msm-id = <455 0x20001>;
+	qcom,board-id = <11 3>;

 	aliases {
 		serial0 = &uart12;
--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
@@ -17,6 +17,8 @@
 / {
 	model = "Thundercomm Dragonboard 845c";
 	compatible = "thundercomm,db845c", "qcom,sdm845";
+	qcom,msm-id = <341 0x20001>;
+	qcom,board-id = <8 0>;

 	aliases {
 		serial0 = &uart9;
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b.dts
@@ -31,7 +31,7 @@
 };

 &uart0 {
-	status = "okay";
+	status = "disabled";

 	bluetooth {
 		compatible = "brcm,bcm43438-bt";
--- a/arch/arm64/configs/16k_gki.fragment
+++ b/arch/arm64/configs/16k_gki.fragment
@@ -0,0 +1,3 @@
+CONFIG_ARM64_16K_PAGES=y
+# b/241785095
+# CONFIG_INCREMENTAL_FS is not set
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`per-file sysfs-fs-f2fs=file:/fs/f2fs/OWNERS`