Merge branch 'android14-5.15' into arpi-5.15.92

This commit is contained in:
Peter Yoon
2023-02-27 20:11:53 +09:00
1820 changed files with 134442 additions and 22152 deletions

1
.gitignore vendored
View File

@@ -45,6 +45,7 @@
*.symversions
*.tab.[ch]
*.tar
*.usyms
*.xz
*.zst
Module.symvers

566
BUILD.bazel Normal file
View File

@@ -0,0 +1,566 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2021 The Android Open Source Project
load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir")
load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c")
load(
"//build/kernel/kleaf:kernel.bzl",
"ddk_headers",
"kernel_abi",
"kernel_build",
"kernel_images",
"kernel_modules_install",
"kernel_unstripped_modules_archive",
)
load(":modules.bzl", "COMMON_GKI_MODULES_LIST")
# Package-wide default: every target declared in this file is public unless it
# sets its own `visibility` (several targets below override this to private).
package(
default_visibility = [
"//visibility:public",
],
)
# Additional KMI symbol list files for aarch64. Passed below as
# "additional_kmi_symbol_lists" for both kernel_aarch64 and
# kernel_aarch64_debug, next to the base "android/abi_gki_aarch64" list.
_aarch64_additional_kmi_symbol_lists = [
# keep sorted
"android/abi_gki_aarch64_db845c",
"android/abi_gki_aarch64_exynos",
"android/abi_gki_aarch64_pixel",
"android/abi_gki_aarch64_virtual_device",
]
# Per-target settings handed to define_common_kernels, keyed by target name.
# kernel_aarch64 and kernel_aarch64_debug carry identical settings; the 16k and
# x86_64 variants set only a subset of the keys.
# NOTE(review): the exact meaning and defaults of each key are defined by
# define_common_kernels in //build/kernel/kleaf:common_kernels.bzl, which is not
# visible from this file -- confirm there before changing a value.
define_common_kernels(target_configs = {
"kernel_aarch64": {
# TODO(b/188620248): re-enable trimming
"trim_nonlisted_kmi": False,
"kmi_symbol_list_strict_mode": False,
"module_implicit_outs": COMMON_GKI_MODULES_LIST,
"kmi_symbol_list": "android/abi_gki_aarch64",
"additional_kmi_symbol_lists": _aarch64_additional_kmi_symbol_lists,
},
"kernel_aarch64_16k": {
"module_implicit_outs": COMMON_GKI_MODULES_LIST,
},
# Keep in sync with "kernel_aarch64" above (currently the same settings).
"kernel_aarch64_debug": {
# TODO(b/188620248): re-enable trimming
"trim_nonlisted_kmi": False,
"kmi_symbol_list_strict_mode": False,
"module_implicit_outs": COMMON_GKI_MODULES_LIST,
"kmi_symbol_list": "android/abi_gki_aarch64",
"additional_kmi_symbol_lists": _aarch64_additional_kmi_symbol_lists,
},
"kernel_x86_64": {
"kmi_symbol_list_strict_mode": False,
"module_implicit_outs": COMMON_GKI_MODULES_LIST,
},
"kernel_x86_64_debug": {
"kmi_symbol_list_strict_mode": False,
"module_implicit_outs": COMMON_GKI_MODULES_LIST,
},
})
# db845c device target. Declares two Qualcomm device tree blobs as outs and the
# kernel modules listed in module_outs; ABI targets are requested via
# define_abi_targets, using the db845c symbol list in add-only mode.
# NOTE(review): define_db845c is a macro from
# //build/kernel/kleaf:common_kernels.bzl; which targets it expands to is not
# visible from this file.
define_db845c(
name = "db845c",
outs = [
"arch/arm64/boot/dts/qcom/qrb5165-rb5.dtb",
"arch/arm64/boot/dts/qcom/sdm845-db845c.dtb",
],
define_abi_targets = True,
kmi_symbol_list = "//common:android/abi_gki_aarch64_db845c",
kmi_symbol_list_add_only = True,
# Module paths are relative to the kernel source root.
module_outs = [
# keep sorted
"crypto/michael_mic.ko",
"drivers/base/regmap/regmap-sdw.ko",
"drivers/base/regmap/regmap-slimbus.ko",
"drivers/bus/mhi/core/mhi.ko",
"drivers/clk/qcom/clk-qcom.ko",
"drivers/clk/qcom/clk-rpmh.ko",
"drivers/clk/qcom/clk-spmi-pmic-div.ko",
"drivers/clk/qcom/dispcc-sdm845.ko",
"drivers/clk/qcom/dispcc-sm8250.ko",
"drivers/clk/qcom/gcc-sdm845.ko",
"drivers/clk/qcom/gcc-sm8250.ko",
"drivers/clk/qcom/gpucc-sdm845.ko",
"drivers/clk/qcom/gpucc-sm8250.ko",
"drivers/clk/qcom/lpass-gfm-sm8250.ko",
"drivers/clk/qcom/videocc-sdm845.ko",
"drivers/clk/qcom/videocc-sm8250.ko",
"drivers/cpufreq/qcom-cpufreq-hw.ko",
"drivers/dma-buf/heaps/system_heap.ko",
"drivers/dma/qcom/bam_dma.ko",
"drivers/extcon/extcon-usb-gpio.ko",
"drivers/firmware/qcom-scm.ko",
"drivers/gpio/gpio-wcd934x.ko",
"drivers/gpu/drm/bridge/display-connector.ko",
"drivers/gpu/drm/bridge/lontium-lt9611.ko",
"drivers/gpu/drm/bridge/lontium-lt9611uxc.ko",
"drivers/gpu/drm/msm/msm.ko",
"drivers/gpu/drm/scheduler/gpu-sched.ko",
"drivers/hwspinlock/qcom_hwspinlock.ko",
"drivers/i2c/busses/i2c-designware-core.ko",
"drivers/i2c/busses/i2c-designware-platform.ko",
"drivers/i2c/busses/i2c-qcom-geni.ko",
"drivers/i2c/busses/i2c-qup.ko",
"drivers/i2c/busses/i2c-rk3x.ko",
"drivers/i2c/i2c-dev.ko",
"drivers/i2c/i2c-mux.ko",
"drivers/i2c/muxes/i2c-mux-pca954x.ko",
"drivers/iio/adc/qcom-spmi-adc5.ko",
"drivers/iio/adc/qcom-vadc-common.ko",
"drivers/input/misc/pm8941-pwrkey.ko",
"drivers/interconnect/qcom/icc-bcm-voter.ko",
"drivers/interconnect/qcom/icc-osm-l3.ko",
"drivers/interconnect/qcom/icc-rpmh.ko",
"drivers/interconnect/qcom/qnoc-sdm845.ko",
"drivers/interconnect/qcom/qnoc-sm8250.ko",
"drivers/iommu/arm/arm-smmu/arm_smmu.ko",
"drivers/irqchip/qcom-pdc.ko",
"drivers/leds/led-class-multicolor.ko",
"drivers/mailbox/qcom-apcs-ipc-mailbox.ko",
"drivers/mailbox/qcom-ipcc.ko",
"drivers/mfd/qcom-spmi-pmic.ko",
"drivers/mfd/wcd934x.ko",
"drivers/misc/fastrpc.ko",
"drivers/mmc/host/cqhci.ko",
"drivers/mmc/host/sdhci-msm.ko",
"drivers/net/can/spi/mcp251xfd/mcp251xfd.ko",
"drivers/net/wireless/ath/ath.ko",
"drivers/net/wireless/ath/ath10k/ath10k_core.ko",
"drivers/net/wireless/ath/ath10k/ath10k_pci.ko",
"drivers/net/wireless/ath/ath10k/ath10k_snoc.ko",
"drivers/net/wireless/ath/ath11k/ath11k.ko",
"drivers/net/wireless/ath/ath11k/ath11k_ahb.ko",
"drivers/net/wireless/ath/ath11k/ath11k_pci.ko",
"drivers/nvmem/nvmem_qfprom.ko",
"drivers/phy/qualcomm/phy-qcom-qmp.ko",
"drivers/phy/qualcomm/phy-qcom-qusb2.ko",
"drivers/phy/qualcomm/phy-qcom-snps-femto-v2.ko",
"drivers/phy/qualcomm/phy-qcom-usb-hs.ko",
"drivers/pinctrl/qcom/pinctrl-lpass-lpi.ko",
"drivers/pinctrl/qcom/pinctrl-msm.ko",
"drivers/pinctrl/qcom/pinctrl-sdm845.ko",
"drivers/pinctrl/qcom/pinctrl-sm8250.ko",
"drivers/pinctrl/qcom/pinctrl-spmi-gpio.ko",
"drivers/pinctrl/qcom/pinctrl-spmi-mpp.ko",
"drivers/power/reset/qcom-pon.ko",
"drivers/power/reset/reboot-mode.ko",
"drivers/power/reset/syscon-reboot-mode.ko",
"drivers/regulator/gpio-regulator.ko",
"drivers/regulator/qcom-rpmh-regulator.ko",
"drivers/regulator/qcom_spmi-regulator.ko",
"drivers/regulator/qcom_usb_vbus-regulator.ko",
"drivers/remoteproc/qcom_common.ko",
"drivers/remoteproc/qcom_pil_info.ko",
"drivers/remoteproc/qcom_q6v5.ko",
"drivers/remoteproc/qcom_q6v5_adsp.ko",
"drivers/remoteproc/qcom_q6v5_mss.ko",
"drivers/remoteproc/qcom_q6v5_pas.ko",
"drivers/remoteproc/qcom_q6v5_wcss.ko",
"drivers/remoteproc/qcom_sysmon.ko",
"drivers/reset/reset-qcom-aoss.ko",
"drivers/reset/reset-qcom-pdc.ko",
"drivers/rpmsg/qcom_glink.ko",
"drivers/rpmsg/qcom_glink_rpm.ko",
"drivers/rpmsg/qcom_glink_smem.ko",
"drivers/rpmsg/qcom_smd.ko",
"drivers/rpmsg/rpmsg_ns.ko",
"drivers/rtc/rtc-pm8xxx.ko",
"drivers/slimbus/slim-qcom-ngd-ctrl.ko",
"drivers/slimbus/slimbus.ko",
"drivers/soc/qcom/apr.ko",
"drivers/soc/qcom/cmd-db.ko",
"drivers/soc/qcom/llcc-qcom.ko",
"drivers/soc/qcom/mdt_loader.ko",
"drivers/soc/qcom/pdr_interface.ko",
"drivers/soc/qcom/qcom_aoss.ko",
"drivers/soc/qcom/qcom_rpmh.ko",
"drivers/soc/qcom/qmi_helpers.ko",
"drivers/soc/qcom/rmtfs_mem.ko",
"drivers/soc/qcom/rpmhpd.ko",
"drivers/soc/qcom/smem.ko",
"drivers/soc/qcom/smp2p.ko",
"drivers/soc/qcom/smsm.ko",
"drivers/soc/qcom/socinfo.ko",
"drivers/soundwire/soundwire-bus.ko",
"drivers/soundwire/soundwire-qcom.ko",
"drivers/spi/spi-geni-qcom.ko",
"drivers/spi/spi-pl022.ko",
"drivers/spi/spi-qcom-qspi.ko",
"drivers/spi/spi-qup.ko",
"drivers/spmi/spmi-pmic-arb.ko",
"drivers/thermal/qcom/lmh.ko",
"drivers/thermal/qcom/qcom-spmi-adc-tm5.ko",
"drivers/thermal/qcom/qcom-spmi-temp-alarm.ko",
"drivers/thermal/qcom/qcom_tsens.ko",
"drivers/tty/serial/msm_serial.ko",
"drivers/ufs/host/ufs_qcom.ko",
"drivers/usb/common/ulpi.ko",
"drivers/usb/host/ohci-hcd.ko",
"drivers/usb/host/ohci-pci.ko",
"drivers/usb/host/ohci-platform.ko",
"drivers/usb/typec/qcom-pmic-typec.ko",
"drivers/watchdog/pm8916_wdt.ko",
"drivers/watchdog/qcom-wdt.ko",
"net/qrtr/ns.ko",
"net/qrtr/qrtr.ko",
"net/qrtr/qrtr-mhi.ko",
"net/qrtr/qrtr-smd.ko",
"net/qrtr/qrtr-tun.ko",
"sound/soc/codecs/snd-soc-dmic.ko",
"sound/soc/codecs/snd-soc-hdmi-codec.ko",
"sound/soc/codecs/snd-soc-lpass-va-macro.ko",
"sound/soc/codecs/snd-soc-lpass-wsa-macro.ko",
"sound/soc/codecs/snd-soc-max98927.ko",
"sound/soc/codecs/snd-soc-rl6231.ko",
"sound/soc/codecs/snd-soc-rt5663.ko",
"sound/soc/codecs/snd-soc-wcd-mbhc.ko",
"sound/soc/codecs/snd-soc-wcd9335.ko",
"sound/soc/codecs/snd-soc-wcd934x.ko",
"sound/soc/codecs/snd-soc-wsa881x.ko",
"sound/soc/qcom/qdsp6/q6adm.ko",
"sound/soc/qcom/qdsp6/q6afe.ko",
"sound/soc/qcom/qdsp6/q6afe-clocks.ko",
"sound/soc/qcom/qdsp6/q6afe-dai.ko",
"sound/soc/qcom/qdsp6/q6asm.ko",
"sound/soc/qcom/qdsp6/q6asm-dai.ko",
"sound/soc/qcom/qdsp6/q6core.ko",
"sound/soc/qcom/qdsp6/q6dsp-common.ko",
"sound/soc/qcom/qdsp6/q6routing.ko",
"sound/soc/qcom/snd-soc-qcom-common.ko",
"sound/soc/qcom/snd-soc-sdm845.ko",
"sound/soc/qcom/snd-soc-sm8250.ko",
],
)
# TODO(b/258259749): Convert rockpi4 to mixed build
# rockpi4 kernel build, configured by build.config.rockpi4. Unlike fips140
# below it sets no base_kernel, so the kernel image, vmlinux and the board
# device tree are declared as its own outs. module_outs is the common GKI
# module list plus the extra modules listed here.
kernel_build(
name = "rockpi4",
outs = [
"Image",
"System.map",
"modules.builtin",
"modules.builtin.modinfo",
"rk3399-rock-pi-4b.dtb",
"vmlinux",
"vmlinux.symvers",
],
build_config = "build.config.rockpi4",
# Consumed by :rockpi4_unstripped_modules_archive below.
collect_unstripped_modules = True,
kmi_symbol_list = "//common:android/abi_gki_rockpi4",
module_outs = COMMON_GKI_MODULES_LIST + [
# keep sorted
"drivers/block/virtio_blk.ko",
"drivers/char/hw_random/virtio-rng.ko",
"drivers/clk/clk-rk808.ko",
"drivers/cpufreq/cpufreq-dt.ko",
"drivers/dma/pl330.ko",
"drivers/gpu/drm/bridge/analogix/analogix_dp.ko",
"drivers/gpu/drm/bridge/synopsys/dw-hdmi.ko",
"drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.ko",
"drivers/gpu/drm/rockchip/rockchipdrm.ko",
"drivers/i2c/busses/i2c-rk3x.ko",
"drivers/iio/adc/rockchip_saradc.ko",
"drivers/iio/buffer/industrialio-triggered-buffer.ko",
"drivers/iio/buffer/kfifo_buf.ko",
"drivers/mfd/rk808.ko",
"drivers/mmc/core/pwrseq_simple.ko",
"drivers/mmc/host/cqhci.ko",
"drivers/mmc/host/dw_mmc.ko",
"drivers/mmc/host/dw_mmc-pltfm.ko",
"drivers/mmc/host/dw_mmc-rockchip.ko",
"drivers/mmc/host/sdhci-of-arasan.ko",
"drivers/net/ethernet/stmicro/stmmac/dwmac-rk.ko",
"drivers/net/ethernet/stmicro/stmmac/stmmac.ko",
"drivers/net/ethernet/stmicro/stmmac/stmmac-platform.ko",
"drivers/net/net_failover.ko",
"drivers/net/pcs/pcs_xpcs.ko",
"drivers/net/virtio_net.ko",
"drivers/nvmem/nvmem_rockchip_efuse.ko",
"drivers/pci/controller/pcie-rockchip-host.ko",
"drivers/phy/rockchip/phy-rockchip-emmc.ko",
"drivers/phy/rockchip/phy-rockchip-inno-usb2.ko",
"drivers/phy/rockchip/phy-rockchip-pcie.ko",
"drivers/phy/rockchip/phy-rockchip-typec.ko",
"drivers/pwm/pwm-rockchip.ko",
"drivers/regulator/fan53555.ko",
"drivers/regulator/pwm-regulator.ko",
"drivers/regulator/rk808-regulator.ko",
"drivers/rtc/rtc-rk808.ko",
"drivers/soc/rockchip/io-domain.ko",
"drivers/thermal/rockchip_thermal.ko",
"drivers/usb/host/ohci-hcd.ko",
"drivers/usb/host/ohci-platform.ko",
"drivers/virtio/virtio_pci.ko",
"drivers/virtio/virtio_pci_modern_dev.ko",
"drivers/watchdog/dw_wdt.ko",
"net/core/failover.ko",
],
)
# ABI target for the rockpi4 kernel_build.
# NOTE(review): kmi_symbol_list_add_only presumably limits symbol list updates
# to additions (never removals) -- confirm against the Kleaf kernel_abi docs.
kernel_abi(
name = "rockpi4_abi",
kernel_build = "//common:rockpi4",
kmi_symbol_list_add_only = True,
)
# Modules-install target for the rockpi4 kernel_build; referenced by
# rockpi4_images and rockpi4_dist below.
kernel_modules_install(
name = "rockpi4_modules_install",
kernel_build = "//common:rockpi4",
)
# Archive of the unstripped modules of :rockpi4, which sets
# collect_unstripped_modules = True. Included in rockpi4_dist below.
kernel_unstripped_modules_archive(
name = "rockpi4_unstripped_modules_archive",
kernel_build = ":rockpi4",
)
# Image target for rockpi4. Takes both the kernel_build and its
# modules-install target as inputs; build_initramfs is enabled.
kernel_images(
name = "rockpi4_images",
build_initramfs = True,
kernel_build = "//common:rockpi4",
kernel_modules_install = "//common:rockpi4_modules_install",
)
# Dist target for rockpi4: gathers the kernel build, images, installed modules
# and the unstripped-modules archive into out/rockpi4/dist.
# NOTE(review): flat = True presumably copies files without their directory
# structure -- confirm in //build/bazel_common_rules/dist:dist.bzl.
copy_to_dist_dir(
name = "rockpi4_dist",
data = [
":rockpi4",
":rockpi4_images",
":rockpi4_modules_install",
":rockpi4_unstripped_modules_archive",
],
dist_dir = "out/rockpi4/dist",
flat = True,
)
# fips140 module build on top of :kernel_aarch64 (base_kernel). It declares no
# kernel outs of its own; the only declared output is crypto/fips140.ko.
kernel_build(
name = "fips140",
outs = [],
base_kernel = ":kernel_aarch64",
build_config = "build.config.gki.aarch64.fips140",
module_outs = ["crypto/fips140.ko"],
)
# Dist target for the fips140 module build; output goes to out/fips140/dist.
copy_to_dist_dir(
name = "fips140_dist",
data = [
":fips140",
],
dist_dir = "out/fips140/dist",
flat = True,
)
# allmodconfig build tests.
# These are build tests only, so:
# - outs intentionally list nothing but .config (see the hack note on each
#   target); no kernel artifacts are declared as outputs
# - --allow_undeclared_modules must be used so modules are not declared or copied.
# - No dist target because these are build tests. We don't care about the artifacts.
# tools/bazel build --allow_undeclared_modules //common:kernel_aarch64_allmodconfig
kernel_build(
name = "kernel_aarch64_allmodconfig",
# Hack to actually check the build.
# Otherwise, Bazel thinks that there are no output files, and skips building.
outs = [".config"],
build_config = "build.config.allmodconfig.aarch64",
visibility = ["//visibility:private"],
)
# x86_64 allmodconfig build test (private; no dist target is defined for it).
# tools/bazel build --allow_undeclared_modules //common:kernel_x86_64_allmodconfig
kernel_build(
name = "kernel_x86_64_allmodconfig",
# Hack to actually check the build.
# Otherwise, Bazel thinks that there are no output files, and skips building.
outs = [".config"],
build_config = "build.config.allmodconfig.x86_64",
visibility = ["//visibility:private"],
)
# 32-bit arm allmodconfig build test (private; no dist target is defined for it).
# tools/bazel build --allow_undeclared_modules //common:kernel_arm_allmodconfig
kernel_build(
name = "kernel_arm_allmodconfig",
# Hack to actually check the build.
# Otherwise, Bazel thinks that there are no output files, and skips building.
outs = [".config"],
build_config = "build.config.allmodconfig.arm",
visibility = ["//visibility:private"],
)
# DDK Headers
# All headers. These are the public targets for DDK modules to use.
# `all_headers` is an alias for the aarch64 variant; the arm and x86_64
# variants must be referenced by their architecture-suffixed names.
alias(
name = "all_headers",
actual = "all_headers_aarch64",
visibility = ["//visibility:public"],
)
# aarch64 DDK header set: the aarch64 allowlist, plus :all_headers_unsafe only
# when //build/kernel/kleaf:allow_ddk_unsafe_headers_set matches.
ddk_headers(
name = "all_headers_aarch64",
hdrs = [":all_headers_allowlist_aarch64"] + select({
"//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
)
# 32-bit arm DDK header set: the arm allowlist, plus :all_headers_unsafe only
# when //build/kernel/kleaf:allow_ddk_unsafe_headers_set matches.
ddk_headers(
name = "all_headers_arm",
hdrs = [":all_headers_allowlist_arm"] + select({
"//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
)
# x86_64 DDK header set: the x86_64 allowlist, plus :all_headers_unsafe only
# when //build/kernel/kleaf:allow_ddk_unsafe_headers_set matches.
ddk_headers(
name = "all_headers_x86_64",
hdrs = [":all_headers_allowlist_x86_64"] + select({
"//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
)
# Implementation details for DDK headers. The targets below cannot be directly
# depended on by DDK modules.
# DDK headers allowlist. This is the list of all headers and include
# directories that are safe to use in DDK modules.
# aarch64 variant: arch/arm64 headers plus the architecture-independent ones.
ddk_headers(
name = "all_headers_allowlist_aarch64",
hdrs = [
":all_headers_allowlist_aarch64_globs",
":all_headers_allowlist_common_globs",
],
# The list of include directories where source files can #include headers
# from. In other words, these are the `-I` option to the C compiler.
# These are prepended to LINUXINCLUDE.
linux_includes = [
"arch/arm64/include",
"arch/arm64/include/uapi",
"include",
"include/uapi",
],
visibility = ["//visibility:private"],
)
# 32-bit arm allowlist: arch/arm headers plus the architecture-independent
# ones. Private; DDK modules reach it through :all_headers_arm.
ddk_headers(
name = "all_headers_allowlist_arm",
hdrs = [
":all_headers_allowlist_arm_globs",
":all_headers_allowlist_common_globs",
],
# The list of include directories where source files can #include headers
# from. In other words, these are the `-I` option to the C compiler.
# These are prepended to LINUXINCLUDE.
linux_includes = [
"arch/arm/include",
"arch/arm/include/uapi",
"include",
"include/uapi",
],
visibility = ["//visibility:private"],
)
# x86_64 allowlist: arch/x86 headers plus the architecture-independent ones.
# Private; DDK modules reach it through :all_headers_x86_64.
ddk_headers(
name = "all_headers_allowlist_x86_64",
hdrs = [
":all_headers_allowlist_common_globs",
":all_headers_allowlist_x86_64_globs",
],
# The list of include directories where source files can #include headers
# from. In other words, these are the `-I` option to the C compiler.
# These are prepended to LINUXINCLUDE.
linux_includes = [
"arch/x86/include",
"arch/x86/include/uapi",
"include",
"include/uapi",
],
visibility = ["//visibility:private"],
)
# List of DDK headers allowlist that are glob()-ed to avoid changes of BUILD
# file when the list of files changes. All headers in these directories
# are safe to use.
# These are separate filegroup targets so the all_headers_allowlist_* are
# more friendly to batch BUILD file update tools like buildozer.
# globs() for arm only: every .h file under arch/arm/include, recursively.
filegroup(
name = "all_headers_allowlist_arm_globs",
srcs = glob(["arch/arm/include/**/*.h"]),
visibility = ["//visibility:private"],
)
# globs() for arm64 only: every .h file under arch/arm64/include, recursively.
filegroup(
name = "all_headers_allowlist_aarch64_globs",
srcs = glob(["arch/arm64/include/**/*.h"]),
visibility = ["//visibility:private"],
)
# globs() for x86 only: every .h file under arch/x86/include, recursively.
filegroup(
name = "all_headers_allowlist_x86_64_globs",
srcs = glob(["arch/x86/include/**/*.h"]),
visibility = ["//visibility:private"],
)
# globs() for all architectures: every .h file under include/, recursively.
# Shared by the aarch64, arm and x86_64 allowlists.
filegroup(
name = "all_headers_allowlist_common_globs",
srcs = glob(["include/**/*.h"]),
visibility = ["//visibility:private"],
)
# DDK headers unsafe list. This is the list of all headers and include
# directories that may be used during migration from kernel_module's, but
# should be avoided in general.
# Use with caution; items may:
# - be removed without notice
# - be moved into all_headers
# Only reachable through the all_headers_<arch> targets, and only when
# //build/kernel/kleaf:allow_ddk_unsafe_headers_set matches.
ddk_headers(
name = "all_headers_unsafe",
hdrs = [
"drivers/devfreq/governor.h",
"drivers/dma-buf/heaps/deferred-free-helper.h",
"drivers/dma-buf/heaps/page_pool.h",
"drivers/dma/dmaengine.h",
"drivers/pci/controller/dwc/pcie-designware.h",
"drivers/pinctrl/core.h",
"drivers/pinctrl/samsung/pinctrl-samsung.h",
"drivers/staging/android/debug_kinfo.h",
"drivers/thermal/thermal_core.h",
"drivers/thermal/thermal_netlink.h",
"drivers/usb/core/phy.h",
"drivers/usb/dwc3/core.h",
"drivers/usb/dwc3/debug.h",
"drivers/usb/dwc3/gadget.h",
"drivers/usb/dwc3/io.h",
"drivers/usb/dwc3/trace.h",
"drivers/usb/gadget/configfs.h",
"drivers/usb/gadget/function/u_serial.h",
"drivers/usb/host/pci-quirks.h",
"drivers/usb/host/xhci.h",
"drivers/usb/host/xhci-ext-caps.h",
"drivers/usb/host/xhci-mvebu.h",
"drivers/usb/host/xhci-plat.h",
"drivers/usb/host/xhci-rcar.h",
"drivers/usb/typec/tcpm/tcpci.h",
],
# The list of include directories where source files can #include headers
# from. In other words, these are the `-I` option to the C compiler.
# Unsafe include directories are appended to ccflags-y.
includes = [
"drivers/devfreq",
"drivers/dma",
"drivers/dma-buf",
"drivers/pci/controller/dwc",
"drivers/pinctrl",
# NOTE(review): no header under drivers/scsi/ufs is listed in hdrs above,
# and the db845c module list in this file uses drivers/ufs/... -- confirm
# this include directory still exists and is still needed.
"drivers/scsi/ufs",
"drivers/thermal",
"drivers/usb",
"drivers/usb/gadget/function",
"drivers/usb/typec",
],
visibility = ["//visibility:private"],
)

View File

@@ -32,3 +32,21 @@ Description:
Note: If the module is built into the kernel, or if the
CONFIG_MODULE_UNLOAD kernel configuration value is not enabled,
this file will not be present.
What: /sys/module/MODULENAME/scmversion
Date: November 2020
KernelVersion: Android Common Kernel -- android12-5.10+
Contact: Will McVicker <willmcvicker@google.com>
Description: This read-only file will appear if modpost was supplied with an
SCM version for the module. It can be enabled with the config
MODULE_SCMVERSION. The SCM version is retrieved by
scripts/setlocalversion, which means that the presence of this
file depends on CONFIG_LOCALVERSION_AUTO=y. When read, the SCM
version that the module was compiled with is returned. The SCM
version is returned in the following format::
===
Git: g[a-f0-9]\+(-dirty)\?
Mercurial: hg[a-f0-9]\+(-dirty)\?
Subversion: svn[0-9]\+
===

View File

@@ -0,0 +1 @@
per-file sysfs-fs-f2fs=file:/fs/f2fs/OWNERS

View File

@@ -7,6 +7,7 @@ Description: UVC function directory
streaming_maxburst 0..15 (ss only)
streaming_maxpacket 1..1023 (fs), 1..3072 (hs/ss)
streaming_interval 1..16
function_name string [32]
=================== =============================
What: /config/usb-gadget/gadget/functions/uvc.name/control
@@ -196,7 +197,7 @@ Description: Specific MJPEG format descriptors
read-only
bmaControls this format's data for bmaControls in
the streaming header
bmInterfaceFlags specifies interlace information,
bmInterlaceFlags specifies interlace information,
read-only
bAspectRatioY the Y dimension of the picture aspect
ratio, read-only
@@ -252,7 +253,7 @@ Description: Specific uncompressed format descriptors
read-only
bmaControls this format's data for bmaControls in
the streaming header
bmInterfaceFlags specifies interlace information,
bmInterlaceFlags specifies interlace information,
read-only
bAspectRatioY the Y dimension of the picture aspect
ratio, read-only

View File

@@ -0,0 +1,19 @@
What: /sys/block/dm-<num>/bow/free
Date: January 2023
KernelVersion: 5.15
Contact: paullawrence@google.com
Description: free space
Free space on device in bytes. Only valid in state 0
Users: Android vold to determine if there is sufficient space for expected size
of checksum
What: /sys/block/dm-<num>/bow/state
Date: January 2023
KernelVersion: 5.15
Contact: paullawrence@google.com
Description: dm-bow state
Read-write string containing 0, 1 or 2
0: Trim mode
1: Checkpoint mode
2: Committed mode
See Documentation/device-mapper/dm-bow for details

View File

@@ -47,3 +47,18 @@ Description:
USB SuperSpeed protocol. From user perspective pin assignments C
and E are equal, where all channels on the connector are used
for carrying DisplayPort protocol (allowing higher resolutions).
What: /sys/bus/typec/devices/.../displayport/hpd
Date: Dec 2022
Contact: Badhri Jagan Sridharan <badhri@google.com>
Description:
VESA DisplayPort Alt Mode on USB Type-C Standard defines how
HotPlugDetect(HPD) shall be supported on the USB-C connector when
operating in DisplayPort Alt Mode. This is a read only node which
reflects the current state of HPD.
Valid values:
- 1: when HPD's logical state is high (HPD_High) as defined
by VESA DisplayPort Alt Mode on USB Type-C Standard.
- 0: when HPD's logical state is low (HPD_Low) as defined by
VESA DisplayPort Alt Mode on USB Type-C Standard.

View File

@@ -1299,6 +1299,15 @@ Description: This node is used to set or display whether UFS WriteBooster is
platform that doesn't support UFSHCD_CAP_CLK_SCALING, we can
disable/enable WriteBooster through this sysfs node.
What: /sys/bus/platform/drivers/ufshcd/*/enable_wb_buf_flush
What: /sys/bus/platform/devices/*.ufs/enable_wb_buf_flush
Date: July 2022
Contact: Jinyoung Choi <j-young.choi@samsung.com>
Description: This entry shows the status of WriteBooster buffer flushing
and it can be used to enable or disable the flushing.
If flushing is enabled, the device executes the flush
operation when the command queue is empty.
What: /sys/bus/platform/drivers/ufshcd/*/device_descriptor/hpb_version
Date: June 2021
Contact: Daejun Park <daejun7.park@samsung.com>
@@ -1394,7 +1403,7 @@ Description: This entry shows the number of reads that cannot be changed to
The file is read only.
What: /sys/class/scsi_device/*/device/hpb_stats/rb_noti_cnt
What: /sys/class/scsi_device/*/device/hpb_stats/rcmd_noti_cnt
Date: June 2021
Contact: Daejun Park <daejun7.park@samsung.com>
Description: This entry shows the number of response UPIUs that has
@@ -1402,19 +1411,23 @@ Description: This entry shows the number of response UPIUs that has
The file is read only.
What: /sys/class/scsi_device/*/device/hpb_stats/rb_active_cnt
What: /sys/class/scsi_device/*/device/hpb_stats/rcmd_active_cnt
Date: June 2021
Contact: Daejun Park <daejun7.park@samsung.com>
Description: This entry shows the number of active sub-regions recommended by
response UPIUs.
Description: For the HPB device control mode, this entry shows the number of
active sub-regions recommended by response UPIUs. For the HPB host control
mode, this entry shows the number of active sub-regions recommended by the
HPB host control mode heuristic algorithm.
The file is read only.
What: /sys/class/scsi_device/*/device/hpb_stats/rb_inactive_cnt
What: /sys/class/scsi_device/*/device/hpb_stats/rcmd_inactive_cnt
Date: June 2021
Contact: Daejun Park <daejun7.park@samsung.com>
Description: This entry shows the number of inactive regions recommended by
response UPIUs.
Description: For the HPB device control mode, this entry shows the number of
inactive regions recommended by response UPIUs. For the HPB host control
mode, this entry shows the number of inactive regions recommended by the
HPB host control mode heuristic algorithm.
The file is read only.
@@ -1461,6 +1474,43 @@ Description: This entry shows the status of HPB.
The file is read only.
Contact: Daniil Lunev <dlunev@chromium.org>
What: /sys/bus/platform/drivers/ufshcd/*/capabilities/
What: /sys/bus/platform/devices/*.ufs/capabilities/
Date: August 2022
Description: The group represents the effective capabilities of the
host-device pair. i.e. the capabilities which are enabled in the
driver for the specific host controller, supported by the host
controller and are supported and/or have compatible
configuration on the device side.
Contact: Daniil Lunev <dlunev@chromium.org>
What: /sys/bus/platform/drivers/ufshcd/*/capabilities/clock_scaling
What: /sys/bus/platform/devices/*.ufs/capabilities/clock_scaling
Date: August 2022
Contact: Daniil Lunev <dlunev@chromium.org>
Description: Indicates status of clock scaling.
== ============================
0 Clock scaling is not supported.
1 Clock scaling is supported.
== ============================
The file is read only.
What: /sys/bus/platform/drivers/ufshcd/*/capabilities/write_booster
What: /sys/bus/platform/devices/*.ufs/capabilities/write_booster
Date: August 2022
Contact: Daniil Lunev <dlunev@chromium.org>
Description: Indicates status of Write Booster.
== ============================
0 Write Booster can not be enabled.
1 Write Booster can be enabled.
== ============================
The file is read only.
What: /sys/class/scsi_device/*/device/hpb_param_sysfs/activation_thld
Date: February 2021
Contact: Avri Altman <avri.altman@wdc.com>

View File

@@ -0,0 +1,7 @@
What: /sys/fs/erofs/features/
Date: November 2021
Contact: "Huang Jianan" <huangjianan@oppo.com>
Description: Shows all enabled kernel features.
Supported features:
zero_padding, compr_cfgs, big_pcluster, chunked_file,
device_table, compr_head2, sb_chksum.

View File

@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy.
0x04 F2FS_IPU_UTIL
0x08 F2FS_IPU_SSR_UTIL
0x10 F2FS_IPU_FSYNC
0x20 F2FS_IPU_ASYNC,
0x20 F2FS_IPU_ASYNC
0x40 F2FS_IPU_NOCACHE
0x80 F2FS_IPU_HONOR_OPU_WRITE
==== =================
Refer segment.h for details.
@@ -98,13 +99,47 @@ Description: Controls the issue rate of discard commands that consist of small
checkpoint is triggered, and issued during the checkpoint.
By default, it is disabled with 0.
What: /sys/fs/f2fs/<disk>/max_ordered_discard
Date: October 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: Controls the maximum ordered discard, the unit size is one block(4KB).
Set it to 16 by default.
What: /sys/fs/f2fs/<disk>/max_discard_request
Date: December 2021
Contact: "Konstantin Vyshetsky" <vkon@google.com>
Description: Controls the number of discards a thread will issue at a time.
Higher number will allow the discard thread to finish its work
faster, at the cost of higher latency for incoming I/O.
What: /sys/fs/f2fs/<disk>/min_discard_issue_time
Date: December 2021
Contact: "Konstantin Vyshetsky" <vkon@google.com>
Description: Controls the interval the discard thread will wait between
issuing discard requests when there are discards to be issued and
no I/O aware interruptions occur.
What: /sys/fs/f2fs/<disk>/mid_discard_issue_time
Date: December 2021
Contact: "Konstantin Vyshetsky" <vkon@google.com>
Description: Controls the interval the discard thread will wait between
issuing discard requests when there are discards to be issued and
an I/O aware interruption occurs.
What: /sys/fs/f2fs/<disk>/max_discard_issue_time
Date: December 2021
Contact: "Konstantin Vyshetsky" <vkon@google.com>
Description: Controls the interval the discard thread will wait when there are
no discard operations to be issued.
What: /sys/fs/f2fs/<disk>/discard_granularity
Date: July 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description: Controls discard granularity of inner discard thread. Inner thread
will not issue discards with size that is smaller than granularity.
The unit size is one block(4KB), now only support configuring
in range of [1, 512]. Default value is 4(=16KB).
in range of [1, 512]. Default value is 16.
For small devices, default value is 1.
What: /sys/fs/f2fs/<disk>/umount_discard_timeout
Date: January 2019
@@ -112,6 +147,11 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Set timeout to issue discard commands during umount.
Default: 5 secs
What: /sys/fs/f2fs/<disk>/pending_discard
Date: November 2021
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Shows the number of pending discard commands in the queue.
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -202,7 +242,7 @@ Description: Shows total written kbytes issued to disk.
What: /sys/fs/f2fs/<disk>/features
Date: July 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/
Description: <deprecated: should use /sys/fs/f2fs/<disk>/feature_list/>
Shows all enabled features in current device.
Supported features:
encryption, blkzoned, extra_attr, projquota, inode_checksum,
@@ -264,11 +304,16 @@ Description: Shows current reserved blocks in system, it may be temporarily
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Do background GC aggressively when set. When gc_urgent = 1,
background thread starts to do GC by given gc_urgent_sleep_time
interval. When gc_urgent = 2, F2FS will lower the bar of
checking idle in order to process outstanding discard commands
and GC a little bit aggressively. It is set to 0 by default.
Description: Do background GC aggressively when set. Set to 0 by default.
gc urgent high(1): does GC forcibly in a period of given
gc_urgent_sleep_time and ignores I/O idling check. Uses greedy
GC approach and turns SSR mode on.
gc urgent low(2): lowers the bar of checking I/O idling in
order to process outstanding discard commands and GC a
little bit aggressively. Uses cost benefit GC approach.
gc urgent mid(3): does GC forcibly in a period of given
gc_urgent_sleep_time and executes a mid level of I/O idling check.
Uses cost benefit GC approach.
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date: August 2017
@@ -428,6 +473,30 @@ Description: Show status of f2fs superblock in real time.
0x4000 SBI_IS_FREEZING freefs is in process
====== ===================== =================================
What: /sys/fs/f2fs/<disk>/stat/cp_status
Date: September 2022
Contact: "Chao Yu" <chao.yu@oppo.com>
Description: Show status of f2fs checkpoint in real time.
=============================== ==============================
cp flag value
CP_UMOUNT_FLAG 0x00000001
CP_ORPHAN_PRESENT_FLAG 0x00000002
CP_COMPACT_SUM_FLAG 0x00000004
CP_ERROR_FLAG 0x00000008
CP_FSCK_FLAG 0x00000010
CP_FASTBOOT_FLAG 0x00000020
CP_CRC_RECOVERY_FLAG 0x00000040
CP_NAT_BITS_FLAG 0x00000080
CP_TRIMMED_FLAG 0x00000100
CP_NOCRC_RECOVERY_FLAG 0x00000200
CP_LARGE_NAT_BITMAP_FLAG 0x00000400
CP_QUOTA_NEED_FSCK_FLAG 0x00000800
CP_DISABLED_FLAG 0x00001000
CP_DISABLED_QUICK_FLAG 0x00002000
CP_RESIZEFS_FLAG 0x00004000
=============================== ==============================
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
Date: January 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
@@ -499,7 +568,7 @@ Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show how many segments have been reclaimed by GC during a specific
GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
3: GC idle AT, 4: GC urgent high, 5: GC urgent low, 6: GC urgent mid)
You can re-initialize this value to "0".
What: /sys/fs/f2fs/<disk>/gc_segment_mode
@@ -513,3 +582,90 @@ Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: You can control the multiplier value of bdi device readahead window size
between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
What: /sys/fs/f2fs/<disk>/max_fragment_chunk
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
What: /sys/fs/f2fs/<disk>/max_fragment_hole
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
What: /sys/fs/f2fs/<disk>/gc_remaining_trials
Date: October 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: You can set the trial count limit for GC urgent and idle mode with this value.
If GC thread gets to the limit, the mode will turn back to GC normal mode.
By default, the value is zero, which means there is no limit like before.
What: /sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
Date: January 2022
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Controls max # of node block writes to be used for roll forward
recovery. This can limit the roll forward recovery time.
What: /sys/fs/f2fs/<disk>/unusable_blocks_per_sec
Date: June 2022
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Shows the number of unusable blocks in a section which was defined by
the zone capacity reported by underlying zoned device.
What: /sys/fs/f2fs/<disk>/current_atomic_write
Date: July 2022
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show the total current atomic write block count, which is not committed yet.
This is a read-only entry.
What: /sys/fs/f2fs/<disk>/peak_atomic_write
Date: July 2022
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show the peak value of total current atomic write block count after boot.
If you write "0" here, you can initialize to "0".
What: /sys/fs/f2fs/<disk>/committed_atomic_block
Date: July 2022
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show the accumulated total committed atomic write block count after boot.
If you write "0" here, you can initialize to "0".
What: /sys/fs/f2fs/<disk>/revoked_atomic_block
Date: July 2022
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Show the accumulated total revoked atomic write block count after boot.
If you write "0" here, you can initialize to "0".
What: /sys/fs/f2fs/<disk>/gc_mode
Date: October 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: Show the current gc_mode as a string.
This is a read-only entry.
What: /sys/fs/f2fs/<disk>/discard_urgent_util
Date: November 2022
Contact: "Yangtao Li" <frank.li@vivo.com>
Description: When space utilization exceeds this, do background DISCARD aggressively.
Does DISCARD forcibly in a period of given min_discard_issue_time when the number
of discards is not 0 and set discard granularity to 1.
Default: 80
What: /sys/fs/f2fs/<disk>/hot_data_age_threshold
Date: November 2022
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the age threshold to indicate
the data blocks as hot. By default it was initialized as 262144 blocks
(equals to 1GB).
What: /sys/fs/f2fs/<disk>/warm_data_age_threshold
Date: November 2022
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the age threshold to indicate
the data blocks as warm. By default it was initialized as 2621440 blocks
(equals to 10GB).

View File

@@ -0,0 +1,19 @@
What: /sys/fs/fuse/features/fuse_bpf
Date: December 2022
Contact: Paul Lawrence <paullawrence@google.com>
Description:
Read-only file that contains the word 'supported' if fuse-bpf is
supported, does not exist otherwise
What: /sys/fs/fuse/bpf_prog_type_fuse
Date: December 2022
Contact: Paul Lawrence <paullawrence@google.com>
Description:
bpf_prog_type_fuse defines the program type of bpf programs that
may be passed to fuse-bpf. For upstream bpf program types, this
is a constant defined in a contiguous array of constants.
bpf_prog_type_fuse is appended to the end of the list, so it may
change and therefore its value must be read from this file.
The contents are the ASCII decimal representation of bpf_prog_type_fuse.

View File

@@ -0,0 +1,70 @@
What: /sys/fs/incremental-fs/features/corefs
Date: 2019
Contact: Paul Lawrence <paullawrence@google.com>
Description: Reads 'supported'. Always present.
What: /sys/fs/incremental-fs/features/v2
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Reads 'supported'. Present if all v2 features of incfs are
supported.
What: /sys/fs/incremental-fs/features/zstd
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Reads 'supported'. Present if zstd compression is supported
for data blocks.
What: /sys/fs/incremental-fs/features/bugfix_throttling
Date: January 2023
Contact: Paul Lawrence <paullawrence@google.com>
Description: Reads 'supported'. Present if the throttling lock bug is fixed
https://android-review.git.corp.google.com/c/kernel/common/+/2381827
What: /sys/fs/incremental-fs/instances/[name]
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Folder created when incfs is mounted with the sysfs_name=[name]
option. If this option is used, the following values are created
in this folder.
What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_min
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns a count of the number of reads that were delayed as a
result of the per UID read timeouts min time setting.
What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_min_us
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns total delay time for all files since first mount as a
result of the per UID read timeouts min time setting.
What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_pending
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns a count of the number of reads that were delayed as a
result of waiting for a pending read.
What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_pending_us
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns total delay time for all files since first mount as a
result of waiting for a pending read.
What: /sys/fs/incremental-fs/instances/[name]/reads_failed_hash_verification
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns number of reads that failed because of hash verification
failures.
What: /sys/fs/incremental-fs/instances/[name]/reads_failed_other
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns number of reads that failed for reasons other than
timing out or hash failures.
What: /sys/fs/incremental-fs/instances/[name]/reads_failed_timed_out
Date: April 2021
Contact: Paul Lawrence <paullawrence@google.com>
Description: Returns number of reads that timed out.

View File

@@ -0,0 +1,7 @@
What: /sys/kernel/dma_heap/total_pools_kb
Date: Feb 2021
KernelVersion: 5.10
Contact: Hridya Valsaraju <hridya@google.com>
Description:
The total_pools_kb file is read-only and specifies how much
memory in Kb is allocated to DMA-BUF heap pools.

View File

@@ -0,0 +1,16 @@
What: /sys/kernel/wakeup_reasons/last_resume_reason
Date: February 2014
Contact: Ruchi Kandoi <kandoiruchi@google.com>
Description:
The /sys/kernel/wakeup_reasons/last_resume_reason is
used to report wakeup reasons after system exited suspend.
What: /sys/kernel/wakeup_reasons/last_suspend_time
Date: March 2015
Contact: jinqian <jinqian@google.com>
Description:
The /sys/kernel/wakeup_reasons/last_suspend_time is
used to report time spent in last suspend cycle. It contains
two numbers (in seconds) separated by space. First number is
the time spent in suspend and resume processes. Second number
is the time spent in sleep state.

View File

@@ -1296,6 +1296,11 @@ PAGE_SIZE multiple when read back.
pagetables
Amount of memory allocated for page tables.
sec_pagetables
Amount of memory allocated for secondary page tables,
this currently includes KVM mmu allocations on x86
and arm64.
percpu (npn)
Amount of memory used for storing per-cpu kernel
data structures.

View File

@@ -961,6 +961,10 @@
can be useful when debugging issues that require an SLB
miss to occur.
disable_dma32= [KNL]
Dynamically disable ZONE_DMA32 on kernels compiled with
CONFIG_ZONE_DMA32=y.
stress_slb [PPC]
Limits the number of kernel SLB entries, and flushes
them frequently to increase the rate of SLB faults
@@ -1396,6 +1400,10 @@
Format: { "fix" }
Permit 'security.evm' to be updated regardless of
current integrity status.
export_pmu_events
[KNL,ARM64] Sets the PMU export bit (PMCR_EL0.X), which enables
the exporting of events over an IMPLEMENTATION DEFINED PMU event
export bus to another device.
failslab=
fail_usercopy=
@@ -1646,6 +1654,10 @@
If specified, z/VM IUCV HVC accepts connections
from listed z/VM user IDs only.
hvc_dcc.enable= [ARM,ARM64] Enable DCC driver at runtime. For GKI,
disabled at runtime by default to prevent
crashes in devices which do not support DCC.
hv_nopvspin [X86,HYPER_V] Disables the paravirt spinlock optimizations
which allow the hypervisor to 'idle' the
guest on lock contention.
@@ -2056,6 +2068,14 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
iommu.max_align_shift=
[ARM64, X86] Limit the alignment of IOVAs to a maximum
PAGE_SIZE order. Larger IOVAs will be aligned to this
specified order. The order is expressed as a power of
two multiplied by the PAGE_SIZE.
Format: { "4" | "5" | "6" | "7" | "8" | "9" }
Default: 9
iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
@@ -2078,6 +2098,9 @@
1 - Bypass the IOMMU for DMA.
unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
ioremap_guard [ARM64] enable the KVM MMIO guard functionality
if available.
io7= [HW] IO7 for Marvel-based Alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
@@ -2367,14 +2390,19 @@
kvm-arm.mode=
[KVM,ARM] Select one of KVM/arm64's modes of operation.
none: Forcefully disable KVM.
nvhe: Standard nVHE-based mode, without support for
protected guests.
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.
state is kept private from the host. See
Documentation/virt/kvm/arm/pkvm.rst for more
information about this mode of operation.
Defaults to VHE/nVHE based on hardware support.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
for the host.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM] Trap guest accesses to GICv3 group-0

View File

@@ -32,6 +32,7 @@ the Linux memory management.
idle_page_tracking
ksm
memory-hotplug
multigen_lru
nommu-mmap
numa_memory_policy
numaperf

View File

@@ -0,0 +1,162 @@
.. SPDX-License-Identifier: GPL-2.0
=============
Multi-Gen LRU
=============
The multi-gen LRU is an alternative LRU implementation that optimizes
page reclaim and improves performance under memory pressure. Page
reclaim decides the kernel's caching policy and ability to overcommit
memory. It directly impacts the kswapd CPU usage and RAM efficiency.
Quick start
===========
Build the kernel with the following configurations.
* ``CONFIG_LRU_GEN=y``
* ``CONFIG_LRU_GEN_ENABLED=y``
All set!
Runtime options
===============
``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the
following subsections.
Kill switch
-----------
``enabled`` accepts different values to enable or disable the
following components. Its default value depends on
``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled
unless some of them have unforeseen side effects. Writing to
``enabled`` has no effect when a component is not supported by the
hardware, and valid values will be accepted even when the main switch
is off.
====== ===============================================================
Values Components
====== ===============================================================
0x0001 The main switch for the multi-gen LRU.
0x0002 Clearing the accessed bit in leaf page table entries in large
batches, when MMU sets it (e.g., on x86). This behavior can
theoretically worsen lock contention (mmap_lock). If it is
disabled, the multi-gen LRU will suffer a minor performance
degradation for workloads that contiguously map hot pages,
whose accessed bits can be otherwise cleared by fewer larger
batches.
0x0004 Clearing the accessed bit in non-leaf page table entries as
well, when MMU sets it (e.g., on x86). This behavior was not
verified on x86 varieties other than Intel and AMD. If it is
disabled, the multi-gen LRU will suffer a negligible
performance degradation.
[yYnN] Apply to all the components above.
====== ===============================================================
E.g.,
::
echo y >/sys/kernel/mm/lru_gen/enabled
cat /sys/kernel/mm/lru_gen/enabled
0x0007
echo 5 >/sys/kernel/mm/lru_gen/enabled
cat /sys/kernel/mm/lru_gen/enabled
0x0005
Thrashing prevention
--------------------
Personal computers are more sensitive to thrashing because it can
cause janks (lags when rendering UI) and negatively impact user
experience. The multi-gen LRU offers thrashing prevention to the
majority of laptop and desktop users who do not have ``oomd``.
Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of
``N`` milliseconds from getting evicted. The OOM killer is triggered
if this working set cannot be kept in memory. In other words, this
option works as an adjustable pressure relief valve, and when open, it
terminates applications that are hopefully not being used.
Based on the average human detectable lag (~100ms), ``N=1000`` usually
eliminates intolerable janks due to thrashing. Larger values like
``N=3000`` make janks less noticeable at the risk of premature OOM
kills.
The default value ``0`` means disabled.
Experimental features
=====================
``/sys/kernel/debug/lru_gen`` accepts commands described in the
following subsections. Multiple command lines are supported, as is
concatenation with delimiters ``,`` and ``;``.
``/sys/kernel/debug/lru_gen_full`` provides additional stats for
debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from
evicted generations in this file.
Working set estimation
----------------------
Working set estimation measures how much memory an application needs
in a given time interval, and it is usually done with little impact on
the performance of the application. E.g., data centers want to
optimize job scheduling (bin packing) to improve memory utilizations.
When a new job comes in, the job scheduler needs to find out whether
each server it manages can allocate a certain amount of memory for
this new job before it can pick a candidate. To do so, the job
scheduler needs to estimate the working sets of the existing jobs.
When it is read, ``lru_gen`` returns a histogram of numbers of pages
accessed over different time intervals for each memcg and node.
``MAX_NR_GENS`` decides the number of bins for each histogram. The
histograms are noncumulative.
::
memcg memcg_id memcg_path
node node_id
min_gen_nr age_in_ms nr_anon_pages nr_file_pages
...
max_gen_nr age_in_ms nr_anon_pages nr_file_pages
Each bin contains an estimated number of pages that have been accessed
within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages
and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of
the former is the largest and that of the latter is the smallest.
Users can write the following command to ``lru_gen`` to create a new
generation ``max_gen_nr+1``:
``+ memcg_id node_id max_gen_nr [can_swap [force_scan]]``
``can_swap`` defaults to the swap setting and, if it is set to ``1``,
it forces the scan of anon pages when swap is off, and vice versa.
``force_scan`` defaults to ``1`` and, if it is set to ``0``, it
employs heuristics to reduce the overhead, which is likely to reduce
the coverage as well.
A typical use case is that a job scheduler runs this command at a
certain time interval to create new generations, and it ranks the
servers it manages based on the sizes of their cold pages defined by
this time interval.
Proactive reclaim
-----------------
Proactive reclaim induces page reclaim when there is no memory
pressure. It usually targets cold pages only. E.g., when a new job
comes in, the job scheduler wants to proactively reclaim cold pages on
the server it selected, to improve the chance of successfully landing
this new job.
Users can write the following command to ``lru_gen`` to evict
generations less than or equal to ``min_gen_nr``.
``- memcg_id node_id min_gen_nr [swappiness [nr_to_reclaim]]``
``min_gen_nr`` should be less than ``max_gen_nr-1``, since
``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
the active list) and therefore cannot be evicted. ``swappiness``
overrides the default value in ``/proc/sys/vm/swappiness``.
``nr_to_reclaim`` limits the number of pages to evict.
A typical use case is that a job scheduler runs this command before it
tries to land a new job on a server. If it fails to materialize enough
cold pages because of the overestimation, it retries on the next
server according to the ranking result obtained from the working set
estimation step. This less forceful approach limits the impacts on the
existing jobs.

View File

@@ -267,6 +267,17 @@ domain names are in general different. For a detailed discussion
see the ``hostname(1)`` man page.
export_pmu_events (arm64 only)
==============================
Controls the PMU export bit (PMCR_EL0.X), which enables the exporting of
events over an IMPLEMENTATION DEFINED PMU event export bus to another device.
0: disables exporting of events (default).
1: enables exporting of events.
firmware_config
===============
@@ -915,6 +926,17 @@ enabled, otherwise writing to this file will return ``-EBUSY``.
The default value is 8.
perf_user_access (arm64 only)
=================================
Controls user space access for reading perf event counters. When set to 1,
user space can read performance monitor counter registers directly.
The default value is 0 (access disabled).
See Documentation/arm64/perf.rst for more information.
pid_max
=======

View File

@@ -92,7 +92,7 @@ operation if the source belongs to the supported system register space.
The infrastructure emulates only the following system register space::
Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7
Op0=3, Op1=0, CRn=0, CRm=0,2,3,4,5,6,7
(See Table C5-6 'System instruction encodings for non-Debug System
register accesses' in ARMv8 ARM DDI 0487A.h, for the list of
@@ -290,6 +290,44 @@ infrastructure:
+------------------------------+---------+---------+
| RPRES | [7-4] | y |
+------------------------------+---------+---------+
| WFXT | [3-0] | y |
+------------------------------+---------+---------+
10) MVFR0_EL1 - AArch32 Media and VFP Feature Register 0
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
| FPDP | [11-8] | y |
+------------------------------+---------+---------+
11) MVFR1_EL1 - AArch32 Media and VFP Feature Register 1
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
| SIMDFMAC | [31-28] | y |
+------------------------------+---------+---------+
| SIMDSP | [19-16] | y |
+------------------------------+---------+---------+
| SIMDInt | [15-12] | y |
+------------------------------+---------+---------+
| SIMDLS | [11-8] | y |
+------------------------------+---------+---------+
12) ID_ISAR5_EL1 - AArch32 Instruction Set Attribute Register 5
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
| CRC32 | [19-16] | y |
+------------------------------+---------+---------+
| SHA2 | [15-12] | y |
+------------------------------+---------+---------+
| SHA1 | [11-8] | y |
+------------------------------+---------+---------+
| AES | [7-4] | y |
+------------------------------+---------+---------+
Appendix I: Example

View File

@@ -259,6 +259,48 @@ HWCAP2_RPRES
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
HWCAP2_MTE3
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
by Documentation/arm64/memory-tagging-extension.rst.
HWCAP2_SME
Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
by Documentation/arm64/sme.rst.
HWCAP2_SME_I16I64
Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
HWCAP2_SME_F64F64
Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
HWCAP2_SME_I8I32
Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
HWCAP2_SME_F16F32
Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
HWCAP2_SME_B16F32
Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
HWCAP2_SME_F32F32
Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
HWCAP2_SME_FA64
Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
HWCAP2_WFXT
Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
4. Unused AT_HWCAP bits
-----------------------

View File

@@ -21,6 +21,7 @@ ARM64 Architecture
perf
pointer-authentication
silicon-errata
sme
sve
tagged-address-abi
tagged-pointers

View File

@@ -76,6 +76,9 @@ configurable behaviours:
with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
address is unknown).
- *Asymmetric* - Reads are handled as for synchronous mode while writes
are handled as for asynchronous mode.
The user can select the above modes, per thread, using the
``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
contains any number of the following values in the ``PR_MTE_TCF_MASK``
@@ -139,18 +142,25 @@ tag checking mode as the CPU's preferred tag checking mode.
The preferred tag checking mode for each CPU is controlled by
``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
privileged user may write the value ``async`` or ``sync``. The default
preferred mode for each CPU is ``async``.
privileged user may write the value ``async``, ``sync`` or ``asymm``. The
default preferred mode for each CPU is ``async``.
To allow a program to potentially run in the CPU's preferred tag
checking mode, the user program may set multiple tag check fault mode
bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
mode is in the task's set of provided tag checking modes (this will
always be the case at present because the kernel only supports two
tag checking modes, but future kernels may support more modes), that
mode will be selected. Otherwise, one of the modes in the task's mode
set will be selected in a currently unspecified manner.
flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
modes are requested then asymmetric mode may also be selected by the
kernel. If the CPU's preferred tag checking mode is in the task's set
of provided tag checking modes, that mode will be selected. Otherwise,
one of the modes will be selected by the kernel
from the task's mode set using the preference order:
1. Asynchronous
2. Asymmetric
3. Synchronous
Note that there is no way for userspace to request multiple modes and
also disable asymmetric mode.
Initial process state
---------------------

View File

@@ -102,12 +102,26 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+

428
Documentation/arm64/sme.rst Normal file
View File

@@ -0,0 +1,428 @@
===================================================
Scalable Matrix Extension support for AArch64 Linux
===================================================
This document outlines briefly the interface provided to userspace by Linux in
order to support use of the ARM Scalable Matrix Extension (SME).
This is an outline of the most important features and issues only and not
intended to be exhaustive. It should be read in conjunction with the SVE
documentation in sve.rst which provides details on the Streaming SVE mode
included in SME.
This document does not aim to describe the SME architecture or programmer's
model. To aid understanding, a minimal description of relevant programmer's
model features for SME is included in Appendix A.
1. General
-----------
* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
register state and TPIDR2_EL0 are tracked per thread.
* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
AT_HWCAP2 entry. Presence of this flag implies the presence of the SME
instructions and registers, and the Linux-specific system interfaces
described in this document. SME is reported in /proc/cpuinfo as "sme".
* Support for the execution of SME instructions in userspace can also be
detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
instruction, and checking that the value of the SME field is nonzero. [3]
It does not guarantee the presence of the system interfaces described in the
following sections: software that needs to verify that those interfaces are
present must check for HWCAP2_SME instead.
* There are a number of optional SME features; the presence of these is
reported in AT_HWCAP2 through:
HWCAP2_SME_I16I64
HWCAP2_SME_F64F64
HWCAP2_SME_I8I32
HWCAP2_SME_F16F32
HWCAP2_SME_B16F32
HWCAP2_SME_F32F32
HWCAP2_SME_FA64
This list may be extended over time as the SME architecture evolves.
These extensions are also reported via the CPU ID register ID_AA64SMFR0_EL1,
which userspace can read using an MRS instruction. See elf_hwcaps.rst and
cpu-feature-registers.rst for details.
* Debuggers should restrict themselves to interacting with the target via the
NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets. The recommended way
of detecting support for these regsets is to connect to a target process
first and then attempt a
ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
* Whenever ZA register values are exchanged in memory between userspace and
the kernel, the register value is encoded in memory as a series of horizontal
vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
used for SVE vectors.
* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
in which case it is set to 0.
2. Vector lengths
------------------
SME defines a second vector length, similar to the SVE vector length, which
controls the size of the streaming mode SVE vectors and the ZA matrix array.
The ZA matrix is square with each side having as many bytes as a streaming
mode SVE vector.
3. Sharing of streaming and non-streaming mode SVE state
---------------------------------------------------------
It is implementation defined which, if any, parts of the SVE state are shared
between streaming and non-streaming modes. When switching between modes
via software interfaces such as ptrace, if no register content is provided as
part of switching, no state will be assumed to be shared and everything will
be zeroed.
4. System call behaviour
-------------------------
* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
ZA matrix are preserved.
* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
as per the standard SVE ABI.
* Neither the SVE registers nor ZA are used to pass arguments to or receive
results from any syscall.
* On process creation (e.g., clone()) the newly created process will have
PSTATE.SM cleared.
* All other SME state of a thread, including the currently configured vector
length, the state of the PR_SME_VL_INHERIT flag, and the deferred vector
length (if any), is preserved across all syscalls, subject to the specific
exceptions for execve() described in section 6.
5. Signal handling
-------------------
* Signal handlers are invoked with streaming mode and ZA disabled.
* A new signal frame record za_context encodes the ZA register contents on
signal delivery. [1]
* The signal frame record for ZA always contains basic metadata, in particular
the thread's vector length (in za_context.vl).
* The ZA matrix may or may not be included in the record, depending on
the value of PSTATE.ZA. The registers are present if and only if:
za_context.head.size >= ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za_context.vl))
in which case PSTATE.ZA == 1.
* If matrix data is present, the remainder of the record has a vl-dependent
size and layout. Macros ZA_SIG_* are defined [1] to facilitate access to
them.
* The matrix is stored as a series of horizontal vectors in the same format as
is used for SVE vectors.
* If the ZA context is too big to fit in sigcontext.__reserved[], then extra
space is allocated on the stack, an extra_context record is written in
__reserved[] referencing this space. za_context is then written in the
extra space. Refer to [1] for further details about this mechanism.
5. Signal return
-----------------
When returning from a signal handler:
* If there is no za_context record in the signal frame, or if the record is
present but contains no register data as described in the previous section,
then ZA is disabled.
* If za_context is present in the signal frame and contains matrix data then
PSTATE.ZA is set to 1 and ZA is populated with the specified data.
* The vector length cannot be changed via signal return. If za_context.vl in
the signal frame does not match the current vector length, the signal return
attempt is treated as illegal, resulting in a forced SIGSEGV.
6. prctl extensions
--------------------
Some new prctl() calls are added to allow programs to manage the SME vector
length:
prctl(PR_SME_SET_VL, unsigned long arg)
Sets the vector length of the calling thread and related flags, where
arg == vl | flags. Other threads of the calling process are unaffected.
vl is the desired vector length, where sve_vl_valid(vl) must be true.
flags:
PR_SME_VL_INHERIT
Inherit the current vector length across execve(). Otherwise, the
vector length is reset to the system default at execve(). (See
Section 9.)
PR_SME_SET_VL_ONEXEC
Defer the requested vector length change until the next execve()
performed by this thread.
The effect is equivalent to implicit execution of the following
call immediately after the next execve() (if any) by the thread:
prctl(PR_SME_SET_VL, arg & ~PR_SME_SET_VL_ONEXEC)
This allows launching of a new program with a different vector
length, while avoiding runtime side effects in the caller.
Without PR_SME_SET_VL_ONEXEC, the requested change takes effect
immediately.
Return value: a nonnegative value on success, or a negative value on error:
EINVAL: SME not supported, invalid vector length requested, or
invalid flags.
On success:
* Either the calling thread's vector length or the deferred vector length
to be applied at the next execve() by the thread (dependent on whether
PR_SME_SET_VL_ONEXEC is present in arg), is set to the largest value
supported by the system that is less than or equal to vl. If vl ==
SVE_VL_MAX, the value set will be the largest value supported by the
system.
* Any previously outstanding deferred vector length change in the calling
thread is cancelled.
* The returned value describes the resulting configuration, encoded as for
PR_SME_GET_VL. The vector length reported in this value is the new
current vector length for this thread if PR_SME_SET_VL_ONEXEC was not
present in arg; otherwise, the reported vector length is the deferred
vector length that will be applied at the next execve() by the calling
thread.
* Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
unspecified, including both streaming and non-streaming SVE state.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
* Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
prctl(PR_SME_GET_VL)
Gets the vector length of the calling thread.
The following flag may be OR-ed into the result:
PR_SME_VL_INHERIT
Vector length will be inherited across execve().
There is no way to determine whether there is an outstanding deferred
vector length change (which would only normally be the case between a
fork() or vfork() and the corresponding execve() in typical use).
To extract the vector length from the result, bitwise and it with
PR_SME_VL_LEN_MASK.
Return value: a nonnegative value on success, or a negative value on error:
EINVAL: SME not supported.
7. ptrace extensions
---------------------
* A new regset NT_ARM_SSVE is defined for access to streaming mode SVE
state via PTRACE_GETREGSET and PTRACE_SETREGSET, this is documented in
sve.rst.
* A new regset NT_ARM_ZA is defined for access to ZA state via
PTRACE_GETREGSET and PTRACE_SETREGSET.
Refer to [2] for definitions.
The regset data starts with struct user_za_header, containing:
size
Size of the complete regset, in bytes.
This depends on vl and possibly on other things in the future.
If a call to PTRACE_GETREGSET requests less data than the value of
size, the caller can allocate a larger buffer and retry in order to
read the complete regset.
max_size
Maximum size in bytes that the regset can grow to for the target
thread. The regset won't grow bigger than this even if the target
thread changes its vector length etc.
vl
Target thread's current streaming vector length, in bytes.
max_vl
Maximum possible streaming vector length for the target thread.
flags
Zero or more of the following flags, which have the same
meaning and behaviour as the corresponding PR_SME_* flags:
SME_PT_VL_INHERIT
SME_PT_VL_ONEXEC (SETREGSET only).
* The effects of changing the vector length and/or flags are equivalent to
those documented for PR_SME_SET_VL.
The caller must make a further GETREGSET call if it needs to know what VL is
actually set by SETREGSET, unless it is known in advance that the requested
VL is supported.
* The size and layout of the payload depends on the header fields. The
SME_PT_ZA_*() macros are provided to facilitate access to the data.
* In either case, for SETREGSET it is permissible to omit the payload, in which
case the vector length and flags are changed and PSTATE.ZA is set to 0
(along with any consequences of those changes). If a payload is provided
then PSTATE.ZA will be set to 1.
* For SETREGSET, if the requested VL is not supported, the effect will be the
same as if the payload were omitted, except that an EIO error is reported.
No attempt is made to translate the payload data to the correct layout
for the vector length actually set. It is up to the caller to translate the
payload layout for the actual VL and retry.
* The effect of writing a partial, incomplete payload is unspecified.
8. ELF coredump extensions
---------------------------
* NT_ARM_SSVE notes will be added to each coredump for
each thread of the dumped process. The contents will be equivalent to the
data that would have been read if a PTRACE_GETREGSET of the corresponding
type were executed for each thread when the coredump was generated.
* A NT_ARM_ZA note will be added to each coredump for each thread of the
dumped process. The contents will be equivalent to the data that would have
been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
when the coredump was generated.
9. System runtime configuration
--------------------------------
* To mitigate the ABI impact of expansion of the signal frame, a policy
mechanism is provided for administrators, distro maintainers and developers
to set the default vector length for userspace processes:
/proc/sys/abi/sme_default_vector_length
Writing the text representation of an integer to this file sets the system
default vector length to the specified value, unless the value is greater
than the maximum vector length supported by the system in which case the
default vector length is set to that maximum.
The result can be determined by reopening the file and reading its
contents.
At boot, the default vector length is initially set to 32 or the maximum
supported vector length, whichever is smaller and supported. This
determines the initial vector length of the init process (PID 1).
Reading this file returns the current system default vector length.
* At every execve() call, the new vector length of the new process is set to
the system default vector length, unless
* PR_SME_VL_INHERIT (or equivalently SME_PT_VL_INHERIT) is set for the
calling thread, or
* a deferred vector length change is pending, established via the
PR_SME_SET_VL_ONEXEC flag (or SME_PT_VL_ONEXEC).
* Modifying the system default vector length does not affect the vector length
of any existing process or thread that does not make an execve() call.
Appendix A. SME programmer's model (informative)
=================================================
This section provides a minimal description of the additions made by SME to the
ARMv8-A programmer's model that are relevant to this document.
Note: This section is for information only and not intended to be complete or
to replace any architectural specification.
A.1. Registers
---------------
In A64 state, SME adds the following:
* A new mode, streaming mode, in which a subset of the normal FPSIMD and SVE
features are available. When supported, EL0 software may enter and leave
streaming mode at any time.
For best system performance it is strongly encouraged for software to enable
streaming mode only when it is actively being used.
* A new vector length controlling the size of ZA and the Z registers when in
streaming mode, separately to the vector length used for SVE when not in
streaming mode. There is no requirement that either the currently selected
vector length or the set of vector lengths supported for the two modes in
a given system have any relationship. The streaming mode vector length
is referred to as SVL.
* A new ZA matrix register. This is a square matrix of SVLxSVL bits. Most
operations on ZA require that streaming mode be enabled but ZA can be
enabled without streaming mode in order to load, save and retain data.
For best system performance it is strongly encouraged for software to enable
ZA only when it is actively being used.
* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
SMSTOP instructions or by access to the SVCR system register:
* PSTATE.ZA, if this is 1 then the ZA matrix is accessible and has valid
data while if it is 0 then ZA can not be accessed. When PSTATE.ZA is
changed from 0 to 1 all bits in ZA are cleared.
* PSTATE.SM, if this is 1 then the PE is in streaming mode. When the value
of PSTATE.SM is changed then it is implementation defined if the subset
of the floating point register bits valid in both modes may be retained.
Any other bits will be cleared.
References
==========
[1] arch/arm64/include/uapi/asm/sigcontext.h
AArch64 Linux signal ABI definitions
[2] arch/arm64/include/uapi/asm/ptrace.h
AArch64 Linux ptrace ABI definitions
[3] Documentation/arm64/cpu-feature-registers.rst

View File

@@ -7,7 +7,9 @@ Author: Dave Martin <Dave.Martin@arm.com>
Date: 4 August 2017
This document outlines briefly the interface provided to userspace by Linux in
order to support use of the ARM Scalable Vector Extension (SVE).
order to support use of the ARM Scalable Vector Extension (SVE), including
interactions with Streaming SVE mode added by the Scalable Matrix Extension
(SME).
This is an outline of the most important features and issues only and not
intended to be exhaustive.
@@ -23,6 +25,10 @@ model features for SVE is included in Appendix A.
* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
tracked per-thread.
* In streaming mode FFR is not accessible unless HWCAP2_SME_FA64 is present
in the system. When it is not present and these interfaces are used to
access streaming mode state, FFR is read and written as zero.
* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
AT_HWCAP entry. Presence of this flag implies the presence of the SVE
instructions and registers, and the Linux-specific system interfaces
@@ -53,10 +59,19 @@ model features for SVE is included in Appendix A.
which userspace can read using an MRS instruction. See elf_hwcaps.txt and
cpu-feature-registers.txt for details.
* On hardware that supports the SME extensions, HWCAP2_SME will also be
reported in the AT_HWCAP2 aux vector entry. Among other things SME adds
streaming mode which provides a subset of the SVE feature set using a
separate SME vector length and the same Z/V registers. See sme.rst
for more details.
* Debuggers should restrict themselves to interacting with the target via the
NT_ARM_SVE regset. The recommended way of detecting support for this regset
is to connect to a target process first and then attempt a
ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). Note that when SME is
present and streaming SVE mode is in use the FPSIMD subset of registers
will be read via NT_ARM_SVE and NT_ARM_SVE writes will exit streaming mode
in the target.
* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
between userspace and the kernel, the register value is encoded in memory in
@@ -126,6 +141,11 @@ the SVE instruction set architecture.
are only present in fpsimd_context. For convenience, the content of V0..V31
is duplicated between sve_context and fpsimd_context.
* The record contains a flag field which includes a flag SVE_SIG_FLAG_SM which
if set indicates that the thread is in streaming mode and the vector length
and register data (if present) describe the streaming SVE data and vector
length.
* The signal frame record for SVE always contains basic metadata, in particular
the thread's vector length (in sve_context.vl).
@@ -170,6 +190,11 @@ When returning from a signal handler:
the signal frame does not match the current vector length, the signal return
attempt is treated as illegal, resulting in a forced SIGSEGV.
* It is permitted to enter or leave streaming mode by setting or clearing
the SVE_SIG_FLAG_SM flag but applications should take care to ensure that
when doing so sve_context.vl and any register data are appropriate for the
vector length in the new mode.
6. prctl extensions
--------------------
@@ -255,7 +280,7 @@ prctl(PR_SVE_GET_VL)
vector length change (which would only normally be the case between a
fork() or vfork() and the corresponding execve() in typical use).
To extract the vector length from the result, and it with
To extract the vector length from the result, bitwise and it with
PR_SVE_VL_LEN_MASK.
Return value: a nonnegative value on success, or a negative value on error:
@@ -265,8 +290,14 @@ prctl(PR_SVE_GET_VL)
7. ptrace extensions
---------------------
* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
PTRACE_SETREGSET.
* New regsets NT_ARM_SVE and NT_ARM_SSVE are defined for use with
PTRACE_GETREGSET and PTRACE_SETREGSET. NT_ARM_SSVE describes the
streaming mode SVE registers and NT_ARM_SVE describes the
non-streaming mode SVE registers.
In this description a register set is referred to as being "live" when
the target is in the appropriate streaming or non-streaming mode and is
using data beyond the subset shared with the FPSIMD Vn registers.
Refer to [2] for definitions.
@@ -297,7 +328,7 @@ The regset data starts with struct user_sve_header, containing:
flags
either
at most one of
SVE_PT_REGS_FPSIMD
@@ -331,6 +362,10 @@ The regset data starts with struct user_sve_header, containing:
SVE_PT_VL_ONEXEC (SETREGSET only).
If neither FPSIMD nor SVE flags are provided then no register
payload is available, this is only possible when SME is implemented.
* The effects of changing the vector length and/or flags are equivalent to
those documented for PR_SVE_SET_VL.
@@ -346,6 +381,13 @@ The regset data starts with struct user_sve_header, containing:
case only the vector length and flags are changed (along with any
consequences of those changes).
* In systems supporting SME, when in streaming mode a GETREGSET for
NT_ARM_SVE will return only the user_sve_header with no register data;
similarly a GETREGSET for NT_ARM_SSVE will not return any register data
when not in streaming mode.
* A GETREGSET for NT_ARM_SSVE will never return SVE_PT_REGS_FPSIMD.
* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
requested VL is not supported, the effect will be the same as if the
payload were omitted, except that an EIO error is reported. No
@@ -355,17 +397,25 @@ The regset data starts with struct user_sve_header, containing:
unspecified. It is up to the caller to translate the payload layout
for the actual VL and retry.
* Where SME is implemented it is not possible to GETREGSET the register
state for normal SVE when in streaming mode, nor the streaming mode
register state when in normal mode, regardless of the implementation defined
behaviour of the hardware for sharing data between the two modes.
* Any SETREGSET of NT_ARM_SVE will exit streaming mode if the target was in
streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
if the target was not in streaming mode.
* The effect of writing a partial, incomplete payload is unspecified.
8. ELF coredump extensions
---------------------------
* A NT_ARM_SVE note will be added to each coredump for each thread of the
dumped process. The contents will be equivalent to the data that would have
been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
when the coredump was generated.
* NT_ARM_SVE and NT_ARM_SSVE notes will be added to each coredump for
each thread of the dumped process. The contents will be equivalent to the
data that would have been read if a PTRACE_GETREGSET of the corresponding
type were executed for each thread when the coredump was generated.
9. System runtime configuration
--------------------------------

View File

@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
.. _inline_encryption:
=================
Inline Encryption
=================
@@ -7,230 +9,269 @@ Inline Encryption
Background
==========
Inline encryption hardware sits logically between memory and the disk, and can
en/decrypt data as it goes in/out of the disk. Inline encryption hardware has a
fixed number of "keyslots" - slots into which encryption contexts (i.e. the
encryption key, encryption algorithm, data unit size) can be programmed by the
kernel at any time. Each request sent to the disk can be tagged with the index
of a keyslot (and also a data unit number to act as an encryption tweak), and
the inline encryption hardware will en/decrypt the data in the request with the
encryption context programmed into that keyslot. This is very different from
full disk encryption solutions like self encrypting drives/TCG OPAL/ATA
Security standards, since with inline encryption, any block on disk could be
encrypted with any encryption context the kernel chooses.
Inline encryption hardware sits logically between memory and disk, and can
en/decrypt data as it goes in/out of the disk. For each I/O request, software
can control exactly how the inline encryption hardware will en/decrypt the data
in terms of key, algorithm, data unit size (the granularity of en/decryption),
and data unit number (a value that determines the initialization vector(s)).
Some inline encryption hardware accepts all encryption parameters including raw
keys directly in low-level I/O requests. However, most inline encryption
hardware instead has a fixed number of "keyslots" and requires that the key,
algorithm, and data unit size first be programmed into a keyslot. Each
low-level I/O request then just contains a keyslot index and data unit number.
Note that inline encryption hardware is very different from traditional crypto
accelerators, which are supported through the kernel crypto API. Traditional
crypto accelerators operate on memory regions, whereas inline encryption
hardware operates on I/O requests. Thus, inline encryption hardware needs to be
managed by the block layer, not the kernel crypto API.
Inline encryption hardware is also very different from "self-encrypting drives",
such as those based on the TCG Opal or ATA Security standards. Self-encrypting
drives don't provide fine-grained control of encryption and provide no way to
verify the correctness of the resulting ciphertext. Inline encryption hardware
provides fine-grained control of encryption, including the choice of key and
initialization vector for each sector, and can be tested for correctness.
Objective
=========
We want to support inline encryption (IE) in the kernel.
To allow for testing, we also want a crypto API fallback when actual
IE hardware is absent. We also want IE to work with layered devices
like dm and loopback (i.e. we want to be able to use the IE hardware
of the underlying devices if present, or else fall back to crypto API
en/decryption).
We want to support inline encryption in the kernel. To make testing easier, we
also want support for falling back to the kernel crypto API when actual inline
encryption hardware is absent. We also want inline encryption to work with
layered devices like device-mapper and loopback (i.e. we want to be able to use
the inline encryption hardware of the underlying devices if present, or else
fall back to crypto API en/decryption).
Constraints and notes
=====================
- IE hardware has a limited number of "keyslots" that can be programmed
with an encryption context (key, algorithm, data unit size, etc.) at any time.
One can specify a keyslot in a data request made to the device, and the
device will en/decrypt the data using the encryption context programmed into
that specified keyslot. When possible, we want to make multiple requests with
the same encryption context share the same keyslot.
- We need a way for upper layers (e.g. filesystems) to specify an encryption
context to use for en/decrypting a bio, and device drivers (e.g. UFSHCD) need
to be able to use that encryption context when they process the request.
Encryption contexts also introduce constraints on bio merging; the block layer
needs to be aware of these constraints.
- We need a way for upper layers like filesystems to specify an encryption
context to use for en/decrypting a struct bio, and a device driver (like UFS)
needs to be able to use that encryption context when it processes the bio.
- Different inline encryption hardware has different supported algorithms,
supported data unit sizes, maximum data unit numbers, etc. We call these
properties the "crypto capabilities". We need a way for device drivers to
advertise crypto capabilities to upper layers in a generic way.
- We need a way for device drivers to expose their inline encryption
capabilities in a unified way to the upper layers.
- Inline encryption hardware usually (but not always) requires that keys be
programmed into keyslots before being used. Since programming keyslots may be
slow and there may not be very many keyslots, we shouldn't just program the
key for every I/O request, but rather keep track of which keys are in the
keyslots and reuse an already-programmed keyslot when possible.
- Upper layers typically define a specific end-of-life for crypto keys, e.g.
when an encrypted directory is locked or when a crypto mapping is torn down.
At these times, keys are wiped from memory. We must provide a way for upper
layers to also evict keys from any keyslots they are present in.
Design
======
- When possible, device-mapper devices must be able to pass through the inline
encryption support of their underlying devices. However, it doesn't make
sense for device-mapper devices to have keyslots themselves.
We add a struct bio_crypt_ctx to struct bio that can
represent an encryption context, because we need to be able to pass this
encryption context from the upper layers (like the fs layer) to the
device driver to act upon.
Basic design
============
While IE hardware works on the notion of keyslots, the FS layer has no
knowledge of keyslots - it simply wants to specify an encryption context to
use while en/decrypting a bio.
We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
how it will be used. This includes the type of the key (standard or
hardware-wrapped); the actual bytes of the key; the size of the key; the
algorithm and data unit size the key will be used with; and the number of bytes
needed to represent the maximum data unit number the key will be used with.
We introduce a keyslot manager (KSM) that handles the translation from
encryption contexts specified by the FS to keyslots on the IE hardware.
This KSM also serves as the way IE hardware can expose its capabilities to
upper layers. The generic mode of operation is: each device driver that wants
to support IE will construct a KSM and set it up in its struct request_queue.
Upper layers that want to use IE on this device can then use this KSM in
the device's struct request_queue to translate an encryption context into
a keyslot. The presence of the KSM in the request queue shall be used to mean
that the device supports IE.
We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It
contains a data unit number and a pointer to a blk_crypto_key. We add pointers
to a bio_crypt_ctx to ``struct bio`` and ``struct request``; this allows users
of the block layer (e.g. filesystems) to provide an encryption context when
creating a bio and have it be passed down the stack for processing by the block
layer and device drivers. Note that the encryption context doesn't explicitly
say whether to encrypt or decrypt, as that is implicit from the direction of the
bio; WRITE means encrypt, and READ means decrypt.
The KSM uses refcounts to track which keyslots are idle (either they have no
encryption context programmed, or there are no in-flight struct bios
referencing that keyslot). When a new encryption context needs a keyslot, it
tries to find a keyslot that has already been programmed with the same
encryption context, and if there is no such keyslot, it evicts the least
recently used idle keyslot and programs the new encryption context into that
one. If no idle keyslots are available, then the caller will sleep until there
is at least one.
We also introduce ``struct blk_crypto_profile`` to contain all generic inline
encryption-related state for a particular inline encryption device. The
blk_crypto_profile serves as the way that drivers for inline encryption hardware
advertise their crypto capabilities and provide certain functions (e.g.,
functions to program and evict keys) to upper layers. Each device driver that
wants to support inline encryption will construct a blk_crypto_profile, then
associate it with the disk's request_queue.
The blk_crypto_profile also manages the hardware's keyslots, when applicable.
This happens in the block layer, so that users of the block layer can just
specify encryption contexts and don't need to know about keyslots at all, nor do
device drivers need to care about most details of keyslot management.
blk-mq changes, other block layer changes and blk-crypto-fallback
=================================================================
Specifically, for each keyslot, the block layer (via the blk_crypto_profile)
keeps track of which blk_crypto_key that keyslot contains (if any), and how many
in-flight I/O requests are using it. When the block layer creates a
``struct request`` for a bio that has an encryption context, it grabs a keyslot
that already contains the key if possible. Otherwise it waits for an idle
keyslot (a keyslot that isn't in use by any I/O), then programs the key into the
least-recently-used idle keyslot using the function the device driver provided.
In both cases, the resulting keyslot is stored in the ``crypt_keyslot`` field of
the request, where it is then accessible to device drivers and is released after
the request completes.
We add a pointer to a ``bi_crypt_context`` and ``keyslot`` to
struct request. These will be referred to as the ``crypto fields``
for the request. This ``keyslot`` is the keyslot into which the
``bi_crypt_context`` has been programmed in the KSM of the ``request_queue``
that this request is being sent to.
``struct request`` also contains a pointer to the original bio_crypt_ctx.
Requests can be built from multiple bios, and the block layer must take the
encryption context into account when trying to merge bios and requests. For two
bios/requests to be merged, they must have compatible encryption contexts: both
unencrypted, or both encrypted with the same key and contiguous data unit
numbers. Only the encryption context for the first bio in a request is
retained, since the remaining bios have been verified to be merge-compatible
with the first bio.
We introduce ``block/blk-crypto-fallback.c``, which allows upper layers to remain
blissfully unaware of whether or not real inline encryption hardware is present
underneath. When a bio is submitted with a target ``request_queue`` that doesn't
support the encryption context specified with the bio, the block layer will
en/decrypt the bio with the blk-crypto-fallback.
To make it possible for inline encryption to work with request_queue based
layered devices, when a request is cloned, its encryption context is cloned as
well. When the cloned request is submitted, it is then processed as usual; this
includes getting a keyslot from the clone's target device if needed.
If the bio is a ``WRITE`` bio, a bounce bio is allocated, and the data in the bio
is encrypted stored in the bounce bio - blk-mq will then proceed to process the
bounce bio as if it were not encrypted at all (except when blk-integrity is
concerned). ``blk-crypto-fallback`` sets the bounce bio's ``bi_end_io`` to an
internal function that cleans up the bounce bio and ends the original bio.
blk-crypto-fallback
===================
If the bio is a ``READ`` bio, the bio's ``bi_end_io`` (and also ``bi_private``)
is saved and overwritten by ``blk-crypto-fallback`` to
``bio_crypto_fallback_decrypt_bio``. The bio's ``bi_crypt_context`` is also
overwritten with ``NULL``, so that to the rest of the stack, the bio looks
as if it was a regular bio that never had an encryption context specified.
``bio_crypto_fallback_decrypt_bio`` will decrypt the bio, restore the original
``bi_end_io`` (and also ``bi_private``) and end the bio again.
It is desirable for the inline encryption support of upper layers (e.g.
filesystems) to be testable without real inline encryption hardware, and
likewise for the block layer's keyslot management logic. It is also desirable
to allow upper layers to just always use inline encryption rather than have to
implement encryption in multiple ways.
Regardless of whether real inline encryption hardware is used or the
Therefore, we also introduce *blk-crypto-fallback*, which is an implementation
of inline encryption using the kernel crypto API. blk-crypto-fallback is built
into the block layer, so it works on any block device without any special setup.
Essentially, when a bio with an encryption context is submitted to a
block_device that doesn't support that encryption context, the block layer will
handle en/decryption of the bio using blk-crypto-fallback.
For encryption, the data cannot be encrypted in-place, as callers usually rely
on it being unmodified. Instead, blk-crypto-fallback allocates bounce pages,
fills a new bio with those bounce pages, encrypts the data into those bounce
pages, and submits that "bounce" bio. When the bounce bio completes,
blk-crypto-fallback completes the original bio. If the original bio is too
large, multiple bounce bios may be required; see the code for details.
For decryption, blk-crypto-fallback "wraps" the bio's completion callback
(``bi_end_io``) and private data (``bi_private``) with its own, unsets the
bio's encryption context, then submits the bio. If the read completes
successfully, blk-crypto-fallback restores the bio's original completion
callback and private data, then decrypts the bio's data in-place using the
kernel crypto API. Decryption happens from a workqueue, as it may sleep.
Afterwards, blk-crypto-fallback completes the bio.
In both cases, the bios that blk-crypto-fallback submits no longer have an
encryption context. Therefore, lower layers only see standard unencrypted I/O.
blk-crypto-fallback also defines its own blk_crypto_profile and has its own
"keyslots"; its keyslots contain ``struct crypto_skcipher`` objects. The reason
for this is twofold. First, it allows the keyslot management logic to be tested
without actual inline encryption hardware. Second, similar to actual inline
encryption hardware, the crypto API doesn't accept keys directly in requests but
rather requires that keys be set ahead of time, and setting keys can be
expensive; moreover, allocating a crypto_skcipher can't happen on the I/O path
at all due to the locks it takes. Therefore, the concept of keyslots still
makes sense for blk-crypto-fallback.
Note that regardless of whether real inline encryption hardware or
blk-crypto-fallback is used, the ciphertext written to disk (and hence the
on-disk format of data) will be the same (assuming the hardware's implementation
of the algorithm being used adheres to spec and functions correctly).
If a ``request queue``'s inline encryption hardware claimed to support the
encryption context specified with a bio, then it will not be handled by the
``blk-crypto-fallback``. We will eventually reach a point in blk-mq when a
struct request needs to be allocated for that bio. At that point,
blk-mq tries to program the encryption context into the ``request_queue``'s
keyslot_manager, and obtain a keyslot, which it stores in its newly added
``keyslot`` field. This keyslot is released when the request is completed.
When the first bio is added to a request, ``blk_crypto_rq_bio_prep`` is called,
which sets the request's ``crypt_ctx`` to a copy of the bio's
``bi_crypt_context``. bio_crypt_do_front_merge is called whenever a subsequent
bio is merged to the front of the request, which updates the ``crypt_ctx`` of
the request so that it matches the newly merged bio's ``bi_crypt_context``. In particular, the request keeps a copy of the ``bi_crypt_context`` of the first
bio in its bio-list (blk-mq needs to be careful to maintain this invariant
during bio and request merges).
To make it possible for inline encryption to work with request queue based
layered devices, when a request is cloned, its ``crypto fields`` are cloned as
well. When the cloned request is submitted, blk-mq programs the
``bi_crypt_context`` of the request into the clone's request_queue's keyslot
manager, and stores the returned keyslot in the clone's ``keyslot``.
on-disk format of data) will be the same (assuming that both the inline
encryption hardware's implementation and the kernel crypto API's implementation
of the algorithm being used adhere to spec and function correctly).
blk-crypto-fallback is optional and is controlled by the
``CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK`` kernel configuration option.
API presented to users of the block layer
=========================================
``struct blk_crypto_key`` represents a crypto key (the raw key, size of the
key, the crypto algorithm to use, the data unit size to use, and the number of
bytes required to represent data unit numbers that will be specified with the
``bi_crypt_context``).
``blk_crypto_config_supported()`` allows users to check ahead of time whether
inline encryption with particular crypto settings will work on a particular
block_device -- either via hardware or via blk-crypto-fallback. This function
takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits
the actual bytes of the key and instead just contains the algorithm, data unit
size, etc. This function can be useful if blk-crypto-fallback is disabled.
``blk_crypto_init_key`` allows upper layers to initialize such a
``blk_crypto_key``.
``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key.
``bio_crypt_set_ctx`` should be called on any bio that a user of
the block layer wants en/decrypted via inline encryption (or the
blk-crypto-fallback, if hardware support isn't available for the desired
crypto configuration). This function takes the ``blk_crypto_key`` and the
data unit number (DUN) to use when en/decrypting the bio.
Users must call ``blk_crypto_start_using_key()`` before actually starting to use
a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()``
was called earlier). This is needed to initialize blk-crypto-fallback if it
will be needed. This must not be called from the data path, as this may have to
allocate resources, which may deadlock in that case.
``blk_crypto_config_supported`` allows upper layers to query whether or not
an encryption context passed to a request queue can be handled by blk-crypto
(either by real inline encryption hardware, or by the blk-crypto-fallback).
This is useful e.g. when blk-crypto-fallback is disabled, and the upper layer
wants to use an algorithm that may not be supported by hardware - this function
lets the upper layer know ahead of time that the algorithm isn't supported,
and the upper layer can fall back to something else if appropriate.
Next, to attach an encryption context to a bio, users should call
``bio_crypt_set_ctx()``. This function allocates a bio_crypt_ctx and attaches
it to a bio, given the blk_crypto_key and the data unit number that will be used
for en/decryption. Users don't need to worry about freeing the bio_crypt_ctx
later, as that happens automatically when the bio is freed or reset.
``blk_crypto_start_using_key`` - Upper layers must call this function on
``blk_crypto_key`` and a ``request_queue`` before using the key with any bio
headed for that ``request_queue``. This function ensures that either the
hardware supports the key's crypto settings, or the crypto API fallback has
transforms for the needed mode allocated and ready to go. Note that this
function may allocate an ``skcipher``, and must not be called from the data
path, since allocating ``skciphers`` from the data path can deadlock.
Finally, when done using inline encryption with a blk_crypto_key on a
block_device, users must call ``blk_crypto_evict_key()``. This ensures that
the key is evicted from all keyslots it may be programmed into and unlinked from
any kernel data structures it may be linked into.
``blk_crypto_evict_key`` *must* be called by upper layers before a
``blk_crypto_key`` is freed. Further, it *must* be called only once
there are no more in-flight requests that use that ``blk_crypto_key``.
``blk_crypto_evict_key`` will ensure that a key is removed from any keyslots in
inline encryption hardware that the key might have been programmed into (or the blk-crypto-fallback).
In summary, for users of the block layer, the lifecycle of a blk_crypto_key is
as follows:
1. ``blk_crypto_config_supported()`` (optional)
2. ``blk_crypto_init_key()``
3. ``blk_crypto_start_using_key()``
4. ``bio_crypt_set_ctx()`` (potentially many times)
5. ``blk_crypto_evict_key()`` (after all I/O has completed)
6. Zeroize the blk_crypto_key (this has no dedicated function)
If a blk_crypto_key is being used on multiple block_devices, then
``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``,
and ``blk_crypto_evict_key()`` must be called on each block_device.
API presented to device drivers
===============================
A :c:type:`struct blk_keyslot_manager` should be set up by device drivers in
the ``request_queue`` of the device. The device driver needs to call
``blk_ksm_init`` (or its resource-managed variant ``devm_blk_ksm_init``) on the
``blk_keyslot_manager``, while specifying the number of keyslots supported by
the hardware.
A device driver that wants to support inline encryption must set up a
blk_crypto_profile in the request_queue of its device. To do this, it first
must call ``blk_crypto_profile_init()`` (or its resource-managed variant
``devm_blk_crypto_profile_init()``), providing the number of keyslots.
The device driver also needs to tell the KSM how to actually manipulate the
IE hardware in the device to do things like programming the crypto key into
a particular keyslot of the IE hardware. All this is achieved through the
struct blk_ksm_ll_ops field in the KSM that the device driver
must fill up after initing the ``blk_keyslot_manager``.
Next, it must advertise its crypto capabilities by setting fields in the
blk_crypto_profile, e.g. ``modes_supported`` and ``max_dun_bytes_supported``.
The KSM also handles runtime power management for the device when applicable
(e.g. when it wants to program a crypto key into the IE hardware, the device
must be runtime powered on) - so the device driver must also set the ``dev``
field in the ksm to point to the ``struct device`` for the KSM to use for runtime
power management.
It then must set function pointers in the ``ll_ops`` field of the
blk_crypto_profile to tell upper layers how to control the inline encryption
hardware, e.g. how to program and evict keyslots. Most drivers will need to
implement ``keyslot_program`` and ``keyslot_evict``. For details, see the
comments for ``struct blk_crypto_ll_ops``.
``blk_ksm_reprogram_all_keys`` can be called by device drivers if the device
needs each and every of its keyslots to be reprogrammed with the key it
"should have" at the point in time when the function is called. This is useful
e.g. if a device loses all its keys on runtime power down/up.
Once the driver registers a blk_crypto_profile with a request_queue, I/O
requests the driver receives via that queue may have an encryption context. All
encryption contexts will be compatible with the crypto capabilities declared in
the blk_crypto_profile, so drivers don't need to worry about handling
unsupported requests. Also, if a nonzero number of keyslots was declared in the
blk_crypto_profile, then all I/O requests that have an encryption context will
also have a keyslot which was already programmed with the appropriate key.
If the driver used ``blk_ksm_init`` instead of ``devm_blk_ksm_init``, then
``blk_ksm_destroy`` should be called to free up all resources used by a
``blk_keyslot_manager`` once it is no longer needed.
If the driver implements runtime suspend and its blk_crypto_ll_ops don't work
while the device is runtime-suspended, then the driver must also set the ``dev``
field of the blk_crypto_profile to point to the ``struct device`` that will be
resumed before any of the low-level operations are called.
If there are situations where the inline encryption hardware loses the contents
of its keyslots, e.g. device resets, the driver must handle reprogramming the
keyslots. To do this, the driver may call ``blk_crypto_reprogram_all_keys()``.
Finally, if the driver used ``blk_crypto_profile_init()`` instead of
``devm_blk_crypto_profile_init()``, then it is responsible for calling
``blk_crypto_profile_destroy()`` when the crypto profile is no longer needed.
Layered Devices
===============
Request queue based layered devices like dm-rq that wish to support IE need to
create their own keyslot manager for their request queue, and expose whatever
functionality they choose. When a layered device wants to pass a clone of that
request to another ``request_queue``, blk-crypto will initialize and prepare the
clone as necessary - see ``blk_crypto_insert_cloned_request`` in
``blk-crypto.c``.
Future Optimizations for layered devices
========================================
Creating a keyslot manager for a layered device uses up memory for each
keyslot, and in general, a layered device merely passes the request on to a
"child" device, so the keyslots in the layered device itself are completely
unused, and don't need any refcounting or keyslot programming. We can instead
define a new type of KSM; the "passthrough KSM", that layered devices can use
to advertise an unlimited number of keyslots, and support for any encryption
algorithms they choose, while not actually using any memory for each keyslot.
Another use case for the "passthrough KSM" is for IE devices that do not have a
limited number of keyslots.
Request queue based layered devices like dm-rq that wish to support inline
encryption need to create their own blk_crypto_profile for their request_queue,
and expose whatever functionality they choose. When a layered device wants to
pass a clone of that request to another request_queue, blk-crypto will
initialize and prepare the clone as necessary; see
``blk_crypto_insert_cloned_request()``.
Interaction between inline encryption and blk integrity
=======================================================
@@ -257,7 +298,220 @@ Because there isn't any real hardware yet, it seems prudent to assume that
hardware implementations might not implement both features together correctly,
and disallow the combination for now. Whenever a device supports integrity, the
kernel will pretend that the device does not support hardware inline encryption
(by essentially setting the keyslot manager in the request_queue of the device
to NULL). When the crypto API fallback is enabled, this means that all bios with
an encryption context will use the fallback, and IO will complete as usual.
When the fallback is disabled, a bio with an encryption context will be failed.
(by setting the blk_crypto_profile in the request_queue of the device to NULL).
When the crypto API fallback is enabled, this means that all bios with an
encryption context will use the fallback, and IO will complete as usual. When
the fallback is disabled, a bio with an encryption context will be failed.
.. _hardware_wrapped_keys:
Hardware-wrapped keys
=====================
Motivation and threat model
---------------------------
Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
relies on the raw encryption key(s) being present in kernel memory so that the
encryption can be performed. This traditionally isn't seen as a problem because
the key(s) won't be present during an offline attack, which is the main type of
attack that storage encryption is intended to protect from.
However, there is an increasing desire to also protect users' data from other
types of attacks (to the extent possible), including:
- Cold boot attacks, where an attacker with physical access to a system suddenly
powers it off, then immediately dumps the system memory to extract recently
in-use encryption keys, then uses these keys to decrypt user data on-disk.
- Online attacks where the attacker is able to read kernel memory without fully
compromising the system, followed by an offline attack where any extracted
keys can be used to decrypt user data on-disk. An example of such an online
attack would be if the attacker is able to run some code on the system that
exploits a Meltdown-like vulnerability but is unable to escalate privileges.
- Online attacks where the attacker fully compromises the system, but their data
exfiltration is significantly time-limited and/or bandwidth-limited, so in
order to completely exfiltrate the data they need to extract the encryption
keys to use in a later offline attack.
Hardware-wrapped keys are a feature of inline encryption hardware that is
designed to protect users' data from the above attacks (to the extent possible),
without introducing limitations such as a maximum number of keys.
Note that it is impossible to **fully** protect users' data from these attacks.
Even in the attacks where the attacker "just" gets read access to kernel memory,
they can still extract any user data that is present in memory, including
plaintext pagecache pages of encrypted files. The focus here is just on
protecting the encryption keys, as those instantly give access to **all** user
data in any following offline attack, rather than just some of it (where which
data is included in that "some" might not be controlled by the attacker).
Solution overview
-----------------
Inline encryption hardware typically has "keyslots" into which software can
program keys for the hardware to use; the contents of keyslots typically can't
be read back by software. As such, the above security goals could be achieved
if the kernel simply erased its copy of the key(s) after programming them into
keyslot(s) and thereafter only referred to them via keyslot number.
However, that naive approach runs into the problem that it limits the number of
unlocked keys to the number of keyslots, which typically is a small number. In
cases where there is only one encryption key system-wide (e.g., a full-disk
encryption key), that can be tolerable. However, in general there can be many
logged-in users with many different keys, and/or many running applications with
application-specific encrypted storage areas. This is especially true if
file-based encryption (e.g. fscrypt) is being used.
Thus, it is important for the kernel to still have a way to "remind" the
hardware about a key, without actually having the raw key itself. This would
ensure that the number of hardware keyslots only limits the number of active I/O
requests, not other things such as the number of logged-in users, the number of
running apps, or the number of encrypted storage areas that apps can create.
Somewhat less importantly, it is also desirable that the raw keys are never
visible to software at all, even while being initially unlocked. This would
ensure that a read-only compromise of system memory will never allow a key to be
extracted to be used off-system, even if it occurs when a key is being unlocked.
To solve all these problems, some vendors of inline encryption hardware have
made their hardware support *hardware-wrapped keys*. Hardware-wrapped keys
are encrypted keys that can only be unwrapped (decrypted) and used by hardware
-- either by the inline encryption hardware itself, or by a dedicated hardware
block that can directly provision keys to the inline encryption hardware.
(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
to add some clarity in cases where there could be other types of wrapped keys,
such as in file-based encryption. Key wrapping is a commonly used technique.)
The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
that is never exposed to software; it is either a persistent key (a "long-term
wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term
wrapped form of the key is what is initially unlocked, but it is erased from
memory as soon as it is converted into an ephemerally-wrapped key. In-use
hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
the hardware also includes a level of indirection; it doesn't use the unwrapped
key directly for inline encryption, but rather derives both an inline encryption
key and a "software secret" from it. Software can use the "software secret" for
tasks that can't use the inline encryption hardware, such as filenames
encryption. The software secret is not protected from memory compromise.
Key hierarchy
-------------
Here is the key hierarchy for a hardware-wrapped key::
Hardware-wrapped key
|
|
<Hardware KDF>
|
-----------------------------
| |
Inline encryption key Software secret
The components are:
- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
Function), in ephemerally-wrapped form. The key wrapping algorithm is a
hardware implementation detail that doesn't impact kernel operation, but a
strong authenticated encryption algorithm such as AES-256-GCM is recommended.
- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
derive subkeys after unwrapping the wrapped key. The hardware's choice of KDF
doesn't impact kernel operation, but it does need to be known for testing
purposes, and it's also assumed to have at least a 256-bit security strength.
All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
with a particular choice of labels and contexts; new hardware should use this
already-vetted KDF.
- *Inline encryption key*: a derived key which the hardware directly provisions
to a keyslot of the inline encryption hardware, without exposing it to
software. In all known hardware, this will always be an AES-256-XTS key.
However, in principle other encryption algorithms could be supported too.
Hardware must derive distinct subkeys for each supported encryption algorithm.
- *Software secret*: a derived key which the hardware returns to software so
that software can use it for cryptographic tasks that can't use inline
encryption. This value is cryptographically isolated from the inline
encryption key, i.e. knowing one doesn't reveal the other. (The KDF ensures
this.) Currently, the software secret is always 32 bytes and thus is suitable
for cryptographic applications that require up to a 256-bit security strength.
Some use cases (e.g. full-disk encryption) won't require the software secret.
Example: in the case of fscrypt, the fscrypt master key (the key that protects a
particular set of encrypted directories) is made hardware-wrapped. The inline
encryption key is used as the file contents encryption key, while the software
secret (rather than the master key directly) is used to key fscrypt's KDF
(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
Note that currently this design assumes a single inline encryption key per
hardware-wrapped key, without any further key derivation. Thus, in the case of
fscrypt, currently hardware-wrapped keys are only compatible with the "inline
encryption optimized" settings, which use one file contents encryption key per
encryption policy rather than one per file. This design could be extended to
make the hardware derive per-file keys using per-file nonces passed down the
storage stack, and in fact some hardware already supports this; future work is
planned to remove this limitation by adding the corresponding kernel support.
Kernel support
--------------
The inline encryption support of the kernel's block layer ("blk-crypto") has
been extended to support hardware-wrapped keys as an alternative to standard
keys, when hardware support is available. This works in the following way:
- A ``key_types_supported`` field is added to the crypto capabilities in
``struct blk_crypto_profile``. This allows device drivers to declare that
they support standard keys, hardware-wrapped keys, or both.
- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
alternative to a standard key; a ``key_type`` field is added to
``struct blk_crypto_config`` to distinguish between the different key types.
This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
key in a way very similar to using a standard key.
- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added. Device drivers
that support hardware-wrapped keys must implement this method. Users of
blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method.
- The programming and eviction of hardware-wrapped keys happens via
``blk_crypto_ll_ops::keyslot_program`` and
``blk_crypto_ll_ops::keyslot_evict``, just like it does for standard keys. If
a driver supports hardware-wrapped keys, then it must handle hardware-wrapped
keys being passed to these methods.
blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore,
hardware-wrapped keys can only be used with actual inline encryption hardware.
Currently, the kernel only works with hardware-wrapped keys in
ephemerally-wrapped form. No generic kernel interfaces are provided for
generating or importing hardware-wrapped keys in the first place, or converting
them to ephemerally-wrapped form. In Android, SoC vendors are required to
support these operations in their KeyMint implementation (a hardware abstraction
layer in userspace); for details, see the `Android documentation
<https://source.android.com/security/encryption/hw-wrapped-keys>`_.
Testability
-----------
Both the hardware KDF and the inline encryption itself are well-defined
algorithms that don't depend on any secrets other than the unwrapped key.
Therefore, if the unwrapped key is known to software, these algorithms can be
reproduced in software in order to verify the ciphertext that is written to disk
by the inline encryption hardware.
However, the unwrapped key will only be known to software for testing if the
"import" functionality is used. Proper testing is not possible in the
"generate" case where the hardware generates the key itself. The correct
operation of the "generate" mode thus relies on the security and correctness of
the hardware RNG and its use to generate the key, as well as the testing of the
"import" mode as that should cover all parts other than the key generation.
For an example of a test that verifies the ciphertext written to disk in the
"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
`Android's vts_kernel_encryption_test
<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/master/encryption/>`_.

View File

@@ -4,39 +4,76 @@ The Kernel Address Sanitizer (KASAN)
Overview
--------
KernelAddressSANitizer (KASAN) is a dynamic memory safety error detector
designed to find out-of-bound and use-after-free bugs. KASAN has three modes:
Kernel Address Sanitizer (KASAN) is a dynamic memory safety error detector
designed to find out-of-bounds and use-after-free bugs.
1. generic KASAN (similar to userspace ASan),
2. software tag-based KASAN (similar to userspace HWASan),
3. hardware tag-based KASAN (based on hardware memory tagging).
KASAN has three modes:
Generic KASAN is mainly used for debugging due to a large memory overhead.
Software tag-based KASAN can be used for dogfood testing as it has a lower
memory overhead that allows using it with real workloads. Hardware tag-based
KASAN comes with low memory and performance overheads and, therefore, can be
used in production. Either as an in-field memory bug detector or as a security
mitigation.
1. Generic KASAN
2. Software Tag-Based KASAN
3. Hardware Tag-Based KASAN
Software KASAN modes (#1 and #2) use compile-time instrumentation to insert
validity checks before every memory access and, therefore, require a compiler
version that supports that.
Generic KASAN, enabled with CONFIG_KASAN_GENERIC, is the mode intended for
debugging, similar to userspace ASan. This mode is supported on many CPU
architectures, but it has significant performance and memory overheads.
Generic KASAN is supported in GCC and Clang. With GCC, it requires version
8.3.0 or later. Any supported Clang version is compatible, but detection of
out-of-bounds accesses for global variables is only supported since Clang 11.
Software Tag-Based KASAN or SW_TAGS KASAN, enabled with CONFIG_KASAN_SW_TAGS,
can be used for both debugging and dogfood testing, similar to userspace HWASan.
This mode is only supported for arm64, but its moderate memory overhead allows
using it for testing on memory-restricted devices with real workloads.
Software tag-based KASAN mode is only supported in Clang.
Hardware Tag-Based KASAN or HW_TAGS KASAN, enabled with CONFIG_KASAN_HW_TAGS,
is the mode intended to be used as an in-field memory bug detector or as a
security mitigation. This mode only works on arm64 CPUs that support MTE
(Memory Tagging Extension), but it has low memory and performance overheads and
thus can be used in production.
The hardware KASAN mode (#3) relies on hardware to perform the checks but
still requires a compiler version that supports memory tagging instructions.
This mode is supported in GCC 10+ and Clang 11+.
For details about the memory and performance impact of each KASAN mode, see the
descriptions of the corresponding Kconfig options.
Both software KASAN modes work with SLUB and SLAB memory allocators,
while the hardware tag-based KASAN currently only supports SLUB.
The Generic and the Software Tag-Based modes are commonly referred to as the
software modes. The Software Tag-Based and the Hardware Tag-Based modes are
referred to as the tag-based modes.
Currently, generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390,
and riscv architectures, and tag-based KASAN modes are supported only for arm64.
Support
-------
Architectures
~~~~~~~~~~~~~
Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
xtensa, and the tag-based KASAN modes are supported only on arm64.
Compilers
~~~~~~~~~
Software KASAN modes use compile-time instrumentation to insert validity checks
before every memory access and thus require a compiler version that provides
support for that. The Hardware Tag-Based mode relies on hardware to perform
these checks but still requires a compiler version that supports the memory
tagging instructions.
Generic KASAN requires GCC version 8.3.0 or later
or any Clang version supported by the kernel.
Software Tag-Based KASAN requires GCC 11+
or any Clang version supported by the kernel.
Hardware Tag-Based KASAN requires GCC 10+ or Clang 12+.
Memory types
~~~~~~~~~~~~
Generic KASAN supports finding bugs in all of slab, page_alloc, vmap, vmalloc,
stack, and global memory.
Software Tag-Based KASAN supports slab, page_alloc, vmalloc, and stack memory.
Hardware Tag-Based KASAN supports slab, page_alloc, and non-executable vmalloc
memory.
For slab, both software KASAN modes support SLUB and SLAB allocators, while
Hardware Tag-Based KASAN only supports SLUB.
Usage
-----
@@ -45,18 +82,81 @@ To enable KASAN, configure the kernel with::
CONFIG_KASAN=y
and choose between ``CONFIG_KASAN_GENERIC`` (to enable generic KASAN),
``CONFIG_KASAN_SW_TAGS`` (to enable software tag-based KASAN), and
``CONFIG_KASAN_HW_TAGS`` (to enable hardware tag-based KASAN).
and choose between ``CONFIG_KASAN_GENERIC`` (to enable Generic KASAN),
``CONFIG_KASAN_SW_TAGS`` (to enable Software Tag-Based KASAN), and
``CONFIG_KASAN_HW_TAGS`` (to enable Hardware Tag-Based KASAN).
For software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
For the software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and
``CONFIG_KASAN_INLINE``. Outline and inline are compiler instrumentation types.
The former produces a smaller binary while the latter is 1.1-2 times faster.
The former produces a smaller binary while the latter is up to 2 times faster.
To include alloc and free stack traces of affected slab objects into reports,
enable ``CONFIG_STACKTRACE``. To include alloc and free stack traces of affected
physical pages, enable ``CONFIG_PAGE_OWNER`` and boot with ``page_owner=on``.
Boot parameters
~~~~~~~~~~~~~~~
KASAN is affected by the generic ``panic_on_warn`` command line parameter.
When it is enabled, KASAN panics the kernel after printing a bug report.
By default, KASAN prints a bug report only for the first invalid memory access.
With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
effectively disables ``panic_on_warn`` for KASAN reports.
Alternatively, independent of ``panic_on_warn``, the ``kasan.fault=`` boot
parameter can be used to control panic and reporting behaviour:
- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
report or also panic the kernel (default: ``report``). The panic happens even
if ``kasan_multi_shot`` is enabled.
Software and Hardware Tag-Based KASAN modes (see the section about various
modes below) support altering stack trace collection behavior:
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
traces collection (default: ``on``).
- ``kasan.stack_ring_size=<number of entries>`` specifies the number of entries
in the stack ring (default: ``32768``).
Hardware Tag-Based KASAN mode is intended for use in production as a security
mitigation. Therefore, it supports additional boot parameters that allow
disabling KASAN altogether or controlling its features:
- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN
is configured in synchronous, asynchronous or asymmetric mode of
execution (default: ``sync``).
Synchronous mode: a bad access is detected immediately when a tag
check fault occurs.
Asynchronous mode: a bad access detection is delayed. When a tag check
fault occurs, the information is stored in hardware (in the TFSR_EL1
register for arm64). The kernel periodically checks the hardware and
only reports tag faults during these checks.
Asymmetric mode: a bad access is detected synchronously on reads and
asynchronously on writes.
- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
allocations (default: ``on``).
- ``kasan.page_alloc.sample=<sampling interval>`` makes KASAN tag only every
Nth page_alloc allocation with the order equal to or greater than
``kasan.page_alloc.sample.order``, where N is the value of the ``sample``
parameter (default: ``1``, or tag every such allocation).
This parameter is intended to mitigate the performance overhead introduced
by KASAN.
Note that enabling this parameter makes Hardware Tag-Based KASAN skip checks
of allocations chosen by sampling and thus miss bad accesses to these
allocations. Use the default value for accurate bug detection.
- ``kasan.page_alloc.sample.order=<minimum page order>`` specifies the minimum
order of allocations that are affected by sampling (default: ``3``).
Only applies when ``kasan.page_alloc.sample`` is set to a value greater
than ``1``.
This parameter is intended to allow sampling only large page_alloc
allocations, which is the biggest source of the performance overhead.
Error reports
~~~~~~~~~~~~~
@@ -146,7 +246,7 @@ is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the
memory state section of the report shows the state of one of the memory
granules that surround the accessed address.
For generic KASAN, the size of each memory granule is 8. The state of each
For Generic KASAN, the size of each memory granule is 8. The state of each
granule is encoded in one shadow byte. Those 8 bytes can be accessible,
partially accessible, freed, or be a part of a redzone. KASAN uses the following
encoding for each shadow byte: 00 means that all 8 bytes of the corresponding
@@ -171,41 +271,6 @@ traces point to places in code that interacted with the object but that are not
directly present in the bad access stack trace. Currently, this includes
call_rcu() and workqueue queuing.
Boot parameters
~~~~~~~~~~~~~~~
KASAN is affected by the generic ``panic_on_warn`` command line parameter.
When it is enabled, KASAN panics the kernel after printing a bug report.
By default, KASAN prints a bug report only for the first invalid memory access.
With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This
effectively disables ``panic_on_warn`` for KASAN reports.
Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot
parameter can be used to control panic and reporting behaviour:
- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
report or also panic the kernel (default: ``report``). The panic happens even
if ``kasan_multi_shot`` is enabled.
Hardware tag-based KASAN mode (see the section about various modes below) is
intended for use in production as a security mitigation. Therefore, it supports
additional boot parameters that allow disabling KASAN or controlling features:
- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in
synchronous or asynchronous mode of execution (default: ``sync``).
Synchronous mode: a bad access is detected immediately when a tag
check fault occurs.
Asynchronous mode: a bad access detection is delayed. When a tag check
fault occurs, the information is stored in hardware (in the TFSR_EL1
register for arm64). The kernel periodically checks the hardware and
only reports tag faults during these checks.
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
traces collection (default: ``on``).
Implementation details
----------------------
@@ -244,49 +309,46 @@ outline-instrumented kernel.
Generic KASAN is the only mode that delays the reuse of freed objects via
quarantine (see mm/kasan/quarantine.c for implementation).
Software tag-based KASAN
Software Tag-Based KASAN
~~~~~~~~~~~~~~~~~~~~~~~~
Software tag-based KASAN uses a software memory tagging approach to checking
Software Tag-Based KASAN uses a software memory tagging approach to checking
access validity. It is currently only implemented for the arm64 architecture.
Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
Software Tag-Based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
to store a pointer tag in the top byte of kernel pointers. It uses shadow memory
to store memory tags associated with each 16-byte memory cell (therefore, it
dedicates 1/16th of the kernel memory for shadow memory).
On each memory allocation, software tag-based KASAN generates a random tag, tags
On each memory allocation, Software Tag-Based KASAN generates a random tag, tags
the allocated memory with this tag, and embeds the same tag into the returned
pointer.
Software tag-based KASAN uses compile-time instrumentation to insert checks
Software Tag-Based KASAN uses compile-time instrumentation to insert checks
before each memory access. These checks make sure that the tag of the memory
that is being accessed is equal to the tag of the pointer that is used to access
this memory. In case of a tag mismatch, software tag-based KASAN prints a bug
this memory. In case of a tag mismatch, Software Tag-Based KASAN prints a bug
report.
Software tag-based KASAN also has two instrumentation modes (outline, which
Software Tag-Based KASAN also has two instrumentation modes (outline, which
emits callbacks to check memory accesses; and inline, which performs the shadow
memory checks inline). With outline instrumentation mode, a bug report is
printed from the function that performs the access check. With inline
instrumentation, a ``brk`` instruction is emitted by the compiler, and a
dedicated ``brk`` handler is used to print bug reports.
Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
Software Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
reserved to tag freed memory regions.
Software tag-based KASAN currently only supports tagging of slab and page_alloc
memory.
Hardware tag-based KASAN
Hardware Tag-Based KASAN
~~~~~~~~~~~~~~~~~~~~~~~~
Hardware tag-based KASAN is similar to the software mode in concept but uses
Hardware Tag-Based KASAN is similar to the software mode in concept but uses
hardware memory tagging support instead of compiler instrumentation and
shadow memory.
Hardware tag-based KASAN is currently only implemented for arm64 architecture
Hardware Tag-Based KASAN is currently only implemented for arm64 architecture
and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5
Instruction Set Architecture and Top Byte Ignore (TBI).
@@ -296,26 +358,25 @@ access, hardware makes sure that the tag of the memory that is being accessed is
equal to the tag of the pointer that is used to access this memory. In case of a
tag mismatch, a fault is generated, and a report is printed.
Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
Hardware Tag-Based KASAN uses 0xFF as a match-all pointer tag (accesses through
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
reserved to tag freed memory regions.
Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
memory.
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
If the hardware does not support MTE (pre ARMv8.5), Hardware Tag-Based KASAN
will not be enabled. In this case, all KASAN boot parameters are ignored.
Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
enabled, even when ``kasan=off`` is provided or when the hardware does not
support MTE (but supports TBI).
Hardware tag-based KASAN only reports the first found bug. After that, MTE tag
Hardware Tag-Based KASAN only reports the first found bug. After that, MTE tag
checking gets disabled.
Shadow memory
-------------
The contents of this section are only applicable to software KASAN modes.
The kernel maps memory in several different parts of the address space.
The range of kernel virtual addresses is large: there is not enough real
memory to support a real shadow region for every address that could be
@@ -346,7 +407,7 @@ CONFIG_KASAN_VMALLOC
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
cost of greater memory usage. Currently, this is supported on x86,
riscv, s390, and powerpc.
arm64, riscv, s390, and powerpc.
This works by hooking into vmalloc and vmap and dynamically
allocating real shadow memory to back the mappings.
@@ -406,19 +467,18 @@ generic ``noinstr`` one.
Note that disabling compiler instrumentation (either on a per-file or a
per-function basis) makes KASAN ignore the accesses that happen directly in
that code for software KASAN modes. It does not help when the accesses happen
indirectly (through calls to instrumented functions) or with the hardware
tag-based mode that does not use compiler instrumentation.
indirectly (through calls to instrumented functions) or with Hardware
Tag-Based KASAN, which does not use compiler instrumentation.
For software KASAN modes, to disable KASAN reports in a part of the kernel code
for the current task, annotate this part of the code with a
``kasan_disable_current()``/``kasan_enable_current()`` section. This also
disables the reports for indirect accesses that happen through function calls.
For tag-based KASAN modes (include the hardware one), to disable access
checking, use ``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that
temporarily disabling access checking via ``page_kasan_tag_reset()`` requires
saving and restoring the per-page KASAN tag via
``page_kasan_tag``/``page_kasan_tag_set``.
For tag-based KASAN modes, to disable access checking, use
``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that temporarily
disabling access checking via ``page_kasan_tag_reset()`` requires saving and
restoring the per-page KASAN tag via ``page_kasan_tag``/``page_kasan_tag_set``.
Tests
~~~~~

View File

@@ -0,0 +1,99 @@
dm_bow (backup on write)
========================
dm_bow is a device mapper driver that uses the free space on a device to back up
data that is overwritten. The changes can then be committed by a simple state
change, or rolled back by removing the dm_bow device and running a command line
utility over the underlying device.
dm_bow has three states, set by writing 1 or 2 to /sys/block/dm-?/bow/state.
It is only possible to go from state 0 (initial state) to state 1, and then from
state 1 to state 2.
State 0: dm_bow collects all trims to the device and assumes that these mark
free space on the overlying file system that can be safely used. Typically the
mount code would create the dm_bow device, mount the file system, call the
FITRIM ioctl on the file system then switch to state 1. These trims are not
propagated to the underlying device.
State 1: All writes to the device cause the underlying data to be backed up to
the free (trimmed) area as needed in such a way that they can be restored.
However, the writes, with one exception, then happen exactly as they would
without dm_bow, so the device is always in a good final state. The exception is
that sector 0 is used to keep a log of the latest changes, both to indicate that
we are in this state and to allow rollback. See below for all details. If there
isn't enough free space, writes are failed with -ENOSPC.
State 2: The transition to state 2 triggers replacing the special sector 0 with
the normal sector 0, and the freeing of all state information. dm_bow then
becomes a pass-through driver, allowing the device to continue to be used with
minimal performance impact.
Usage
=====
dm-bow takes one command line parameter, the name of the underlying device.
dm-bow will typically be used in the following way. dm-bow will be loaded with a
suitable underlying device and the resultant device will be mounted. A file
system trim will be issued via the FITRIM ioctl, then the device will be
switched to state 1. The file system will now be used as normal. At some point,
the changes can either be committed by switching to state 2, or rolled back by
unmounting the file system, removing the dm-bow device and running the command
line utility. Note that rebooting the device will be equivalent to unmounting
and removing, but the command line utility must still be run.
Details of operation in state 1
===============================
dm_bow maintains a type for all sectors. A sector can be any of:
SECTOR0
SECTOR0_CURRENT
UNCHANGED
FREE
CHANGED
BACKUP
SECTOR0 is the first sector on the device, and is used to hold the log of
changes. This is the one exception.
SECTOR0_CURRENT is a sector picked from the FREE sectors, and is where reads and
writes from the true sector zero are redirected to. Note that like any backup
sector, if the sector is written to directly, it must be moved again.
UNCHANGED means that the sector has not been changed since we entered state 1.
Thus if it is written to or trimmed, the contents must first be backed up.
FREE means that the sector was trimmed in state 0 and has not yet been written
to or used for backup. On being written to, a FREE sector is changed to CHANGED.
CHANGED means that the sector has been modified, and can be further modified
without further backup.
BACKUP means that this is a free sector being used as a backup. On being written
to, the contents must first be backed up again.
All backup operations are logged to the first sector. The log sector has the
format:
--------------------------------------------------------
| Magic | Count | Sequence | Log entry | Log entry | …
--------------------------------------------------------
Magic is a magic number. Count is the number of log entries. Sequence is 0
initially. A log entry is
-----------------------------------
| Source | Dest | Size | Checksum |
-----------------------------------
When SECTOR0 is full, the log sector is backed up and another empty log sector
created with sequence number one higher. The first entry in any log sector with
sequence > 0 therefore must be the log of the backing up of the previous log
sector. Note that sequence is not strictly needed, but is a useful sanity check
and potentially limits the time spent trying to restore a corrupted snapshot.
On entering state 1, dm_bow has a list of free sectors. All other sectors are
unchanged. Sector0_current is selected from the free sectors and the contents of
sector 0 are copied there. The sector 0 is backed up, which triggers the first
log entry to be written.

View File

@@ -0,0 +1,51 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/misc/qemu,vcpu-stall-detector.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: VCPU stall detector
description:
This binding describes a CPU stall detector mechanism for virtual CPUs
which is accessed through MMIO.
maintainers:
- Sebastian Ene <sebastianene@google.com>
properties:
compatible:
enum:
- qemu,vcpu-stall-detector
reg:
maxItems: 1
clock-frequency:
$ref: /schemas/types.yaml#/definitions/uint32
description: |
The internal clock of the stall detector peripheral measured in Hz used
to decrement its internal counter register on each tick.
Defaults to 10 if unset.
default: 10
timeout-sec:
description: |
The stall detector expiration timeout measured in seconds.
Defaults to 8 if unset. Please note that it also takes into account the
time spent while the VCPU is not running.
default: 8
required:
- compatible
additionalProperties: false
examples:
- |
vmwdt@9030000 {
compatible = "qemu,vcpu-stall-detector";
reg = <0x9030000 0x10000>;
clock-frequency = <10>;
timeout-sec = <8>;
};

View File

@@ -119,6 +119,18 @@ properties:
If present, HS400 command responses are sampled on rising edges.
If not present, HS400 command responses are sampled on falling edges.
mediatek,hs400-ds-dly3:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Gear of the third delay line for DS for input data latch in data
pad macro, there are 32 stages from 0 to 31.
For different corner IC, the time is different about one step, it is
about 100ps.
The value is confirmed by doing scan and calibration to find a best
value with corner IC and it is valid only for HS400 mode.
minimum: 0
maximum: 31
mediatek,latch-ck:
$ref: /schemas/types.yaml#/definitions/uint32
description:

View File

@@ -12,12 +12,14 @@ maintainers:
properties:
compatible:
const: arm,cmn-600
enum:
- arm,cmn-600
- arm,ci-700
reg:
items:
- description: Physical address of the base (PERIPHBASE) and
size (up to 64MB) of the configuration address space.
size of the configuration address space.
interrupts:
minItems: 1
@@ -31,14 +33,23 @@ properties:
arm,root-node:
$ref: /schemas/types.yaml#/definitions/uint32
description: Offset from PERIPHBASE of the configuration
discovery node (see TRM definition of ROOTNODEBASE).
description: Offset from PERIPHBASE of CMN-600's configuration
discovery node (see TRM definition of ROOTNODEBASE). Not
relevant for newer CMN/CI products.
required:
- compatible
- reg
- interrupts
- arm,root-node
if:
properties:
compatible:
contains:
const: arm,cmn-600
then:
required:
- arm,root-node
additionalProperties: false

View File

@@ -0,0 +1,46 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/reserved-memory/google,open-dice.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Open Profile for DICE Device Tree Bindings
description: |
This binding represents a reserved memory region containing data
generated by the Open Profile for DICE protocol.
See https://pigweed.googlesource.com/open-dice/
maintainers:
- David Brazdil <dbrazdil@google.com>
allOf:
- $ref: "reserved-memory.yaml"
properties:
compatible:
const: google,open-dice
reg:
description: page-aligned region of memory containing DICE data
required:
- compatible
- reg
- no-map
unevaluatedProperties: false
examples:
- |
reserved-memory {
#address-cells = <2>;
#size-cells = <1>;
dice: dice@12340000 {
compatible = "google,open-dice";
reg = <0x00 0x12340000 0x2000>;
no-map;
};
};

View File

@@ -0,0 +1 @@
per-file f2fs**=file:/fs/f2fs/OWNERS

View File

@@ -19,9 +19,10 @@ It is designed as a better filesystem solution for the following scenarios:
immutable and bit-for-bit identical to the official golden image for
their releases due to security and other considerations and
- hope to save some extra storage space with guaranteed end-to-end performance
by using reduced metadata and transparent file compression, especially
for those embedded devices with limited memory (ex, smartphone);
- hope to minimize extra storage space with guaranteed end-to-end performance
by using compact layout, transparent file compression and direct access,
especially for those embedded devices with limited memory and high-density
hosts with numerous containers;
Here are the main features of EROFS:
@@ -51,7 +52,9 @@ Here is the main features of EROFS:
- Support POSIX.1e ACLs by using xattrs;
- Support transparent data compression as an option:
LZ4 algorithm with the fixed-sized output compression for high performance.
LZ4 algorithm with the fixed-sized output compression for high performance;
- Multiple device support for multi-layer container images.
The following git tree provides the file system user-space tools under
development (ex, formatting tool mkfs.erofs):
@@ -87,8 +90,17 @@ cache_strategy=%s Select a strategy for cached decompression from now on:
dax={always,never} Use direct access (no page cache). See
Documentation/filesystems/dax.rst.
dax A legacy option which is an alias for ``dax=always``.
device=%s Specify a path to an extra device to be used together.
=================== =========================================================
Sysfs Entries
=============
Information about mounted erofs file systems can be found in /sys/fs/erofs.
Each mounted filesystem will have a directory in /sys/fs/erofs based on its
device name (e.g., /sys/fs/erofs/sda).
(see also Documentation/ABI/testing/sysfs-fs-erofs)
On-disk details
===============

View File

@@ -25,10 +25,14 @@ a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
- git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
For reporting bugs and sending patches, please use the following mailing list:
For sending patches, please use the following mailing list:
- linux-f2fs-devel@lists.sourceforge.net
For reporting bugs, please use the following f2fs bug tracker link:
- https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
Background and Design issues
============================
@@ -154,6 +158,8 @@ nobarrier This option can be used if underlying storage guarantees
If this option is set, no cache_flush commands are issued
but f2fs still guarantees the write ordering of all the
data writes.
barrier If this option is set, cache_flush commands are allowed to be
issued.
fastboot This option is used when a system wants to reduce mount
time as much as possible, even though normal performance
can be sacrificed.
@@ -198,10 +204,30 @@ fault_type=%d Support configuring fault injection type, should be
FAULT_WRITE_IO 0x000004000
FAULT_SLAB_ALLOC 0x000008000
FAULT_DQUOT_INIT 0x000010000
FAULT_LOCK_OP 0x000020000
FAULT_BLKADDR 0x000040000
=================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
"fragment:segment" and "fragment:block" are newly added here.
These are developer options for experiments to simulate filesystem
fragmentation/after-GC situation itself. The developers use these
modes to understand filesystem fragmentation/after-GC condition well,
and eventually get some insights to handle them better.
In "fragment:segment", f2fs allocates a new segment in random
position. With this, we can simulate the after-GC condition.
In "fragment:block", we can scatter block allocation with
"max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
We added some randomness to both chunk and hole size to make
it close to realistic IO pattern. So, in this mode, f2fs will allocate
1..<max_fragment_chunk> blocks in a chunk and make a hole in the
length of 1..<max_fragment_hole> by turns. With this, the newly
allocated blocks will be scattered throughout the whole partition.
Note that "fragment:block" implicitly enables "fragment:segment"
option for more randomness.
Please use these options for your experiments, and we strongly
recommend re-formatting the filesystem after using these options.
io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs".
usrquota Enable plain user disk quota accounting.
@@ -216,12 +242,6 @@ offgrpjquota Turn off group journalled quota.
offprjjquota Turn off project journalled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
whint_mode=%s Control which write hints are passed down to block
layer. This supports "off", "user-based", and
"fs-based". In "off" mode (default), f2fs does not pass
down hints. In "user-based" mode, f2fs tries to pass
down hints given by users. And in "fs-based" mode, f2fs
passes down hints with its policy.
alloc_mode=%s Adjust block allocation policy, which supports "reuse"
and "default".
fsync_mode=%s Control the policy of fsync. Currently supports "posix",
@@ -323,6 +343,15 @@ discard_unit=%s Control discard unit, the argument can be "block", "segment"
default, it is helpful for large sized SMR or ZNS devices to
reduce memory cost by getting rid of fs metadata supports small
discard.
memory=%s Control memory mode. This supports "normal" and "low" modes.
"low" mode is introduced to support low memory devices.
Because of the nature of low memory devices, in this mode, f2fs
will try to save memory sometimes by sacrificing performance.
"normal" mode is the default mode and same as before.
age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
======================== ============================================================
Debugfs Entries
@@ -732,70 +761,6 @@ In order to identify whether the data in the victim segment are valid or not,
F2FS manages a bitmap. Each bit represents the validity of a block, and the
bitmap is composed of a bit stream covering whole blocks in main area.
Write-hint Policy
-----------------
1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2) whint_mode=user-based. F2FS tries to pass down hints given by
users.
===================== ======================== ===================
User F2FS Block
===================== ======================== ===================
N/A META WRITE_LIFE_NOT_SET
N/A HOT_NODE "
N/A WARM_NODE "
N/A COLD_NODE "
ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
extension list " "
-- buffered io
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
WRITE_LIFE_NONE " "
WRITE_LIFE_MEDIUM " "
WRITE_LIFE_LONG " "
-- direct io
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
WRITE_LIFE_NONE " WRITE_LIFE_NONE
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
WRITE_LIFE_LONG " WRITE_LIFE_LONG
===================== ======================== ===================
3) whint_mode=fs-based. F2FS passes down hints with its policy.
===================== ======================== ===================
User F2FS Block
===================== ======================== ===================
N/A META WRITE_LIFE_MEDIUM;
N/A HOT_NODE WRITE_LIFE_NOT_SET
N/A WARM_NODE "
N/A COLD_NODE WRITE_LIFE_NONE
ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
extension list " "
-- buffered io
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
WRITE_LIFE_NONE " "
WRITE_LIFE_MEDIUM " "
WRITE_LIFE_LONG " "
-- direct io
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
WRITE_LIFE_NONE " WRITE_LIFE_NONE
WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
WRITE_LIFE_LONG " WRITE_LIFE_LONG
===================== ======================== ===================
Fallocate(2) Policy
-------------------

View File

@@ -77,11 +77,11 @@ Side-channel attacks
fscrypt is only resistant to side-channel attacks, such as timing or
electromagnetic attacks, to the extent that the underlying Linux
Cryptographic API algorithms are. If a vulnerable algorithm is used,
such as a table-based implementation of AES, it may be possible for an
attacker to mount a side channel attack against the online system.
Side channel attacks may also be mounted against applications
consuming decrypted data.
Cryptographic API algorithms or inline encryption hardware are. If a
vulnerable algorithm is used, such as a table-based implementation of
AES, it may be possible for an attacker to mount a side channel attack
against the online system. Side channel attacks may also be mounted
against applications consuming decrypted data.
Unauthorized file access
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -337,6 +337,8 @@ Currently, the following pairs of encryption modes are supported:
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
- Adiantum for both contents and filenames
- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
- SM4-XTS for contents and SM4-CTS-CBC for filenames (v2 policies only)
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
@@ -357,6 +359,23 @@ To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast
implementations of ChaCha and NHPoly1305 should be enabled, e.g.
CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
AES-256-HCTR2 is another true wide-block encryption mode that is intended for
use on CPUs with dedicated crypto instructions. AES-256-HCTR2 has the property
that a bitflip in the plaintext changes the entire ciphertext. This property
makes it desirable for filename encryption since initialization vectors are
reused within a directory. For more details on AES-256-HCTR2, see the paper
"Length-preserving encryption with HCTR2"
(https://eprint.iacr.org/2021/1441.pdf). To use AES-256-HCTR2,
CONFIG_CRYPTO_HCTR2 must be enabled. Also, fast implementations of XCTR and
POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
CRYPTO_AES_ARM64_CE_BLK for ARM64.
SM4 is a Chinese block cipher that is an alternative to AES. It has
not seen as much security review as AES, and it only has a 128-bit key
size. It may be useful in cases where its use is mandated.
Otherwise, it should not be used. For SM4 support to be available, it
also needs to be enabled in the kernel crypto API.
New encryption modes can be added relatively easily, without changes
to individual filesystems. However, authenticated encryption (AE)
modes are not currently supported because of the difficulty of dealing
@@ -404,11 +423,11 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
Thus, IV reuse is limited to within a single directory.
With CTS-CBC, the IV reuse means that when the plaintext filenames
share a common prefix at least as long as the cipher block size (16
bytes for AES), the corresponding encrypted filenames will also share
a common prefix. This is undesirable. Adiantum does not have this
weakness, as it is a wide-block encryption mode.
With CTS-CBC, the IV reuse means that when the plaintext filenames share a
common prefix at least as long as the cipher block size (16 bytes for AES), the
corresponding encrypted filenames will also share a common prefix. This is
undesirable. Adiantum and HCTR2 do not have this weakness, as they are
wide-block encryption modes.
All supported filenames encryption modes accept any plaintext length
>= 16 bytes; cipher block alignment is not required. However,
@@ -1047,8 +1066,8 @@ astute users may notice some differences in behavior:
may be used to overwrite the source files but isn't guaranteed to be
effective on all filesystems and storage devices.
- Direct I/O is not supported on encrypted files. Attempts to use
direct I/O on such files will fall back to buffered I/O.
- Direct I/O is supported on encrypted files only under some
circumstances. For details, see `Direct I/O support`_.
- The fallocate operations FALLOC_FL_COLLAPSE_RANGE and
FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will
@@ -1135,6 +1154,71 @@ where applications may later write sensitive data. It is recommended
that systems implementing a form of "verified boot" take advantage of
this by validating all top-level encryption policies prior to access.
Inline encryption support
=========================
By default, fscrypt uses the kernel crypto API for all cryptographic
operations (other than HKDF, which fscrypt partially implements
itself). The kernel crypto API supports hardware crypto accelerators,
but only ones that work in the traditional way where all inputs and
outputs (e.g. plaintexts and ciphertexts) are in memory. fscrypt can
take advantage of such hardware, but the traditional acceleration
model isn't particularly efficient and fscrypt hasn't been optimized
for it.
Instead, many newer systems (especially mobile SoCs) have *inline
encryption hardware* that can encrypt/decrypt data while it is on its
way to/from the storage device. Linux supports inline encryption
through a set of extensions to the block layer called *blk-crypto*.
blk-crypto allows filesystems to attach encryption contexts to bios
(I/O requests) to specify how the data will be encrypted or decrypted
in-line. For more information about blk-crypto, see
:ref:`Documentation/block/inline-encryption.rst <inline_encryption>`.
On supported filesystems (currently ext4 and f2fs), fscrypt can use
blk-crypto instead of the kernel crypto API to encrypt/decrypt file
contents. To enable this, set CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y in
the kernel configuration, and specify the "inlinecrypt" mount option
when mounting the filesystem.
Note that the "inlinecrypt" mount option just specifies to use inline
encryption when possible; it doesn't force its use. fscrypt will
still fall back to using the kernel crypto API on files where the
inline encryption hardware doesn't have the needed crypto capabilities
(e.g. support for the needed encryption algorithm and data unit size)
and where blk-crypto-fallback is unusable. (For blk-crypto-fallback
to be usable, it must be enabled in the kernel configuration with
CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y.)
Currently fscrypt always uses the filesystem block size (which is
usually 4096 bytes) as the data unit size. Therefore, it can only use
inline encryption hardware that supports that data unit size.
Inline encryption doesn't affect the ciphertext or other aspects of
the on-disk format, so users may freely switch back and forth between
using "inlinecrypt" and not using "inlinecrypt".
Direct I/O support
==================
For direct I/O on an encrypted file to work, the following conditions
must be met (in addition to the conditions for direct I/O on an
unencrypted file):
* The file must be using inline encryption. Usually this means that
the filesystem must be mounted with ``-o inlinecrypt`` and inline
encryption hardware must be present. However, a software fallback
is also available. For details, see `Inline encryption support`_.
* The I/O request must be fully aligned to the filesystem block size.
This means that the file position the I/O is targeting, the lengths
of all I/O segments, and the memory addresses of all I/O buffers
must be multiples of this value. Note that the filesystem block
size may be greater than the logical block size of the block device.
If either of the above conditions is not met, then direct I/O on the
encrypted file will fall back to buffered I/O.
Implementation details
======================
@@ -1184,6 +1268,13 @@ keys`_ and `DIRECT_KEY policies`_.
Data path changes
-----------------
When inline encryption is used, filesystems just need to associate
encryption contexts with bios to specify how the block layer or the
inline encryption hardware will encrypt/decrypt the file contents.
When inline encryption isn't used, filesystems must encrypt/decrypt
the file contents themselves, as described below:
For the read path (->readpage()) of regular files, filesystems can
read the ciphertext into the page cache and decrypt it in-place. The
page lock must be held until decryption has finished, to prevent the
@@ -1197,18 +1288,6 @@ buffer. Some filesystems, such as UBIFS, already use temporary
buffers regardless of encryption. Other filesystems, such as ext4 and
F2FS, have to allocate bounce pages specially for encryption.
Fscrypt is also able to use inline encryption hardware instead of the
kernel crypto API for en/decryption of file contents. When possible,
and if directed to do so (by specifying the 'inlinecrypt' mount option
for an ext4/F2FS filesystem), it adds encryption contexts to bios and
uses blk-crypto to perform the en/decryption instead of making use of
the above read/write path changes. Of course, even if directed to
make use of inline encryption, fscrypt will only be able to do so if
either hardware inline encryption support is available for the
selected encryption algorithm or CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
is selected. If neither is the case, fscrypt will fall back to using
the above mentioned read/write path changes for en/decryption.
Filename hashing and encoding
-----------------------------

View File

@@ -0,0 +1,85 @@
.. SPDX-License-Identifier: GPL-2.0
=================================================
incfs: A stacked incremental filesystem for Linux
=================================================
/sys/fs interface
=================
Please update Documentation/ABI/testing/sysfs-fs-incfs if you update this
section.
incfs creates the following files in /sys/fs.
Features
--------
/sys/fs/incremental-fs/features/corefs
Reads 'supported'. Always present.
/sys/fs/incremental-fs/features/v2
Reads 'supported'. Present if all v2 features of incfs are supported. These
are:
fs-verity support
inotify support
ioctls:
INCFS_IOC_SET_READ_TIMEOUTS
INCFS_IOC_GET_READ_TIMEOUTS
INCFS_IOC_GET_BLOCK_COUNT
INCFS_IOC_CREATE_MAPPED_FILE
.incomplete folder
.blocks_written pseudo file
report_uid mount option
/sys/fs/incremental-fs/features/zstd
Reads 'supported'. Present if zstd compression is supported for data blocks.
/sys/fs/incremental-fs/features/bugfix_throttling
Reads 'supported'. Present if the throttling lock bug is fixed
Optional per mount
------------------
For each incfs mount, the mount option sysfs_name=[name] creates a /sys/fs
node called:
/sys/fs/incremental-fs/instances/[name]
This will contain the following files:
/sys/fs/incremental-fs/instances/[name]/reads_delayed_min
Returns a count of the number of reads that were delayed as a result of the
per UID read timeouts min time setting.
/sys/fs/incremental-fs/instances/[name]/reads_delayed_min_us
Returns total delay time for all files since first mount as a result of the
per UID read timeouts min time setting.
/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending
Returns a count of the number of reads that were delayed as a result of
waiting for a pending read.
/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending_us
Returns total delay time for all files since first mount as a result of
waiting for a pending read.
/sys/fs/incremental-fs/instances/[name]/reads_failed_hash_verification
Returns number of reads that failed because of hash verification failures.
/sys/fs/incremental-fs/instances/[name]/reads_failed_other
Returns number of reads that failed for reasons other than timing out or
hash failures.
/sys/fs/incremental-fs/instances/[name]/reads_failed_timed_out
Returns number of reads that timed out.
For reads_delayed_*** settings, note that a file can count for both
reads_delayed_min and reads_delayed_pending if incfs first waits for a pending
read then has to wait further for the min time. In that case, the time spent
waiting is split between reads_delayed_pending_us, which is increased by the
time spent waiting for the pending read, and reads_delayed_min_us, which is
increased by the remainder of the time spent waiting.
Reads that timed out are not added to the reads_delayed_pending or the
reads_delayed_pending_us counters.

View File

@@ -195,7 +195,7 @@ handle it in two different ways:
1. return EXDEV error: this error is returned by rename(2) when trying to
move a file or directory across filesystem boundaries. Hence
applications are usually prepared to hande this error (mv(1) for example
applications are usually prepared to handle this error (mv(1) for example
recursively copies the directory tree). This is the default behavior.
2. If the "redirect_dir" feature is enabled, then the directory will be
@@ -324,6 +324,30 @@ and
The resulting access permissions should be the same. The difference is in
the time of copy (on-demand vs. up-front).
Non overlapping credentials
---------------------------
As noted above, all access to the upper, lower and work directories is
performed with the recorded mounter's MAC and DAC credentials. The incoming
accesses are
checked against the caller's credentials.
In the case where caller MAC or DAC credentials do not overlap the mounter, a
use case available in older versions of the driver, the override_creds mount
flag can be turned off. This is intended for use patterns where the caller has
legitimate credentials that the mounter does not. For example init may have been the
mounter, but the caller would have execute or read MAC permissions where
init would not. override_creds off means all access, incoming, upper, lower
or working, will be tested against the caller.
Several unintended side effects will occur though. The caller without certain
key capabilities or lower privilege will not always be able to delete files or
directories, create nodes, or search some restricted directories. The ability
to search and read a directory entry is spotty as a result of the cache
mechanism not re-testing the credentials because of the assumption, a
privileged caller can fill cache, then a lower privilege can read the directory
cache. The uneven security model where cache, upperdir and workdir are opened
at privilege, but accessed without creating a form of privilege escalation,
should only be used with strict understanding of the side effects and of the
security policies.
Multiple lower layers
---------------------

View File

@@ -426,12 +426,14 @@ with the memory region, as the case would be with BSS (uninitialized data).
The "pathname" shows the name of the file associated with this mapping. If the mapping
is not associated with a file:
======= ====================================
============= ====================================
[heap] the heap of the program
[stack] the stack of the main process
[vdso] the "virtual dynamic shared object",
the kernel system call handler
======= ====================================
[anon:<name>] an anonymous mapping that has been
named by userspace
============= ====================================
or if empty, the mapping is anonymous.
@@ -971,6 +973,7 @@ You may not have all of these fields.
SReclaimable: 159856 kB
SUnreclaim: 124508 kB
PageTables: 24448 kB
SecPageTables: 0 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
@@ -1065,6 +1068,9 @@ SUnreclaim
PageTables
amount of memory dedicated to the lowest level of page
tables.
SecPageTables
Memory consumed by secondary page tables. This currently
includes KVM MMU allocations on x86 and arm64.
NFS_Unstable
Always zero. Previously counted pages which had been written to
the server, but had not been committed to stable storage.

View File

@@ -77,6 +77,17 @@ HOSTLDLIBS
----------
Additional libraries to link against when building host programs.
.. _userkbuildflags:
USERCFLAGS
----------
Additional options used for $(CC) when compiling userprogs.
USERLDFLAGS
-----------
Additional options used for $(LD) when linking userprogs. userprogs are linked
with CC, so $(USERLDFLAGS) should include "-Wl," prefix as applicable.
KBUILD_KCONFIG
--------------
Set the top-level Kconfig file to the value of this environment

View File

@@ -982,6 +982,8 @@ The syntax is quite similar. The difference is to use "userprogs" instead of
When linking bpfilter_umh, it will be passed the extra option -static.
From command line, :ref:`USERCFLAGS and USERLDFLAGS <userkbuildflags>` will also be used.
5.4 When userspace programs are actually built
----------------------------------------------

View File

@@ -21,6 +21,7 @@ This document describes how to build an out-of-tree kernel module.
--- 4.1 Kernel Includes
--- 4.2 Single Subdirectory
--- 4.3 Several Subdirectories
--- 4.4 UAPI Headers Installation
=== 5. Module Installation
--- 5.1 INSTALL_MOD_PATH
--- 5.2 INSTALL_MOD_DIR
@@ -131,6 +132,10 @@ executed to make module versioning work.
/lib/modules/<kernel_release>/extra/, but a prefix may
be added with INSTALL_MOD_PATH (discussed in section 5).
headers_install
Export headers in a format suitable for userspace. The default
location is $PWD/usr. INSTALL_HDR_PATH can change this path.
clean
Remove all generated files in the module directory only.
@@ -406,6 +411,17 @@ according to the following rule:
pointing to the directory where the currently executing kbuild
file is located.
4.4 UAPI Headers Installation
-----------------------------
External modules may export headers to userspace in a similar
fashion to the in-tree counterpart drivers. kbuild supports
running the headers_install target for out-of-tree modules. The location
where kbuild searches for headers is $(M)/include/uapi and
$(M)/arch/$(SRCARCH)/include/uapi.
See also Documentation/kbuild/headers_install.rst.
5. Module Installation
======================

View File

@@ -1133,6 +1133,19 @@ ip_local_reserved_ports - list of comma separated ranges
Default: Empty
ip_local_unbindable_ports - list of comma separated ranges
Specify the ports which are not directly bind()able.
Usually you would use this to block the use of ports which
are invalid due to something outside of the control of the
kernel. For example a port stolen by the nic for serial
console, remote power management or debugging.
There's a relatively high chance you will also want to list
these ports in 'ip_local_reserved_ports' to prevent autobinding.
Default: Empty
ip_unprivileged_port_start - INTEGER
This is a per-namespace sysctl. It defines the first
unprivileged port in the network namespace. Privileged ports

View File

@@ -402,7 +402,7 @@ Consequently, the only sane governor to use together with EAS is schedutil,
because it is the only one providing some degree of consistency between
frequency requests and energy predictions.
Using EAS with any other governor than schedutil is not supported.
Using EAS with any other governor than schedutil is not recommended.
6.5 Scale-invariant utilization signals

View File

@@ -93,16 +93,19 @@ function
1. invokes optional hostt->eh_timed_out() callback. Return value can
be one of
- BLK_EH_RESET_TIMER
- SCSI_EH_RESET_TIMER
This indicates that more time is required to finish the
command. Timer is restarted. This action is counted as a
retry and only allowed scmd->allowed + 1(!) times. Once the
limit is reached, action for BLK_EH_DONE is taken instead.
- BLK_EH_DONE
- SCSI_EH_NOT_HANDLED
eh_timed_out() callback did not handle the command.
Step #2 is taken.
- SCSI_EH_DONE
eh_timed_out() completed the command.
2. scsi_abort_command() is invoked to schedule an asynchronous abort.
Asynchronous aborts are not invoked for commands for which the
SCSI_EH_ABORT_SCHEDULED flag is set (this indicates that the command

View File

@@ -17,6 +17,8 @@ Universal Flash Storage
3.2 UTP Transfer requests
3.3 UFS error handling
3.4 SCSI Error handling
4. BSG Support
5. UFS Reference Clock Frequency configuration
1. Overview
@@ -193,3 +195,16 @@ UFS Specifications can be found at:
- UFS - http://www.jedec.org/sites/default/files/docs/JESD220.pdf
- UFSHCI - http://www.jedec.org/sites/default/files/docs/JESD223.pdf
5. UFS Reference Clock Frequency configuration
==============================================
Devicetree can define a clock named "ref_clk" under the UFS controller node
to specify the intended reference clock frequency for the UFS storage
parts. ACPI-based systems can specify the frequency using ACPI
Device-Specific Data property named "ref-clk-freq". In both ways the value
is interpreted as frequency in Hz and must match one of the values given in
the UFS specification. UFS subsystem will attempt to read the value when
executing common controller initialization. If the value is available, UFS
subsystem will ensure the bRefClkFreq attribute of the UFS storage device is
set accordingly and will modify it if there is a mismatch.

View File

@@ -100,6 +100,15 @@ amidi_map
MIDI device number maps assigned to the 2nd OSS device;
Default: 1
Module snd-soc-core
-------------------
The soc core module. It is used by all ALSA card drivers.
It takes the following options which have global effects.
prealloc_buffer_size_kbytes
Specify prealloc buffer size in kbytes (default: 512).
Common parameters for top sound card modules
--------------------------------------------

View File

@@ -1763,6 +1763,21 @@ using the same key and variable from yet another event::
# echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger
Expressions support the use of addition, subtraction, multiplication and
division operators (+-\*/).
Note if division by zero cannot be detected at parse time (i.e. the
divisor is not a constant), the result will be -1.
Numeric constants can also be used directly in an expression::
# echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/1000000 ...' >> event/trigger
or assigned to a variable and referenced in a subsequent expression::
# echo 'hist:keys=next_pid:us_per_sec=1000000 ...' >> event/trigger
# echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/$us_per_sec ...' >> event/trigger
2.2.2 Synthetic Events
----------------------

View File

@@ -784,6 +784,7 @@ The uvc function provides these attributes in its function directory:
streaming_maxpacket maximum packet size this endpoint is capable of
sending or receiving when this configuration is
selected
function_name name of the interface
=================== ================================================
There are also "control" and "streaming" subdirectories, each of which contain

View File

@@ -242,8 +242,17 @@ Control IDs
* - ``V4L2_COLORFX_SET_CBCR``
- The Cb and Cr chroma components are replaced by fixed coefficients
determined by ``V4L2_CID_COLORFX_CBCR`` control.
* - ``V4L2_COLORFX_SET_RGB``
- The RGB components are replaced by the fixed RGB components determined
by ``V4L2_CID_COLORFX_RGB`` control.
``V4L2_CID_COLORFX_RGB`` ``(integer)``
Determines the Red, Green, and Blue coefficients for
``V4L2_COLORFX_SET_RGB`` color effect.
Bits [7:0] of the supplied 32 bit value are interpreted as Blue component,
bits [15:8] as Green component, bits [23:16] as Red component, and
bits [31:24] must be zero.
``V4L2_CID_COLORFX_CBCR`` ``(integer)``
Determines the Cb and Cr coefficients for ``V4L2_COLORFX_SET_CBCR``

View File

@@ -414,7 +414,7 @@ kvm_run' (see below).
-----------------
:Capability: basic
:Architectures: all except ARM, arm64
:Architectures: all except arm64
:Type: vcpu ioctl
:Parameters: struct kvm_regs (out)
:Returns: 0 on success, -1 on error
@@ -447,7 +447,7 @@ Reads the general purpose registers from the vcpu.
-----------------
:Capability: basic
:Architectures: all except ARM, arm64
:Architectures: all except arm64
:Type: vcpu ioctl
:Parameters: struct kvm_regs (in)
:Returns: 0 on success, -1 on error
@@ -804,7 +804,7 @@ Writes the floating point state to the vcpu.
-----------------------
:Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
:Architectures: x86, ARM, arm64, s390
:Architectures: x86, arm64, s390
:Type: vm ioctl
:Parameters: none
:Returns: 0 on success, -1 on error
@@ -813,7 +813,7 @@ Creates an interrupt controller model in the kernel.
On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both
PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
On arm64, a GICv2 is created. Any other GIC versions require the usage of
KVM_CREATE_DEVICE, which also supports creating a GICv2. Using
KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
On s390, a dummy irq routing table is created.
@@ -826,7 +826,7 @@ before KVM_CREATE_IRQCHIP can be used.
-----------------
:Capability: KVM_CAP_IRQCHIP
:Architectures: x86, arm, arm64
:Architectures: x86, arm64
:Type: vm ioctl
:Parameters: struct kvm_irq_level
:Returns: 0 on success, -1 on error
@@ -850,7 +850,7 @@ capability is present (or unless it is not using the in-kernel irqchip,
of course).
ARM/arm64 can signal an interrupt either at the CPU level, or at the
arm64 can signal an interrupt either at the CPU level, or at the
in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
use PPIs designated for specific cpus. The irq field is interpreted
like this::
@@ -876,7 +876,7 @@ When KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 is supported, the target vcpu is
identified as (256 * vcpu2_index + vcpu_index). Otherwise, vcpu2_index
must be zero.
Note that on arm/arm64, the KVM_CAP_IRQCHIP capability only conditions
Note that on arm64, the KVM_CAP_IRQCHIP capability only conditions
injection of interrupts for the in-kernel irqchip. KVM_IRQ_LINE can always
be used for a userspace interrupt controller.
@@ -1037,7 +1037,7 @@ such as migration.
:Capability: KVM_CAP_VCPU_EVENTS
:Extended by: KVM_CAP_INTR_SHADOW
:Architectures: x86, arm, arm64
:Architectures: x86, arm64
:Type: vcpu ioctl
:Parameters: struct kvm_vcpu_event (out)
:Returns: 0 on success, -1 on error
@@ -1096,8 +1096,8 @@ The following bits are defined in the flags field:
fields contain a valid state. This bit will be set whenever
KVM_CAP_EXCEPTION_PAYLOAD is enabled.
ARM/ARM64:
^^^^^^^^^^
ARM64:
^^^^^^
If the guest accesses a device that is being emulated by the host kernel in
such a way that a real device would generate a physical SError, KVM may make
@@ -1156,7 +1156,7 @@ directly to the virtual CPU).
:Capability: KVM_CAP_VCPU_EVENTS
:Extended by: KVM_CAP_INTR_SHADOW
:Architectures: x86, arm, arm64
:Architectures: x86, arm64
:Type: vcpu ioctl
:Parameters: struct kvm_vcpu_event (in)
:Returns: 0 on success, -1 on error
@@ -1191,8 +1191,8 @@ can be set in the flags field to signal that the
exception_has_payload, exception_payload, and exception.pending fields
contain a valid state and shall be written into the VCPU.
ARM/ARM64:
^^^^^^^^^^
ARM64:
^^^^^^
User space may need to inject several types of events to the guest.
@@ -1399,7 +1399,7 @@ for vm-wide capabilities.
---------------------
:Capability: KVM_CAP_MP_STATE
:Architectures: x86, s390, arm, arm64
:Architectures: x86, s390, arm64
:Type: vcpu ioctl
:Parameters: struct kvm_mp_state (out)
:Returns: 0 on success; -1 on error
@@ -1416,7 +1416,7 @@ uniprocessor guests).
Possible values are:
========================== ===============================================
KVM_MP_STATE_RUNNABLE the vcpu is currently running [x86,arm/arm64]
KVM_MP_STATE_RUNNABLE the vcpu is currently running [x86,arm64]
KVM_MP_STATE_UNINITIALIZED the vcpu is an application processor (AP)
which has not yet received an INIT signal [x86]
KVM_MP_STATE_INIT_RECEIVED the vcpu has received an INIT signal, and is
@@ -1425,29 +1425,52 @@ Possible values are:
is waiting for an interrupt [x86]
KVM_MP_STATE_SIPI_RECEIVED the vcpu has just received a SIPI (vector
accessible via KVM_GET_VCPU_EVENTS) [x86]
KVM_MP_STATE_STOPPED the vcpu is stopped [s390,arm/arm64]
KVM_MP_STATE_STOPPED the vcpu is stopped [s390,arm64]
KVM_MP_STATE_CHECK_STOP the vcpu is in a special error state [s390]
KVM_MP_STATE_OPERATING the vcpu is operating (running or halted)
[s390]
KVM_MP_STATE_LOAD the vcpu is in a special load/startup state
[s390]
KVM_MP_STATE_SUSPENDED the vcpu is in a suspend state and is waiting
for a wakeup event [arm64]
========================== ===============================================
On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.
For arm/arm64:
^^^^^^^^^^^^^^
For arm64:
^^^^^^^^^^
The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
If a vCPU is in the KVM_MP_STATE_SUSPENDED state, KVM will emulate the
architectural execution of a WFI instruction.
If a wakeup event is recognized, KVM will exit to userspace with a
KVM_SYSTEM_EVENT exit, where the event type is KVM_SYSTEM_EVENT_WAKEUP. If
userspace wants to honor the wakeup, it must set the vCPU's MP state to
KVM_MP_STATE_RUNNABLE. If it does not, KVM will continue to await a wakeup
event in subsequent calls to KVM_RUN.
.. warning::
If userspace intends to keep the vCPU in a SUSPENDED state, it is
strongly recommended that userspace take action to suppress the
wakeup event (such as masking an interrupt). Otherwise, subsequent
calls to KVM_RUN will immediately exit with a KVM_SYSTEM_EVENT_WAKEUP
event and inadvertently waste CPU cycles.
Additionally, if userspace takes action to suppress a wakeup event,
it is strongly recommended that it also restores the vCPU to its
original state when the vCPU is made RUNNABLE again. For example,
if userspace masked a pending interrupt to suppress the wakeup,
the interrupt should be unmasked before returning control to the
guest.
4.39 KVM_SET_MP_STATE
---------------------
:Capability: KVM_CAP_MP_STATE
:Architectures: x86, s390, arm, arm64
:Architectures: x86, s390, arm64
:Type: vcpu ioctl
:Parameters: struct kvm_mp_state (in)
:Returns: 0 on success; -1 on error
@@ -1459,8 +1482,8 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.
For arm/arm64:
^^^^^^^^^^^^^^
For arm64:
^^^^^^^^^^
The only states that are valid are KVM_MP_STATE_STOPPED and
KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
@@ -1715,14 +1738,14 @@ The flags bitmap is defined as::
------------------------
:Capability: KVM_CAP_IRQ_ROUTING
:Architectures: x86 s390 arm arm64
:Architectures: x86 s390 arm64
:Type: vm ioctl
:Parameters: struct kvm_irq_routing (in)
:Returns: 0 on success, -1 on error
Sets the GSI routing table entries, overwriting any previously set entries.
On arm/arm64, GSI routing has the following limitation:
On arm64, GSI routing has the following limitation:
- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
@@ -2526,6 +2549,24 @@ EINVAL.
After the vcpu's SVE configuration is finalized, further attempts to
write this register will fail with EPERM.
arm64 bitmap feature firmware pseudo-registers have the following bit pattern::
0x6030 0000 0016 <regno:16>
The bitmap feature firmware registers expose the hypercall services that
are available for userspace to configure. The set bits correspond to the
services that are available for the guests to access. By default, KVM
sets all the supported bits during VM initialization. The userspace can
discover the available services via KVM_GET_ONE_REG, and write back the
bitmap corresponding to the features that it wishes guests to see via
KVM_SET_ONE_REG.
Note: These registers are immutable once any of the vCPUs of the VM has
run at least once. A KVM_SET_ONE_REG in such a scenario will return
a -EBUSY to userspace.
(See Documentation/virt/kvm/arm/hypercalls.rst for more details.)
MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
the register group type:
@@ -2636,7 +2677,7 @@ after pausing the vcpu, but before it is resumed.
-------------------
:Capability: KVM_CAP_SIGNAL_MSI
:Architectures: x86 arm arm64
:Architectures: x86 arm64
:Type: vm ioctl
:Parameters: struct kvm_msi (in)
:Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
@@ -2824,7 +2865,7 @@ into the hash PTE second double word).
--------------
:Capability: KVM_CAP_IRQFD
:Architectures: x86 s390 arm arm64
:Architectures: x86 s390 arm64
:Type: vm ioctl
:Parameters: struct kvm_irqfd (in)
:Returns: 0 on success, -1 on error
@@ -2850,7 +2891,7 @@ Note that closing the resamplefd is not sufficient to disable the
irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
On arm/arm64, gsi routing being supported, the following can happen:
On arm64, gsi routing being supported, the following can happen:
- in case no routing entry is associated to this gsi, injection fails
- in case the gsi is associated to an irqchip routing entry,
@@ -3104,7 +3145,7 @@ current state. "addr" is ignored.
----------------------
:Capability: basic
:Architectures: arm, arm64
:Architectures: arm64
:Type: vcpu ioctl
:Parameters: struct kvm_vcpu_init (in)
:Returns: 0 on success; -1 on error
@@ -3202,7 +3243,7 @@ Possible features:
-----------------------------
:Capability: basic
:Architectures: arm, arm64
:Architectures: arm64
:Type: vm ioctl
:Parameters: struct kvm_vcpu_init (out)
:Returns: 0 on success; -1 on error
@@ -3231,7 +3272,7 @@ VCPU matching underlying host.
---------------------
:Capability: basic
:Architectures: arm, arm64, mips
:Architectures: arm64, mips
:Type: vcpu ioctl
:Parameters: struct kvm_reg_list (in/out)
:Returns: 0 on success; -1 on error
@@ -3258,7 +3299,7 @@ KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
-----------------------------------------
:Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
:Architectures: arm, arm64
:Architectures: arm64
:Type: vm ioctl
:Parameters: struct kvm_arm_device_address (in)
:Returns: 0 on success, -1 on error
@@ -3285,13 +3326,13 @@ can access emulated or directly exposed devices, which the host kernel needs
to know about. The id field is an architecture specific identifier for a
specific device.
ARM/arm64 divides the id field into two parts, a device id and an
arm64 divides the id field into two parts, a device id and an
address type id specific to the individual device::
bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
field: | 0x00000000 | device id | addr type id |
ARM/arm64 currently only require this when using the in-kernel GIC
arm64 currently only requires this when using the in-kernel GIC
support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
as the device id. When setting the base address for the guest's
mapping of the VGIC virtual CPU and distributor interface, the ioctl
@@ -4505,7 +4546,7 @@ to I/O ports.
------------------------------------
:Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
:Architectures: x86, arm, arm64, mips
:Architectures: x86, arm64, mips
:Type: vm ioctl
:Parameters: struct kvm_clear_dirty_log (in)
:Returns: 0 on success, -1 on error
@@ -4617,7 +4658,7 @@ version has the following quirks:
4.119 KVM_ARM_VCPU_FINALIZE
---------------------------
:Architectures: arm, arm64
:Architectures: arm64
:Type: vcpu ioctl
:Parameters: int feature (in)
:Returns: 0 on success, -1 on error
@@ -5656,13 +5697,15 @@ should put the acknowledged interrupt vector into the 'epr' field.
#define KVM_SYSTEM_EVENT_SHUTDOWN 1
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
#define KVM_SYSTEM_EVENT_WAKEUP 4
#define KVM_SYSTEM_EVENT_SUSPEND 5
__u32 type;
__u64 flags;
} system_event;
If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
a system-level event using some architecture specific mechanism (hypercall
or some special instruction). In case of ARM/ARM64, this is triggered using
or some special instruction). In case of ARM64, this is triggered using
HVC instruction based PSCI call from the vcpu. The 'type' field describes
the system-level event type. The 'flags' field describes architecture
specific flags for the system-level event.
@@ -5680,6 +5723,42 @@ Valid values for 'type' are:
has requested a crash condition maintenance. Userspace can choose
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
- KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and
KVM has recognized a wakeup event. Userspace may honor this event by
marking the exiting vCPU as runnable, or deny it and call KVM_RUN again.
- KVM_SYSTEM_EVENT_SUSPEND -- the guest has requested a suspension of
the VM.
For arm64:
----------
KVM_SYSTEM_EVENT_SUSPEND exits are enabled with the
KVM_CAP_ARM_SYSTEM_SUSPEND VM capability. If a guest invokes the PSCI
SYSTEM_SUSPEND function, KVM will exit to userspace with this event
type.
It is the sole responsibility of userspace to implement the PSCI
SYSTEM_SUSPEND call according to ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND".
KVM does not change the vCPU's state before exiting to userspace, so
the call parameters are left in-place in the vCPU registers.
Userspace is _required_ to take action for such an exit. It must
either:
- Honor the guest request to suspend the VM. Userspace can request
in-kernel emulation of suspension by setting the calling vCPU's
state to KVM_MP_STATE_SUSPENDED. Userspace must configure the vCPU's
state according to the parameters passed to the PSCI function when
the calling vCPU is resumed. See ARM DEN0022D.b 5.19.1 "Intended use"
for details on the function parameters.
- Deny the guest request to suspend the VM. See ARM DEN0022D.b 5.19.2
"Caller responsibilities" for possible return values.
Valid flags are:
- KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued
a SYSTEM_RESET2 call according to v1.1 of the PSCI specification.
::
@@ -5755,7 +5834,7 @@ in send_page or recv a buffer to recv_page).
__u64 fault_ipa;
} arm_nisv;
Used on arm and arm64 systems. If a guest accesses memory not in a memslot,
Used on arm64 systems. If a guest accesses memory not in a memslot,
KVM will typically return to userspace and ask it to do MMIO emulation on its
behalf. However, for certain classes of instructions, no instruction decode
(direction, length of memory access) is provided, and fetching and decoding
@@ -5772,16 +5851,22 @@ did not fall within an I/O window.
Userspace implementations can query for KVM_CAP_ARM_NISV_TO_USER, and enable
this capability at VM creation. Once this is done, these types of errors will
instead return to userspace with KVM_EXIT_ARM_NISV, with the valid bits from
the HSR (arm) and ESR_EL2 (arm64) in the esr_iss field, and the faulting IPA
in the fault_ipa field. Userspace can either fix up the access if it's
actually an I/O access by decoding the instruction from guest memory (if it's
very brave) and continue executing the guest, or it can decide to suspend,
dump, or restart the guest.
the ESR_EL2 in the esr_iss field, and the faulting IPA in the fault_ipa field.
Userspace can either fix up the access if it's actually an I/O access by
decoding the instruction from guest memory (if it's very brave) and continue
executing the guest, or it can decide to suspend, dump, or restart the guest.
Note that KVM does not skip the faulting instruction as it does for
KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
if it decides to decode and emulate the instruction.
This feature isn't available to protected VMs, as userspace does not
have access to the state that is required to perform the emulation.
Instead, a data abort exception is directly injected in the guest.
Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if
queried outside of a protected VM context, the feature will not be
exposed if queried on a protected VM file descriptor.
::
/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
@@ -6464,7 +6549,7 @@ and injected exceptions.
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
:Architectures: x86, arm, arm64, mips
:Architectures: x86, arm64, mips
:Parameters: args[0] whether feature should be enabled or not
Valid flags are::
@@ -6833,7 +6918,7 @@ reserved.
8.9 KVM_CAP_ARM_USER_IRQ
------------------------
:Architectures: arm, arm64
:Architectures: arm64
This capability, if KVM_CHECK_EXTENSION indicates that it is available, means
that if userspace creates a VM without an in-kernel interrupt controller, it
@@ -6960,7 +7045,7 @@ HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
----------------------------------
:Architectures: arm, arm64
:Architectures: arm64
This capability indicates that userspace can specify (via the
KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
@@ -7266,6 +7351,16 @@ of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
the hypercalls whose corresponding bit is in the argument, and return
ENOSYS for the others.
8.36 KVM_CAP_ARM_SYSTEM_SUSPEND
-------------------------------
:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND
:Architectures: arm64
:Type: vm
When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
9. Known KVM API problems
=========================

View File

@@ -0,0 +1,138 @@
.. SPDX-License-Identifier: GPL-2.0
=======================================
ARM firmware pseudo-registers interface
=======================================
KVM handles the hypercall services as requested by the guests. New hypercall
services are regularly made available by the ARM specification or by KVM (as
vendor services) if they make sense from a virtualization point of view.
This means that a guest booted on two different versions of KVM can observe
two different "firmware" revisions. This could cause issues if a given guest
is tied to a particular version of a hypercall service, or if a migration
causes a different version to be exposed out of the blue to an unsuspecting
guest.
In order to remedy this situation, KVM exposes a set of "firmware
pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
interface. These registers can be saved/restored by userspace, and set
to a convenient value as required.
The following registers are defined:
* KVM_REG_ARM_PSCI_VERSION:
KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.
- Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
(and thus has already been initialized)
- Returns the current PSCI version on GET_ONE_REG (defaulting to the
highest PSCI version implemented by KVM and compatible with v0.2)
- Allows any PSCI version implemented by KVM and compatible with
v0.2 to be set with SET_ONE_REG
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1]_.
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
KVM does not offer
firmware support for the workaround. The mitigation status for the
guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
The workaround HVC call is
available to the guest and required for the mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
The workaround HVC call
is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
A workaround is not
available. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
The workaround state is
unknown. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
The workaround is available,
and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
The workaround is always active on this vCPU or it is not needed.
Bitmap Feature Firmware Registers
---------------------------------
Contrary to the above registers, the following registers expose the
hypercall services in the form of a feature-bitmap to the userspace. This
bitmap is translated to the services that are available to the guest.
There is a register defined per service call owner, and each one can be
accessed via the GET/SET_ONE_REG interface.
By default, these registers are set with the upper limit of the features
that are supported. This way userspace can discover all the usable
hypercall services via GET_ONE_REG. Userspace can then write the
desired bitmap back via SET_ONE_REG. The features for the registers that
are untouched, probably because userspace isn't aware of them, will be
exposed as is to the guest.
Note that KVM will not allow the userspace to configure the registers
anymore once any of the vCPUs has run at least once. Instead, it will
return a -EBUSY.
The pseudo-firmware bitmap registers are as follows:
* KVM_REG_ARM_STD_BMAP:
Controls the bitmap of the ARM Standard Secure Service Calls.
The following bits are accepted:
Bit-0: KVM_REG_ARM_STD_BIT_TRNG_V1_0:
The bit represents the services offered under v1.0 of ARM True Random
Number Generator (TRNG) specification, ARM DEN0098.
* KVM_REG_ARM_STD_HYP_BMAP:
Controls the bitmap of the ARM Standard Hypervisor Service Calls.
The following bits are accepted:
Bit-0: KVM_REG_ARM_STD_HYP_BIT_PV_TIME:
The bit represents the Paravirtualized Time service as represented by
ARM DEN0057A.
* KVM_REG_ARM_VENDOR_HYP_BMAP:
Controls the bitmap of the Vendor specific Hypervisor Service Calls.
The following bits are accepted:
Bit-0: KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT:
The bit represents the ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID
and ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID function-ids.
Bit-1: KVM_REG_ARM_VENDOR_HYP_BIT_PTP:
The bit represents the Precision Time Protocol KVM service.
Errors:
======= =============================================================
-ENOENT Unknown register accessed.
-EBUSY Attempt a 'write' to the register after the VM has started.
-EINVAL Invalid bitmap written to the register.
======= =============================================================
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf

View File

@@ -0,0 +1,152 @@
.. SPDX-License-Identifier: GPL-2.0
===============================================
KVM/arm64-specific hypercalls exposed to guests
===============================================
This file documents the KVM/arm64-specific hypercalls which may be
exposed by KVM/arm64 to guest operating systems. These hypercalls are
issued using the HVC instruction according to version 1.1 of the Arm SMC
Calling Convention (DEN0028/C):
https://developer.arm.com/docs/den0028/c
All KVM/arm64-specific hypercalls are allocated within the "Vendor
Specific Hypervisor Service Call" range with a UID of
``28b46fb6-2ec5-11e9-a9ca-4b564d003a74``. This UID should be queried by the
guest using the standard "Call UID" function for the service range in
order to determine that the KVM/arm64-specific hypercalls are available.
``ARM_SMCCC_KVM_FUNC_FEATURES``
---------------------------------------------
Provides a discovery mechanism for other KVM/arm64 hypercalls.
+---------------------+-------------------------------------------------------------+
| Presence: | Mandatory for the KVM/arm64 UID |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC32 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0x86000000 |
+---------------------+----------+--------------------------------------------------+
| Arguments: | None |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (uint32) | R0 | Bitmap of available function numbers 0-31 |
| +----------+----+---------------------------------------------+
| | (uint32) | R1 | Bitmap of available function numbers 32-63 |
| +----------+----+---------------------------------------------+
| | (uint32) | R2 | Bitmap of available function numbers 64-95 |
| +----------+----+---------------------------------------------+
| | (uint32) | R3 | Bitmap of available function numbers 96-127 |
+---------------------+----------+----+---------------------------------------------+
``ARM_SMCCC_KVM_FUNC_PTP``
----------------------------------------
See ptp_kvm.rst
``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``
----------------------------------
Query the memory protection parameters for a protected virtual machine.
+---------------------+-------------------------------------------------------------+
| Presence: | Optional; protected guests only. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000002 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``INVALID_PARAMETER (-3)`` on error, else |
| | | | memory protection granule in bytes |
+---------------------+----------+----+---------------------------------------------+
``ARM_SMCCC_KVM_FUNC_MEM_SHARE``
--------------------------------
Share a region of memory with the KVM host, granting it read, write and execute
permissions. The size of the region is equal to the memory protection granule
advertised by ``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``.
+---------------------+-------------------------------------------------------------+
| Presence: | Optional; protected guests only. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000003 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Base IPA of memory region to share |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``SUCCESS (0)`` |
| | | +---------------------------------------------+
| | | | ``INVALID_PARAMETER (-3)`` |
+---------------------+----------+----+---------------------------------------------+
``ARM_SMCCC_KVM_FUNC_MEM_UNSHARE``
----------------------------------
Revoke access permission from the KVM host to a memory region previously shared
with ``ARM_SMCCC_KVM_FUNC_MEM_SHARE``. The size of the region is equal to the
memory protection granule advertised by ``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``.
+---------------------+-------------------------------------------------------------+
| Presence: | Optional; protected guests only. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000004 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Base IPA of memory region to unshare |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``SUCCESS (0)`` |
| | | +---------------------------------------------+
| | | | ``INVALID_PARAMETER (-3)`` |
+---------------------+----------+----+---------------------------------------------+
``ARM_SMCCC_KVM_FUNC_MEM_RELINQUISH``
--------------------------------------
Cooperatively relinquish ownership of a memory region. The size of the
region is equal to the memory protection granule advertised by
``ARM_SMCCC_KVM_FUNC_HYP_MEMINFO``. If this hypercall is advertised
then it is mandatory to call it before freeing memory via, for
example, virtio balloon. If the caller is a protected VM, it is
guaranteed that the memory region will be completely cleared before
becoming visible to another VM.
+---------------------+-------------------------------------------------------------+
| Presence: | Optional. |
+---------------------+-------------------------------------------------------------+
| Calling convention: | HVC64 |
+---------------------+----------+--------------------------------------------------+
| Function ID: | (uint32) | 0xC6000009 |
+---------------------+----------+----+---------------------------------------------+
| Arguments: | (uint64) | R1 | Base IPA of memory region to relinquish |
| +----------+----+---------------------------------------------+
| | (uint64) | R2 | Reserved / Must be zero |
| +----------+----+---------------------------------------------+
| | (uint64) | R3 | Reserved / Must be zero |
+---------------------+----------+----+---------------------------------------------+
| Return Values: | (int64) | R0 | ``SUCCESS (0)`` |
| | | +---------------------------------------------+
| | | | ``INVALID_PARAMETER (-3)`` |
+---------------------+----------+----+---------------------------------------------+
``ARM_SMCCC_KVM_FUNC_MMIO_GUARD_*``
-----------------------------------
See mmio-guard.rst

View File

@@ -7,7 +7,10 @@ ARM
.. toctree::
:maxdepth: 2
fw-pseudo-registers
hyp-abi
psci
hypercalls
pkvm
pvtime
ptp_kvm
mmio-guard

View File

@@ -0,0 +1,74 @@
.. SPDX-License-Identifier: GPL-2.0
==============
KVM MMIO guard
==============
KVM implements device emulation by handling translation faults to any
IPA range that is not contained in a memory slot. Such a translation
fault is in most cases passed on to userspace (or in rare cases to the
host kernel) with the address, size and possibly data of the access
for emulation.
Should the guest exit with an address that is not one that corresponds
to an emulatable device, userspace may take measures that are not the
most graceful as far as the guest is concerned (such as terminating it
or delivering a fatal exception).
There is also an element of trust: by forwarding the request to
userspace, the kernel assumes that the guest trusts userspace to do
the right thing.
The KVM MMIO guard offers a way to mitigate this last point: a guest
can request that only certain regions of the IPA space are valid as
MMIO. Only these regions will be handled as an MMIO, and any other
will result in an exception being delivered to the guest.
This relies on a set of hypercalls defined in the KVM-specific range,
using the HVC64 calling convention.
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO
============== ======== ================================
Function ID: (uint32) 0xC6000005
Arguments: r1-r3 Reserved / Must be zero
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
(uint64) Protection Granule (PG) size in
bytes (r0)
============== ======== ================================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL
============== ======== ==============================
Function ID: (uint32) 0xC6000006
Arguments: none
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ==============================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP
============== ======== ====================================
Function ID: (uint32) 0xC6000007
Arguments: (uint64) The base of the PG-sized IPA range
that is allowed to be accessed as
MMIO. Must be aligned to the PG size
(r1)
(uint64) Index in the MAIR_EL1 register
providing the memory attribute that
is used by the guest (r2)
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ====================================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP
============== ======== ======================================
Function ID: (uint32) 0xC6000008
Arguments: (uint64) The base of the PG-sized IPA range
to unmap. Must be aligned to the PG
size and have been previously
mapped (r1)
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ======================================

View File

@@ -0,0 +1,96 @@
.. SPDX-License-Identifier: GPL-2.0
Protected virtual machines (pKVM)
=================================
Introduction
------------
Protected KVM (pKVM) is a KVM/arm64 extension which uses the two-stage
translation capability of the Armv8 MMU to isolate guest memory from the host
system. This allows for the creation of a confidential computing environment
without relying on whizz-bang features in hardware, but still allowing room for
complementary technologies such as memory encryption and hardware-backed
attestation.
The major implementation change brought about by pKVM is that the hypervisor
code running at EL2 is now largely independent of (and isolated from) the rest
of the host kernel running at EL1 and therefore additional hypercalls are
introduced to manage manipulation of guest stage-2 page tables, creation of VM
data structures and reclamation of memory on teardown. An immediate consequence
of this change is that the host itself runs with an identity mapping enabled
at stage-2, providing the hypervisor code with a mechanism to restrict host
access to an arbitrary physical page.
Enabling pKVM
-------------
The pKVM hypervisor is enabled by booting the host kernel at EL2 with
"``kvm-arm.mode=protected``" on the command-line. Once enabled, VMs can be spawned
in either protected or non-protected state, although the hypervisor is still
responsible for managing most of the VM metadata in either case.
Limitations
-----------
Enabling pKVM places some significant limitations on KVM guests, regardless of
whether they are spawned in protected state. It is therefore recommended only
to enable pKVM if protected VMs are required, with non-protected state acting
primarily as a debug and development aid.
If you're still keen, then here is an incomplete list of caveats that apply
to all VMs running under pKVM:
- Guest memory cannot be file-backed (with the exception of shmem/memfd) and is
pinned as it is mapped into the guest. This prevents the host from
swapping-out, migrating, merging or generally doing anything useful with the
guest pages. It also requires that the VMM has either ``CAP_IPC_LOCK`` or
sufficient ``RLIMIT_MEMLOCK`` to account for this pinned memory.
- GICv2 is not supported and therefore GICv3 hardware is required in order
to expose a virtual GICv3 to the guest.
- Read-only memslots are unsupported and therefore dirty logging cannot be
enabled.
- Memslot configuration is fixed once a VM has started running, with subsequent
move or deletion requests being rejected with ``-EPERM``.
- There are probably many others.
Since the host is unable to tear down the hypervisor when pKVM is enabled,
hibernation (``CONFIG_HIBERNATION``) and kexec (``CONFIG_KEXEC``) will fail
with ``-EBUSY``.
If you are not happy with these limitations, then please don't enable pKVM :)
VM creation
-----------
When pKVM is enabled, protected VMs can be created by specifying the
``KVM_VM_TYPE_ARM_PROTECTED`` flag in the machine type identifier parameter
passed to ``KVM_CREATE_VM``.
Protected VMs are instantiated according to a fixed vCPU configuration
described by the ID register definitions in
``arch/arm64/include/asm/kvm_pkvm.h``. Only a subset of the architectural
features that may be available to the host are exposed to the guest and the
capabilities advertised by ``KVM_CHECK_EXTENSION`` are limited accordingly,
with the vCPU registers being initialised to their architecturally-defined
values.
Where not defined by the architecture, the registers of a protected vCPU
are reset to zero with the exception of the PC and X0 which can be set
either by the ``KVM_SET_ONE_REG`` interface or by a call to PSCI ``CPU_ON``.
VM runtime
----------
By default, memory pages mapped into a protected guest are inaccessible to the
host and any attempt by the host to access such a page will result in the
injection of an abort at EL1 by the hypervisor. For accesses originating from
EL0, the host will then terminate the current task with a ``SIGSEGV``.
pKVM exposes additional hypercalls to protected guests, primarily for the
purpose of establishing shared-memory regions with the host for communication
and I/O. These hypercalls are documented in hypercalls.rst.

View File

@@ -1,77 +0,0 @@
.. SPDX-License-Identifier: GPL-2.0
=========================================
Power State Coordination Interface (PSCI)
=========================================
KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset
and power-off to the guest.
The PSCI specification is regularly updated to provide new features,
and KVM implements these updates if they make sense from a virtualization
point of view.
This means that a guest booted on two different versions of KVM can
observe two different "firmware" revisions. This could cause issues if
a given guest is tied to a particular PSCI revision (unlikely), or if
a migration causes a different PSCI version to be exposed out of the
blue to an unsuspecting guest.
In order to remedy this situation, KVM exposes a set of "firmware
pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
interface. These registers can be saved/restored by userspace, and set
to a convenient value if required.
The following register is defined:
* KVM_REG_ARM_PSCI_VERSION:
- Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
(and thus has already been initialized)
- Returns the current PSCI version on GET_ONE_REG (defaulting to the
highest PSCI version implemented by KVM and compatible with v0.2)
- Allows any PSCI version implemented by KVM and compatible with
v0.2 to be set with SET_ONE_REG
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1].
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
KVM does not offer
firmware support for the workaround. The mitigation status for the
guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
The workaround HVC call is
available to the guest and required for the mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
The workaround HVC call
is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
A workaround is not
available. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
The workaround state is
unknown. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
The workaround is available,
and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
The workaround is always active on this vCPU or it is not needed.
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf

View File

@@ -7,19 +7,29 @@ PTP_KVM is used for high precision time sync between host and guests.
It relies on transferring the wall clock and counter value from the
host to the guest using a KVM-specific hypercall.
* ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: 0x86000001
``ARM_SMCCC_KVM_FUNC_PTP``
----------------------------------------
This hypercall uses the SMC32/HVC32 calling convention:
Retrieve current time information for the specific counter. There are no
endianness restrictions.
ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID
============== ======== =====================================
Function ID: (uint32) 0x86000001
Arguments: (uint32) KVM_PTP_VIRT_COUNTER(0)
KVM_PTP_PHYS_COUNTER(1)
Return Values: (int32) NOT_SUPPORTED(-1) on error, or
(uint32) Upper 32 bits of wall clock time (r0)
(uint32) Lower 32 bits of wall clock time (r1)
(uint32) Upper 32 bits of counter (r2)
(uint32) Lower 32 bits of counter (r3)
Endianness: No Restrictions.
============== ======== =====================================
+---------------------+-------------------------------------------------------+
| Presence: | Optional |
+---------------------+-------------------------------------------------------+
| Calling convention: | HVC32 |
+---------------------+----------+--------------------------------------------+
| Function ID: | (uint32) | 0x86000001 |
+---------------------+----------+----+---------------------------------------+
| Arguments: | (uint32) | R1 | ``KVM_PTP_VIRT_COUNTER (0)`` |
| | | +---------------------------------------+
| | | | ``KVM_PTP_PHYS_COUNTER (1)`` |
+---------------------+----------+----+---------------------------------------+
| Return Values: | (int32) | R0 | ``NOT_SUPPORTED (-1)`` on error, else |
| | | | upper 32 bits of wall clock time |
| +----------+----+---------------------------------------+
| | (uint32) | R1 | Lower 32 bits of wall clock time |
| +----------+----+---------------------------------------+
| | (uint32) | R2 | Upper 32 bits of counter |
| +----------+----+---------------------------------------+
| | (uint32) | R3 | Lower 32 bits of counter |
+---------------------+----------+----+---------------------------------------+

View File

@@ -70,7 +70,7 @@ irqchip.
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY PMUv3 already initialized
-EBUSY PMUv3 already initialized or a VCPU has already run
-EINVAL Invalid filter range
======= ======================================================
@@ -104,11 +104,43 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
isn't strictly speaking an event. Filtering the cycle counter is possible
using event 0x11 (CPU_CYCLES).
1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
------------------------------------------
:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
identifier.
:Returns:
======= ====================================================
-EBUSY PMUv3 already initialized, a VCPU has already run or
an event filter has already been set
-EFAULT Error accessing the PMU identifier
-ENXIO PMU not found
-ENODEV PMUv3 not supported or GIC not initialized
-ENOMEM Could not allocate memory
======= ====================================================
Request that the VCPU uses the specified hardware PMU when creating guest events
for the purpose of PMU emulation. The PMU identifier can be read from the "type"
file for the desired PMU instance under /sys/devices (or, equivalently,
/sys/bus/event_source). This attribute is particularly useful on heterogeneous
systems where there are at least two CPU PMUs on the system. The PMU that is set
for one VCPU will be used by all the other VCPUs. It isn't possible to set a PMU
if a PMU event filter is already present.
Note that KVM will not make any attempts to run the VCPU on the physical CPUs
associated with the PMU specified by this attribute. This is entirely left to
userspace. However, attempting to run the VCPU on a physical CPU not supported
by the PMU will fail and KVM_RUN will return with
exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting
hardware_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and
the cpu field to the processor id.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================
:Architectures: ARM, ARM64
:Architectures: ARM64
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
-----------------------------------------------------------------------------

View File

@@ -42,6 +42,7 @@ descriptions of data structures and algorithms.
ksm
memory-model
mmu_notifier
multigen_lru
numa
overcommit-accounting
page_migration

View File

@@ -0,0 +1,159 @@
.. SPDX-License-Identifier: GPL-2.0
=============
Multi-Gen LRU
=============
The multi-gen LRU is an alternative LRU implementation that optimizes
page reclaim and improves performance under memory pressure. Page
reclaim decides the kernel's caching policy and ability to overcommit
memory. It directly impacts the kswapd CPU usage and RAM efficiency.
Design overview
===============
Objectives
----------
The design objectives are:
* Good representation of access recency
* Try to profit from spatial locality
* Fast paths to make obvious choices
* Simple self-correcting heuristics
The representation of access recency is at the core of all LRU
implementations. In the multi-gen LRU, each generation represents a
group of pages with similar access recency. Generations establish a
(time-based) common frame of reference and therefore help make better
choices, e.g., between different memcgs on a computer or different
computers in a data center (for job scheduling).
Exploiting spatial locality improves efficiency when gathering the
accessed bit. A rmap walk targets a single page and does not try to
profit from discovering a young PTE. A page table walk can sweep all
the young PTEs in an address space, but the address space can be too
sparse to make a profit. The key is to optimize both methods and use
them in combination.
Fast paths reduce code complexity and runtime overhead. Unmapped pages
do not require TLB flushes; clean pages do not require writeback.
These facts are only helpful when other conditions, e.g., access
recency, are similar. With generations as a common frame of reference,
additional factors stand out. But obvious choices might not be good
choices; thus self-correction is necessary.
The benefits of simple self-correcting heuristics are self-evident.
Again, with generations as a common frame of reference, this becomes
attainable. Specifically, pages in the same generation can be
categorized based on additional factors, and a feedback loop can
statistically compare the refault percentages across those categories
and infer which of them are better choices.
Assumptions
-----------
The protection of hot pages and the selection of cold pages are based
on page access channels and patterns. There are two access channels:
* Accesses through page tables
* Accesses through file descriptors
The protection of the former channel is by design stronger because:
1. The uncertainty in determining the access patterns of the former
channel is higher due to the approximation of the accessed bit.
2. The cost of evicting the former channel is higher due to the TLB
flushes required and the likelihood of encountering the dirty bit.
3. The penalty of underprotecting the former channel is higher because
applications usually do not prepare themselves for major page
faults like they do for blocked I/O. E.g., GUI applications
commonly use dedicated I/O threads to avoid blocking rendering
threads.
There are also two access patterns:
* Accesses exhibiting temporal locality
* Accesses not exhibiting temporal locality
For the reasons listed above, the former channel is assumed to follow
the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is
present, and the latter channel is assumed to follow the latter
pattern unless outlying refaults have been observed.
Workflow overview
=================
Evictable pages are divided into multiple generations for each
``lruvec``. The youngest generation number is stored in
``lrugen->max_seq`` for both anon and file types as they are aged on
an equal footing. The oldest generation numbers are stored in
``lrugen->min_seq[]`` separately for anon and file types as clean file
pages can be evicted regardless of swap constraints. These three
variables are monotonically increasing.
Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
bits in order to fit into the gen counter in ``folio->flags``. Each
truncated generation number is an index to ``lrugen->lists[]``. The
sliding window technique is used to track at least ``MIN_NR_GENS`` and
at most ``MAX_NR_GENS`` generations. The gen counter stores a value
within ``[1, MAX_NR_GENS]`` while a page is on one of
``lrugen->lists[]``; otherwise it stores zero.
Each generation is divided into multiple tiers. A page accessed ``N``
times through file descriptors is in tier ``order_base_2(N)``. Unlike
generations, tiers do not have dedicated ``lrugen->lists[]``. In
contrast to moving across generations, which requires the LRU lock,
moving across tiers only involves atomic operations on
``folio->flags`` and therefore has a negligible cost. A feedback loop
modeled after the PID controller monitors refaults over all the tiers
from anon and file types and decides which tiers from which types to
evict or protect.
There are two conceptually independent procedures: the aging and the
eviction. They form a closed-loop system, i.e., the page reclaim.
Aging
-----
The aging produces young generations. Given an ``lruvec``, it
increments ``max_seq`` when ``max_seq-min_seq+1`` approaches
``MIN_NR_GENS``. The aging promotes hot pages to the youngest
generation when it finds them accessed through page tables; the
demotion of cold pages happens consequently when it increments
``max_seq``. The aging uses page table walks and rmap walks to find
young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list``
and calls ``walk_page_range()`` with each ``mm_struct`` on this list
to scan PTEs, and after each iteration, it increments ``max_seq``. For
the latter, when the eviction walks the rmap and finds a young PTE,
the aging scans the adjacent PTEs. For both, on finding a young PTE,
the aging clears the accessed bit and updates the gen counter of the
page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
Eviction
--------
The eviction consumes old generations. Given an ``lruvec``, it
increments ``min_seq`` when ``lrugen->lists[]`` indexed by
``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
evict from, it first compares ``min_seq[]`` to select the older type.
If both types are equally old, it selects the one whose first tier has
a lower refault percentage. The first tier contains single-use
unmapped clean pages, which are the best bet. The eviction sorts a
page according to its gen counter if the aging has found this page
accessed through page tables and updated its gen counter. It also
moves a page to the next generation, i.e., ``min_seq+1``, if this page
was accessed multiple times through file descriptors and the feedback
loop has detected outlying refaults from the tier this page is in. To
this end, the feedback loop uses the first tier as the baseline, for
the reason stated earlier.
Summary
-------
The multi-gen LRU can be disassembled into the following parts:
* Generations
* Rmap walks
* Page table walks
* Bloom filters
* PID controller
The aging and the eviction form a producer-consumer model;
specifically, the latter drives the former by the sliding window over
generations. Within the aging, rmap walks drive page table walks by
inserting hot densely populated page tables to the Bloom filters.
Within the eviction, the PID controller uses refaults as the feedback
to select types to evict and tiers to protect.

View File

@@ -30,3 +30,6 @@ source "lib/Kconfig"
source "lib/Kconfig.debug"
source "Documentation/Kconfig"
# ANDROID: Set KCONFIG_EXT_PREFIX to descend into an external project.
source "$(KCONFIG_EXT_PREFIX)Kconfig.ext"

3
Kconfig.ext Normal file
View File

@@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
# This file is intentionally empty. It's used as a placeholder for when
# KCONFIG_EXT_PREFIX isn't defined.

View File

@@ -2424,7 +2424,7 @@ F: drivers/pci/controller/dwc/pcie-qcom.c
F: drivers/phy/qualcomm/
F: drivers/power/*/msm*
F: drivers/reset/reset-qcom-*
F: drivers/scsi/ufs/ufs-qcom*
F: drivers/ufs/host/ufs-qcom*
F: drivers/spi/spi-geni-qcom.c
F: drivers/spi/spi-qcom-qspi.c
F: drivers/spi/spi-qup.c
@@ -7176,6 +7176,7 @@ M: Chao Yu <chao@kernel.org>
L: linux-f2fs-devel@lists.sourceforge.net
S: Maintained
W: https://f2fs.wiki.kernel.org/
B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
F: Documentation/ABI/testing/sysfs-fs-f2fs
F: Documentation/filesystems/f2fs.rst
@@ -9224,6 +9225,13 @@ F: Documentation/hwmon/ina2xx.rst
F: drivers/hwmon/ina2xx.c
F: include/linux/platform_data/ina2xx.h
INCREMENTAL FILE SYSTEM
M: Paul Lawrence <paullawrence@google.com>
L: linux-unionfs@vger.kernel.org
S: Supported
F: fs/incfs/
F: tools/testing/selftests/filesystems/incfs/
INDUSTRY PACK SUBSYSTEM (IPACK)
M: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
M: Jens Taprogge <jens.taprogge@taprogge.org>
@@ -10136,7 +10144,6 @@ F: arch/*/include/asm/*kasan.h
F: arch/*/mm/kasan_init*
F: include/linux/kasan*.h
F: lib/Kconfig.kasan
F: lib/test_kasan*.c
F: mm/kasan/
F: scripts/Makefile.kasan
@@ -10305,8 +10312,10 @@ M: Marc Zyngier <maz@kernel.org>
R: James Morse <james.morse@arm.com>
R: Alexandru Elisei <alexandru.elisei@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Oliver Upton <oliver.upton@linux.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers)
L: kvmarm@lists.linux.dev
L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
F: arch/arm64/include/asm/kvm*
@@ -13322,6 +13331,12 @@ W: http://www.netlab.is.tsukuba.ac.jp/~yokota/izumi/ninja/
F: Documentation/scsi/NinjaSCSI.rst
F: drivers/scsi/nsp32*
NINTENDO HID DRIVER
M: Daniel J. Ogorchock <djogorchock@gmail.com>
L: linux-input@vger.kernel.org
S: Maintained
F: drivers/hid/hid-nintendo*
NIOS2 ARCHITECTURE
M: Dinh Nguyen <dinguyen@kernel.org>
S: Maintained
@@ -16757,6 +16772,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git
F: Documentation/devicetree/bindings/scsi/
F: drivers/scsi/
F: drivers/ufs/
F: include/scsi/
SCSI TAPE DRIVER
@@ -19312,23 +19328,24 @@ F: include/linux/visorbus.h
UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER
R: Alim Akhtar <alim.akhtar@samsung.com>
R: Avri Altman <avri.altman@wdc.com>
R: Bart Van Assche <bvanassche@acm.org>
L: linux-scsi@vger.kernel.org
S: Supported
F: Documentation/scsi/ufs.rst
F: drivers/scsi/ufs/
F: drivers/ufs/core/
UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER DWC HOOKS
M: Pedro Sousa <pedrom.sousa@synopsys.com>
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/ufs/*dwc*
F: drivers/ufs/host/*dwc*
UNIVERSAL FLASH STORAGE HOST CONTROLLER DRIVER MEDIATEK HOOKS
M: Stanley Chu <stanley.chu@mediatek.com>
L: linux-scsi@vger.kernel.org
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: drivers/scsi/ufs/ufs-mediatek*
F: drivers/ufs/host/ufs-mediatek*
UNSORTED BLOCK IMAGES (UBI)
M: Richard Weinberger <richard@nod.at>

133
Makefile
View File

@@ -136,6 +136,24 @@ endif
export KBUILD_EXTMOD
# ANDROID: set up mixed-build support. mixed-build allows device kernel modules
# to be compiled against a GKI kernel. This approach still uses the headers and
# Kbuild from device kernel, so care must be taken to ensure that those headers match.
ifdef KBUILD_MIXED_TREE
# Need vmlinux.symvers for modpost and System.map for depmod, check whether they exist in KBUILD_MIXED_TREE
required_mixed_files=vmlinux.symvers System.map
$(if $(filter-out $(words $(required_mixed_files)), \
$(words $(wildcard $(add-prefix $(KBUILD_MIXED_TREE)/,$(required_mixed_files))))),,\
$(error KBUILD_MIXED_TREE=$(KBUILD_MIXED_TREE) doesn't contain $(required_mixed_files)))
endif
mixed-build-prefix = $(if $(KBUILD_MIXED_TREE),$(KBUILD_MIXED_TREE)/)
export KBUILD_MIXED_TREE
# This is a hack for kleaf to set mixed-build-prefix within the execution of a make rule, e.g.
# within __modinst_pre.
# TODO(b/205893923): Revert this hack once it is properly handled.
export mixed-build-prefix
# Kbuild will save output files in the current working directory.
# This does not need to match to the root of the kernel source tree.
#
@@ -432,11 +450,12 @@ HOSTCXX = g++
endif
HOSTPKG_CONFIG = pkg-config
export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
-O2 -fomit-frame-pointer -std=gnu89
export KBUILD_USERLDFLAGS :=
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
-O2 -fomit-frame-pointer -std=gnu89
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
KBUILD_HOSTCFLAGS := $(KBUILD_USERCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
@@ -477,7 +496,7 @@ KGZIP = gzip
KBZIP2 = bzip2
KLZOP = lzop
LZMA = lzma
LZ4 = lz4c
LZ4 = lz4
XZ = xz
ZSTD = zstd
@@ -531,6 +550,7 @@ export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AW
export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
@@ -672,11 +692,13 @@ drivers-y += virt/
libs-y := lib/
endif # KBUILD_EXTMOD
ifndef KBUILD_MIXED_TREE
# The all: target is the default when no target is given on the
# command line.
# This allows a user to issue only 'make' to build a kernel including modules
# Defaults to vmlinux, but the arch makefile usually adds further targets
all: vmlinux
endif
CFLAGS_GCOV := -fprofile-arcs -ftest-coverage
ifdef CONFIG_CC_IS_GCC
@@ -955,7 +977,13 @@ KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod_prefix).thinlto-cache
else
CC_FLAGS_LTO := -flto
endif
ifeq ($(SRCARCH),x86)
# Workaround for compiler / linker bug
CC_FLAGS_LTO += -fvisibility=hidden
else
CC_FLAGS_LTO += -fvisibility=default
endif
# Limit inlining across translation units to reduce binary size
KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
@@ -1150,6 +1178,40 @@ export extmod_prefix = $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)
export MODORDER := $(extmod_prefix)modules.order
export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps
# ---------------------------------------------------------------------------
# Kernel headers
PHONY += headers
#Default location for installed headers
ifeq ($(KBUILD_EXTMOD),)
PHONY += archheaders archscripts
hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
else
hdr-prefix = $(KBUILD_EXTMOD)/
hdr-inst := -f $(srctree)/scripts/Makefile.headersinst dst=$(KBUILD_EXTMOD)/usr/include objtree=$(objtree)/$(KBUILD_EXTMOD) obj
endif
export INSTALL_HDR_PATH = $(objtree)/$(hdr-prefix)usr
quiet_cmd_headers_install = INSTALL $(INSTALL_HDR_PATH)/include
cmd_headers_install = \
mkdir -p $(INSTALL_HDR_PATH); \
rsync -mrl --include='*/' --include='*\.h' --exclude='*' \
$(hdr-prefix)usr/include $(INSTALL_HDR_PATH);
PHONY += headers_install
headers_install: headers
$(call cmd,headers_install)
headers:
ifeq ($(KBUILD_EXTMOD),)
$(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML))
endif
$(Q)$(MAKE) $(hdr-inst)=$(hdr-prefix)include/uapi
$(Q)$(MAKE) $(hdr-inst)=$(hdr-prefix)arch/$(SRCARCH)/include/uapi
ifeq ($(KBUILD_EXTMOD),)
core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/
core-$(CONFIG_BLOCK) += block/
@@ -1215,8 +1277,10 @@ cmd_link-vmlinux = \
$(CONFIG_SHELL) $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)"; \
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
ifndef KBUILD_MIXED_TREE
vmlinux: scripts/link-vmlinux.sh autoksyms_recursive $(vmlinux-deps) FORCE
+$(call if_changed_dep,link-vmlinux)
endif
targets := vmlinux
@@ -1225,7 +1289,8 @@ targets := vmlinux
$(sort $(vmlinux-deps) $(subdir-modorder)): descend ;
filechk_kernel.release = \
echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion \
$(srctree) $(BRANCH) $(KMI_GENERATION))"
# Store (new) KERNELRELEASE string in include/config/kernel.release
include/config/kernel.release: FORCE
@@ -1315,32 +1380,6 @@ headerdep:
$(Q)find $(srctree)/include/ -name '*.h' | xargs --max-args 1 \
$(srctree)/scripts/headerdep.pl -I$(srctree)/include
# ---------------------------------------------------------------------------
# Kernel headers
#Default location for installed headers
export INSTALL_HDR_PATH = $(objtree)/usr
quiet_cmd_headers_install = INSTALL $(INSTALL_HDR_PATH)/include
cmd_headers_install = \
mkdir -p $(INSTALL_HDR_PATH); \
rsync -mrl --include='*/' --include='*\.h' --exclude='*' \
usr/include $(INSTALL_HDR_PATH)
PHONY += headers_install
headers_install: headers
$(call cmd,headers_install)
PHONY += archheaders archscripts
hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
PHONY += headers
headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
$(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML))
$(Q)$(MAKE) $(hdr-inst)=include/uapi
$(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi
# Deprecated. It is no-op now.
PHONY += headers_check
headers_check:
@@ -1426,7 +1465,9 @@ kselftest-merge:
# Devicetree files
ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/boot/dts/),)
dtstree := arch/$(SRCARCH)/boot/dts
# ANDROID: allow this to be overridden by the build environment. This allows
# one to compile a device tree that is located out-of-tree.
dtstree ?= arch/$(SRCARCH)/boot/dts
endif
ifneq ($(dtstree),)
@@ -1492,7 +1533,9 @@ endif
# using awk while concatenating to the final file.
PHONY += modules
modules: $(if $(KBUILD_BUILTIN),vmlinux) modules_check modules_prepare
# if KBUILD_BUILTIN && !KBUILD_MIXED_TREE, depend on vmlinux
modules: $(if $(KBUILD_BUILTIN), $(if $(KBUILD_MIXED_TREE),,vmlinux))
modules: modules_check modules_prepare
cmd_modules_order = $(AWK) '!x[$$0]++' $(real-prereqs) > $@
@@ -1537,8 +1580,8 @@ __modinst_pre:
ln -s $(CURDIR) $(MODLIB)/build ; \
fi
@sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
@cp -f modules.builtin $(MODLIB)/
@cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
@cp -f $(mixed-build-prefix)modules.builtin $(MODLIB)/
@cp -f $(or $(mixed-build-prefix),$(objtree)/)modules.builtin.modinfo $(MODLIB)/
endif # CONFIG_MODULES
@@ -1799,6 +1842,8 @@ help:
@echo ''
@echo ' modules - default target, build the module(s)'
@echo ' modules_install - install the module'
@echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'
@echo ' (default: $(abspath $(INSTALL_HDR_PATH)))'
@echo ' clean - remove generated files in module directory only'
@echo ''
@@ -1823,7 +1868,7 @@ modules_check: $(MODORDER)
quiet_cmd_depmod = DEPMOD $(MODLIB)
cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
$(KERNELRELEASE)
$(KERNELRELEASE) $(mixed-build-prefix)
modules_install:
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
@@ -1860,7 +1905,8 @@ ifdef single-build
# .ko is special because modpost is needed
single-ko := $(sort $(filter %.ko, $(MAKECMDGOALS)))
single-no-ko := $(sort $(patsubst %.ko,%.mod, $(MAKECMDGOALS)))
single-no-ko := $(filter-out $(single-ko), $(MAKECMDGOALS)) \
$(foreach x, o mod, $(patsubst %.ko, %.$x, $(single-ko)))
$(single-ko): single_modpost
@:
@@ -1902,7 +1948,7 @@ descend: $(build-dirs)
$(build-dirs): prepare
$(Q)$(MAKE) $(build)=$@ \
single-build=$(if $(filter-out $@/, $(filter $@/%, $(KBUILD_SINGLE_TARGETS))),1) \
need-builtin=1 need-modorder=1
$(if $(KBUILD_MIXED_TREE),,need-builtin=1) need-modorder=1
clean-dirs := $(addprefix _clean_, $(clean-dirs))
PHONY += $(clean-dirs) clean
@@ -1911,12 +1957,14 @@ $(clean-dirs):
clean: $(clean-dirs)
$(call cmd,rmfiles)
@find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
@find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) \
$(if $(filter-out arch/$(SRCARCH)/boot/dts, $(dtstree)), $(dtstree)) \
$(RCS_FIND_IGNORE) \
\( -name '*.[aios]' -o -name '*.ko' -o -name '.*.cmd' \
-o -name '*.ko.*' \
-o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
-o -name '*.dwo' -o -name '*.lst' \
-o -name '*.su' -o -name '*.mod' \
-o -name '*.su' -o -name '*.mod' -o -name '*.usyms' \
-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
-o -name '*.lex.c' -o -name '*.tab.[ch]' \
-o -name '*.asn1.[ch]' \
@@ -2007,7 +2055,8 @@ checkstack:
$(PERL) $(srctree)/scripts/checkstack.pl $(CHECKSTACK_ARCH)
kernelrelease:
@echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
@echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion \
$(srctree) $(BRANCH) $(KMI_GENERATION))"
kernelversion:
@echo $(KERNELVERSION)

2
OWNERS Normal file
View File

@@ -0,0 +1,2 @@
# include OWNERS from the authoritative android-mainline branch
include kernel/common:android-mainline:/OWNERS

8
android/OWNERS Normal file
View File

@@ -0,0 +1,8 @@
# If we ever add another OWNERS above this directory, it's likely to be
# more permissive, so don't inherit from it
set noparent
include kernel/common:android-mainline:/OWNERS_DrNo
# Downstream boards maintained directly in this manifest branch
per-file abi_gki_aarch64_cuttlefish = adelva@google.com, rammuthiah@google.com
per-file abi_gki_aarch64_goldfish = rkir@google.com

4
android/abi_gki_aarch64 Normal file
View File

@@ -0,0 +1,4 @@
[abi_symbol_list]
# commonly used symbols
module_layout
__put_task_struct

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,517 @@
__cfg80211_alloc_event_skb
__cfg80211_alloc_reply_skb
__cfg80211_radar_event
__cfg80211_send_event_skb
__hci_cmd_send
__hci_cmd_sync
__hci_cmd_sync_ev
__nfc_alloc_vendor_cmd_reply_skb
alloc_can_err_skb
alloc_can_skb
alloc_candev_mqs
alloc_canfd_skb
arc4_crypt
arc4_setkey
baswap
bridge_tunnel_header
bt_accept_dequeue
bt_accept_enqueue
bt_accept_unlink
bt_debugfs
bt_err
bt_err_ratelimited
bt_info
bt_procfs_cleanup
bt_procfs_init
bt_sock_ioctl
bt_sock_link
bt_sock_poll
bt_sock_reclassify_lock
bt_sock_recvmsg
bt_sock_register
bt_sock_stream_recvmsg
bt_sock_unlink
bt_sock_unregister
bt_sock_wait_ready
bt_sock_wait_state
bt_to_errno
bt_warn
bt_warn_ratelimited
btbcm_check_bdaddr
btbcm_finalize
btbcm_initialize
btbcm_patchram
btbcm_read_pcm_int_params
btbcm_set_bdaddr
btbcm_setup_apple
btbcm_setup_patchram
btbcm_write_pcm_int_params
can_bus_off
can_change_mtu
can_change_state
can_fd_dlc2len
can_fd_len2dlc
can_free_echo_skb
can_get_echo_skb
can_get_state_str
can_proto_register
can_proto_unregister
can_put_echo_skb
can_rx_offload_add_fifo
can_rx_offload_add_manual
can_rx_offload_add_timestamp
can_rx_offload_del
can_rx_offload_enable
can_rx_offload_get_echo_skb
can_rx_offload_irq_finish
can_rx_offload_irq_offload_fifo
can_rx_offload_irq_offload_timestamp
can_rx_offload_queue_sorted
can_rx_offload_queue_tail
can_rx_offload_threaded_irq_finish
can_rx_register
can_rx_unregister
can_send
can_skb_get_frame_len
can_sock_destruct
cfg80211_any_usable_channels
cfg80211_assoc_comeback
cfg80211_assoc_failure
cfg80211_auth_timeout
cfg80211_background_cac_abort
cfg80211_bss_color_notify
cfg80211_bss_flush
cfg80211_bss_iter
cfg80211_cac_event
cfg80211_calculate_bitrate
cfg80211_ch_switch_notify
cfg80211_ch_switch_started_notify
cfg80211_chandef_compatible
cfg80211_chandef_create
cfg80211_chandef_dfs_required
cfg80211_chandef_usable
cfg80211_chandef_valid
cfg80211_check_combinations
cfg80211_check_station_change
cfg80211_classify8021d
cfg80211_conn_failed
cfg80211_connect_done
cfg80211_control_port_tx_status
cfg80211_cqm_beacon_loss_notify
cfg80211_cqm_pktloss_notify
cfg80211_cqm_rssi_notify
cfg80211_cqm_txe_notify
cfg80211_crit_proto_stopped
cfg80211_del_sta_sinfo
cfg80211_disconnected
cfg80211_external_auth_request
cfg80211_find_elem_match
cfg80211_find_vendor_elem
cfg80211_free_nan_func
cfg80211_ft_event
cfg80211_get_bss
cfg80211_get_drvinfo
cfg80211_get_iftype_ext_capa
cfg80211_get_p2p_attr
cfg80211_get_station
cfg80211_gtk_rekey_notify
cfg80211_ibss_joined
cfg80211_iftype_allowed
cfg80211_inform_bss_data
cfg80211_inform_bss_frame_data
cfg80211_is_element_inherited
cfg80211_iter_combinations
cfg80211_merge_profile
cfg80211_mgmt_tx_status_ext
cfg80211_michael_mic_failure
cfg80211_nan_func_terminated
cfg80211_nan_match
cfg80211_new_sta
cfg80211_notify_new_peer_candidate
cfg80211_pmksa_candidate_notify
cfg80211_pmsr_complete
cfg80211_pmsr_report
cfg80211_port_authorized
cfg80211_probe_status
cfg80211_put_bss
cfg80211_ready_on_channel
cfg80211_ref_bss
cfg80211_reg_can_beacon
cfg80211_reg_can_beacon_relax
cfg80211_register_netdevice
cfg80211_remain_on_channel_expired
cfg80211_report_obss_beacon_khz
cfg80211_report_wowlan_wakeup
cfg80211_roamed
cfg80211_rx_assoc_resp
cfg80211_rx_control_port
cfg80211_rx_mgmt_ext
cfg80211_rx_mlme_mgmt
cfg80211_rx_spurious_frame
cfg80211_rx_unexpected_4addr_frame
cfg80211_rx_unprot_mlme_mgmt
cfg80211_scan_done
cfg80211_sched_scan_results
cfg80211_sched_scan_stopped
cfg80211_sched_scan_stopped_locked
cfg80211_send_layer2_update
cfg80211_shutdown_all_interfaces
cfg80211_sinfo_alloc_tid_stats
cfg80211_sta_opmode_change_notify
cfg80211_stop_iface
cfg80211_tdls_oper_request
cfg80211_tx_mgmt_expired
cfg80211_tx_mlme_mgmt
cfg80211_unlink_bss
cfg80211_unregister_wdev
cfg80211_update_owe_info_event
cfg80211_vendor_cmd_get_sender
cfg80211_vendor_cmd_reply
close_candev
free_candev
freq_reg_info
get_wiphy_regdom
h4_recv_buf
hci_alloc_dev_priv
hci_cmd_sync
hci_conn_check_secure
hci_conn_security
hci_conn_switch_role
hci_free_dev
hci_get_route
hci_mgmt_chan_register
hci_mgmt_chan_unregister
hci_recv_diag
hci_recv_frame
hci_register_cb
hci_register_dev
hci_release_dev
hci_reset_dev
hci_resume_dev
hci_set_fw_info
hci_set_hw_info
hci_suspend_dev
hci_uart_register_device
hci_uart_tx_wakeup
hci_uart_unregister_device
hci_unregister_cb
hci_unregister_dev
hidp_hid_driver
ieee80211_alloc_hw_nm
ieee80211_amsdu_to_8023s
ieee80211_ap_probereq_get
ieee80211_ave_rssi
ieee80211_beacon_cntdwn_is_complete
ieee80211_beacon_get_template
ieee80211_beacon_get_tim
ieee80211_beacon_loss
ieee80211_beacon_set_cntdwn
ieee80211_beacon_update_cntdwn
ieee80211_bss_get_elem
ieee80211_calc_rx_airtime
ieee80211_calc_tx_airtime
ieee80211_chandef_to_operating_class
ieee80211_channel_to_freq_khz
ieee80211_chswitch_done
ieee80211_color_change_finish
ieee80211_connection_loss
ieee80211_cqm_beacon_loss_notify
ieee80211_cqm_rssi_notify
ieee80211_csa_finish
ieee80211_ctstoself_duration
ieee80211_ctstoself_get
ieee80211_data_to_8023_exthdr
ieee80211_disable_rssi_reports
ieee80211_disconnect
ieee80211_enable_rssi_reports
ieee80211_find_sta
ieee80211_find_sta_by_ifaddr
ieee80211_free_hw
ieee80211_free_txskb
ieee80211_freq_khz_to_channel
ieee80211_generic_frame_duration
ieee80211_get_bssid
ieee80211_get_buffered_bc
ieee80211_get_channel_khz
ieee80211_get_fils_discovery_tmpl
ieee80211_get_hdrlen_from_skb
ieee80211_get_key_rx_seq
ieee80211_get_mesh_hdrlen
ieee80211_get_num_supported_channels
ieee80211_get_response_rate
ieee80211_get_tkip_p1k_iv
ieee80211_get_tkip_p2k
ieee80211_get_tkip_rx_p1k
ieee80211_get_tx_rates
ieee80211_get_unsol_bcast_probe_resp_tmpl
ieee80211_get_vht_max_nss
ieee80211_gtk_rekey_add
ieee80211_gtk_rekey_notify
ieee80211_hdrlen
ieee80211_ie_split_ric
ieee80211_iter_chan_contexts_atomic
ieee80211_iter_keys
ieee80211_iter_keys_rcu
ieee80211_iterate_active_interfaces_atomic
ieee80211_iterate_active_interfaces_mtx
ieee80211_iterate_interfaces
ieee80211_iterate_stations_atomic
ieee80211_key_mic_failure
ieee80211_key_replay
ieee80211_manage_rx_ba_offl
ieee80211_mandatory_rates
ieee80211_mark_rx_ba_filtered_frames
ieee80211_nan_func_match
ieee80211_nan_func_terminated
ieee80211_next_txq
ieee80211_nullfunc_get
ieee80211_operating_class_to_band
ieee80211_parse_p2p_noa
ieee80211_probereq_get
ieee80211_proberesp_get
ieee80211_pspoll_get
ieee80211_queue_delayed_work
ieee80211_queue_stopped
ieee80211_queue_work
ieee80211_radar_detected
ieee80211_radiotap_iterator_init
ieee80211_radiotap_iterator_next
ieee80211_rate_control_register
ieee80211_rate_control_unregister
ieee80211_ready_on_channel
ieee80211_register_hw
ieee80211_remain_on_channel_expired
ieee80211_remove_key
ieee80211_report_low_ack
ieee80211_report_wowlan_wakeup
ieee80211_request_smps
ieee80211_reserve_tid
ieee80211_restart_hw
ieee80211_resume_disconnect
ieee80211_return_txq
ieee80211_rts_duration
ieee80211_rts_get
ieee80211_rx_ba_timer_expired
ieee80211_rx_irqsafe
ieee80211_rx_list
ieee80211_rx_napi
ieee80211_s1g_channel_width
ieee80211_scan_completed
ieee80211_sched_scan_results
ieee80211_sched_scan_stopped
ieee80211_schedule_txq
ieee80211_send_bar
ieee80211_send_eosp_nullfunc
ieee80211_set_key_rx_seq
ieee80211_sta_block_awake
ieee80211_sta_eosp
ieee80211_sta_ps_transition
ieee80211_sta_pspoll
ieee80211_sta_register_airtime
ieee80211_sta_set_buffered
ieee80211_sta_uapsd_trigger
ieee80211_start_tx_ba_cb_irqsafe
ieee80211_start_tx_ba_session
ieee80211_stop_queue
ieee80211_stop_queues
ieee80211_stop_rx_ba_session
ieee80211_stop_tx_ba_cb_irqsafe
ieee80211_stop_tx_ba_session
ieee80211_tdls_oper_request
ieee80211_tkip_add_iv
ieee80211_tx_dequeue
ieee80211_tx_prepare_skb
ieee80211_tx_rate_update
ieee80211_tx_status
ieee80211_tx_status_8023
ieee80211_tx_status_ext
ieee80211_tx_status_irqsafe
ieee80211_txq_airtime_check
ieee80211_txq_get_depth
ieee80211_txq_may_transmit
ieee80211_txq_schedule_start
ieee80211_unregister_hw
ieee80211_unreserve_tid
ieee80211_update_mu_groups
ieee80211_update_p2p_noa
ieee80211_vif_to_wdev
ieee80211_wake_queue
ieee80211_wake_queues
ieee802154_alloc_hw
ieee802154_free_hw
ieee802154_hdr_peek
ieee802154_hdr_peek_addrs
ieee802154_hdr_pull
ieee802154_hdr_push
ieee802154_max_payload
ieee802154_register_hw
ieee802154_rx_irqsafe
ieee802154_stop_queue
ieee802154_unregister_hw
ieee802154_wake_queue
ieee802154_xmit_complete
ieeee80211_obss_color_collision_notify
l2cap_add_psm
l2cap_chan_close
l2cap_chan_connect
l2cap_chan_create
l2cap_chan_del
l2cap_chan_list
l2cap_chan_put
l2cap_chan_send
l2cap_chan_set_defaults
l2cap_conn_get
l2cap_conn_put
l2cap_is_socket
l2cap_register_user
l2cap_unregister_user
l2tp_recv_common
l2tp_session_create
l2tp_session_dec_refcount
l2tp_session_delete
l2tp_session_get
l2tp_session_get_by_ifname
l2tp_session_get_nth
l2tp_session_inc_refcount
l2tp_session_register
l2tp_session_set_header_len
l2tp_sk_to_tunnel
l2tp_tunnel_create
l2tp_tunnel_dec_refcount
l2tp_tunnel_delete
l2tp_tunnel_get
l2tp_tunnel_get_nth
l2tp_tunnel_get_session
l2tp_tunnel_inc_refcount
l2tp_tunnel_register
l2tp_udp_encap_recv
l2tp_xmit_skb
lowpan_header_compress
lowpan_header_decompress
lowpan_nhc_add
lowpan_nhc_del
lowpan_register_netdev
lowpan_register_netdevice
lowpan_unregister_netdev
lowpan_unregister_netdevice
nfc_add_se
nfc_alloc_recv_skb
nfc_allocate_device
nfc_class
nfc_dep_link_is_up
nfc_driver_failure
nfc_find_se
nfc_fw_download_done
nfc_get_local_general_bytes
nfc_proto_register
nfc_proto_unregister
nfc_register_device
nfc_remove_se
nfc_se_connectivity
nfc_se_transaction
nfc_send_to_raw_sock
nfc_set_remote_general_bytes
nfc_target_lost
nfc_targets_found
nfc_tm_activated
nfc_tm_data_received
nfc_tm_deactivated
nfc_unregister_device
nfc_vendor_cmd_reply
of_can_transceiver
open_candev
ppp_channel_index
ppp_dev_name
ppp_input
ppp_input_error
ppp_output_wakeup
ppp_register_channel
ppp_register_compressor
ppp_register_net_channel
ppp_unit_number
ppp_unregister_channel
ppp_unregister_compressor
pppox_compat_ioctl
pppox_ioctl
pppox_unbind_sock
qca_read_soc_version
qca_send_pre_shutdown_cmd
qca_set_bdaddr
qca_set_bdaddr_rome
qca_uart_setup
rate_control_set_rates
reg_initiator_name
reg_query_regdb_wmm
register_candev
register_pppox_proto
regulatory_hint
regulatory_pre_cac_allowed
regulatory_set_wiphy_regd
regulatory_set_wiphy_regd_sync
rfc1042_header
rfkill_alloc
rfkill_blocked
rfkill_destroy
rfkill_find_type
rfkill_get_led_trigger_name
rfkill_init_sw_state
rfkill_pause_polling
rfkill_register
rfkill_resume_polling
rfkill_set_hw_state_reason
rfkill_set_led_trigger_name
rfkill_set_states
rfkill_set_sw_state
rfkill_unregister
safe_candev_priv
slhc_compress
slhc_free
slhc_init
slhc_remember
slhc_toss
slhc_uncompress
tipc_dump_done
tipc_dump_start
tipc_nl_sk_walk
tipc_sk_fill_sock_diag
unregister_candev
unregister_pppox_proto
usb_serial_claim_interface
usb_serial_deregister_drivers
usb_serial_generic_chars_in_buffer
usb_serial_generic_close
usb_serial_generic_get_icount
usb_serial_generic_open
usb_serial_generic_process_read_urb
usb_serial_generic_read_bulk_callback
usb_serial_generic_resume
usb_serial_generic_submit_read_urbs
usb_serial_generic_throttle
usb_serial_generic_tiocmiwait
usb_serial_generic_unthrottle
usb_serial_generic_wait_until_sent
usb_serial_generic_write
usb_serial_generic_write_bulk_callback
usb_serial_generic_write_start
usb_serial_handle_dcd_change
usb_serial_port_softint
usb_serial_register_drivers
usb_serial_resume
usb_serial_suspend
wdev_chandef
wdev_to_ieee80211_vif
wiphy_apply_custom_regulatory
wiphy_free
wiphy_new_nm
wiphy_read_of_freq_limits
wiphy_register
wiphy_rfkill_set_hw_state_reason
wiphy_rfkill_start_polling
wiphy_to_ieee80211_hw
wiphy_unregister
wpan_phy_find
wpan_phy_for_each
wpan_phy_free
wpan_phy_new
wpan_phy_register
wpan_phy_unregister

4
android/abi_gki_rockpi4 Normal file
View File

@@ -0,0 +1,4 @@
[abi_symbol_list]
# commonly used symbols
module_layout
__put_task_struct

View File

@@ -0,0 +1,48 @@
mm/zsmalloc.ko
drivers/block/zram/zram.ko
drivers/net/can/dev/can-dev.ko
drivers/net/can/vcan.ko
drivers/net/can/slcan.ko
drivers/net/ppp/ppp_generic.ko
drivers/net/ppp/bsd_comp.ko
drivers/net/ppp/ppp_deflate.ko
drivers/net/ppp/ppp_mppe.ko
drivers/net/ppp/pppox.ko
drivers/net/ppp/pptp.ko
drivers/net/slip/slhc.ko
drivers/usb/class/cdc-acm.ko
drivers/usb/serial/usbserial.ko
drivers/usb/serial/ftdi_sio.ko
drivers/bluetooth/hci_uart.ko
drivers/bluetooth/btsdio.ko
drivers/bluetooth/btbcm.ko
drivers/bluetooth/btqca.ko
net/8021q/8021q.ko
net/wireless/cfg80211.ko
net/can/can.ko
net/can/can-raw.ko
net/can/can-bcm.ko
net/can/can-gw.ko
net/bluetooth/bluetooth.ko
net/bluetooth/rfcomm/rfcomm.ko
net/bluetooth/hidp/hidp.ko
net/l2tp/l2tp_core.ko
net/l2tp/l2tp_ppp.ko
net/mac80211/mac80211.ko
net/tipc/tipc.ko
net/tipc/diag.ko
net/rfkill/rfkill.ko
net/6lowpan/6lowpan.ko
net/6lowpan/nhc_dest.ko
net/6lowpan/nhc_fragment.ko
net/6lowpan/nhc_hop.ko
net/6lowpan/nhc_ipv6.ko
net/6lowpan/nhc_mobility.ko
net/6lowpan/nhc_routing.ko
net/6lowpan/nhc_udp.ko
net/ieee802154/6lowpan/ieee802154_6lowpan.ko
net/ieee802154/ieee802154.ko
net/ieee802154/ieee802154_socket.ko
net/mac802154/mac802154.ko
net/nfc/nfc.ko
lib/crypto/libarc4.ko

View File

@@ -0,0 +1,47 @@
drivers/bluetooth/btbcm.ko
drivers/bluetooth/btqca.ko
drivers/bluetooth/btsdio.ko
drivers/bluetooth/hci_uart.ko
drivers/net/can/dev/can-dev.ko
drivers/net/can/slcan.ko
drivers/net/can/vcan.ko
drivers/net/ppp/bsd_comp.ko
drivers/net/ppp/ppp_deflate.ko
drivers/net/ppp/ppp_generic.ko
drivers/net/ppp/ppp_mppe.ko
drivers/net/ppp/pppox.ko
drivers/net/ppp/pptp.ko
drivers/net/slip/slhc.ko
drivers/usb/class/cdc-acm.ko
drivers/usb/serial/ftdi_sio.ko
drivers/usb/serial/usbserial.ko
lib/crypto/libarc4.ko
net/6lowpan/6lowpan.ko
net/6lowpan/nhc_dest.ko
net/6lowpan/nhc_fragment.ko
net/6lowpan/nhc_hop.ko
net/6lowpan/nhc_ipv6.ko
net/6lowpan/nhc_mobility.ko
net/6lowpan/nhc_routing.ko
net/6lowpan/nhc_udp.ko
net/8021q/8021q.ko
net/bluetooth/bluetooth.ko
net/bluetooth/hidp/hidp.ko
net/bluetooth/rfcomm/rfcomm.ko
net/can/can.ko
net/can/can-bcm.ko
net/can/can-gw.ko
net/can/can-raw.ko
net/ieee802154/6lowpan/ieee802154_6lowpan.ko
net/ieee802154/ieee802154.ko
net/ieee802154/ieee802154_socket.ko
net/l2tp/l2tp_core.ko
net/l2tp/l2tp_ppp.ko
net/mac80211/mac80211.ko
net/mac802154/mac802154.ko
net/nfc/nfc.ko
net/rfkill/rfkill.ko
net/tipc/diag.ko
net/tipc/tipc.ko
net/wireless/cfg80211.ko

View File

@@ -0,0 +1,49 @@
drivers/block/zram/zram.ko
drivers/bluetooth/btbcm.ko
drivers/bluetooth/btqca.ko
drivers/bluetooth/btsdio.ko
drivers/bluetooth/hci_uart.ko
drivers/net/can/dev/can-dev.ko
drivers/net/can/slcan.ko
drivers/net/can/vcan.ko
drivers/net/ppp/bsd_comp.ko
drivers/net/ppp/ppp_deflate.ko
drivers/net/ppp/ppp_generic.ko
drivers/net/ppp/ppp_mppe.ko
drivers/net/ppp/pppox.ko
drivers/net/ppp/pptp.ko
drivers/net/slip/slhc.ko
drivers/usb/class/cdc-acm.ko
drivers/usb/serial/ftdi_sio.ko
drivers/usb/serial/usbserial.ko
lib/crypto/libarc4.ko
mm/zsmalloc.ko
net/6lowpan/6lowpan.ko
net/6lowpan/nhc_dest.ko
net/6lowpan/nhc_fragment.ko
net/6lowpan/nhc_hop.ko
net/6lowpan/nhc_ipv6.ko
net/6lowpan/nhc_mobility.ko
net/6lowpan/nhc_routing.ko
net/6lowpan/nhc_udp.ko
net/8021q/8021q.ko
net/bluetooth/bluetooth.ko
net/bluetooth/hidp/hidp.ko
net/bluetooth/rfcomm/rfcomm.ko
net/can/can.ko
net/can/can-bcm.ko
net/can/can-gw.ko
net/can/can-raw.ko
net/ieee802154/6lowpan/ieee802154_6lowpan.ko
net/ieee802154/ieee802154.ko
net/ieee802154/ieee802154_socket.ko
net/l2tp/l2tp_core.ko
net/l2tp/l2tp_ppp.ko
net/mac80211/mac80211.ko
net/mac802154/mac802154.ko
net/nfc/nfc.ko
net/rfkill/rfkill.ko
net/tipc/diag.ko
net/tipc/tipc.ko
net/wireless/cfg80211.ko

View File

@@ -24,6 +24,13 @@ config KEXEC_ELF
config HAVE_IMA_KEXEC
bool
config ARCH_HAS_SUBPAGE_FAULTS
bool
help
Select if the architecture can check permissions at sub-page
granularity (e.g. arm64 MTE). The probe_user_*() functions
must be implemented.
config SET_FS
bool
@@ -713,10 +720,7 @@ config ARCH_SUPPORTS_CFI_CLANG
config CFI_CLANG
bool "Use Clang's Control Flow Integrity (CFI)"
depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG
# Clang >= 12:
# - https://bugs.llvm.org/show_bug.cgi?id=46258
# - https://bugs.llvm.org/show_bug.cgi?id=47479
depends on CLANG_VERSION >= 120000
depends on CLANG_VERSION >= 140000
select KALLSYMS
help
This option enables Clang's forward-edge Control Flow Integrity
@@ -1238,6 +1242,9 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool
config ARCH_HAS_MEM_RELINQUISH
bool
config ARCH_HAS_CC_PLATFORM
bool
@@ -1295,6 +1302,17 @@ config ARCH_HAS_ELFCORE_COMPAT
config ARCH_HAS_PARANOID_L1D_FLUSH
bool
config ARCH_HAVE_TRACE_MMIO_ACCESS
bool
config ARCH_HAS_NONLEAF_PMD_YOUNG
bool
help
Architectures that select this option are capable of setting the
accessed bit in non-leaf PMD entries when using them as part of linear
address translations. Page table walkers that clear the accessed bit
may use this capability to reduce their search space.
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"

View File

@@ -54,6 +54,7 @@ config ARM
select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_IRQ_IPI if SMP
select ARCH_WANTS_IRQ_RAW if GENERIC_IRQ_IPI
select GENERIC_CPU_AUTOPROBE
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP

1
arch/arm/OWNERS Normal file
View File

@@ -0,0 +1 @@
include ../arm64/OWNERS

View File

@@ -77,10 +77,10 @@ CPPFLAGS_vmlinux.lds += -DTEXT_OFFSET="$(TEXT_OFFSET)"
CPPFLAGS_vmlinux.lds += -DMALLOC_SIZE="$(MALLOC_SIZE)"
compress-$(CONFIG_KERNEL_GZIP) = gzip
compress-$(CONFIG_KERNEL_LZO) = lzo
compress-$(CONFIG_KERNEL_LZMA) = lzma
compress-$(CONFIG_KERNEL_XZ) = xzkern
compress-$(CONFIG_KERNEL_LZ4) = lz4
compress-$(CONFIG_KERNEL_LZO) = lzo_with_size
compress-$(CONFIG_KERNEL_LZMA) = lzma_with_size
compress-$(CONFIG_KERNEL_XZ) = xzkern_with_size
compress-$(CONFIG_KERNEL_LZ4) = lz4_with_size
libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o

View File

@@ -6,5 +6,6 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
void kvm_arm_init_hyp_services(void);
#endif

View File

@@ -62,14 +62,8 @@ user_backtrace(struct frame_tail __user *tail,
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
struct frame_tail __user *tail;
if (guest_cbs && guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->ARM_pc);
if (!current->mm)
@@ -99,44 +93,25 @@ callchain_trace(struct stackframe *fr,
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
struct stackframe fr;
if (guest_cbs && guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
if (guest_cbs && guest_cbs->is_in_guest())
return guest_cbs->get_guest_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
int misc = 0;
if (guest_cbs && guest_cbs->is_in_guest()) {
if (guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
return misc;
}

View File

@@ -51,6 +51,10 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_raise);
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_exit);
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -727,7 +731,12 @@ void __init set_smp_ipi_range(int ipi_base, int n)
WARN_ON(err);
ipi_desc[i] = irq_to_desc(ipi_base + i);
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
if (i != IPI_RESCHEDULE)
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
else
/* The rescheduling IPI is special... */
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN|IRQ_RAW);
}
ipi_irq_base = ipi_base;

View File

@@ -32,7 +32,7 @@ pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
static __init void *kasan_alloc_block(size_t size)
{
return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE);
MEMBLOCK_ALLOC_NOLEAKTRACE, NUMA_NO_NODE);
}
static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,

View File

@@ -10,6 +10,7 @@ config ARM64
select ACPI_SPCR_TABLE if ACPI
select ACPI_PPTT if ACPI
select ARCH_HAS_DEBUG_WX
select ARCH_BINFMT_ELF_EXTRA_PHDRS
select ARCH_BINFMT_ELF_STATE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_MEMORY_HOTPLUG
@@ -25,9 +26,12 @@ config ARM64
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_IOREMAP_PHYS_HOOKS
select ARCH_HAS_KCOV
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEM_RELINQUISH
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
@@ -45,6 +49,7 @@ config ARM64
select ARCH_HAS_ZONE_DMA_SET if EXPERT
select ARCH_HAVE_ELF_PROT
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_HAVE_TRACE_MMIO_ACCESS
select ARCH_INLINE_READ_LOCK if !PREEMPTION
select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION
@@ -122,6 +127,7 @@ config ARM64
select GENERIC_FIND_FIRST_BIT
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_IPI
select ARCH_WANTS_IRQ_RAW
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
@@ -135,6 +141,7 @@ config ARM64
select GENERIC_VDSO_TIME_NS
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_MOD_ARCH_SPECIFIC if (ARM64_MODULE_PLTS || KVM)
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
select HAVE_PCI
@@ -185,6 +192,7 @@ config ARM64
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KVM
select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
@@ -203,7 +211,7 @@ config ARM64
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN_GENERIC
select KASAN_VMALLOC if KASAN
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
@@ -221,6 +229,7 @@ config ARM64
select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
select TRACE_IRQFLAGS_SUPPORT
select TRACE_IRQFLAGS_NMI_SUPPORT
select ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT
help
ARM 64-bit (AArch64) Linux support.
@@ -699,6 +708,130 @@ config ARM64_ERRATUM_1508412
If unsure, say Y.
config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
bool
config ARM64_ERRATUM_2658417
bool "Cortex-A510: 2658417: remove BF16 support due to incorrect result"
default y
help
This option adds the workaround for ARM Cortex-A510 erratum 2658417.
Affected Cortex-A510 (r0p0 to r1p1) may produce the wrong result for
BFMMLA or VMMLA instructions in rare circumstances when a pair of
A510 CPUs are using shared neon hardware. As the sharing is not
discoverable by the kernel, hide the BF16 HWCAP to indicate that
user-space should not be using these instructions.
If unsure, say Y.
config ARM64_ERRATUM_2119858
bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
default y
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
This option adds the workaround for ARM Cortex-A710 erratum 2119858.
Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
Work around the issue by always making sure we move the TRBPTR_EL1 by
256 bytes before enabling the buffer and filling the first 256 bytes of
the buffer with ETM ignore packets upon disabling.
If unsure, say Y.
config ARM64_ERRATUM_2139208
bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode"
default y
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
This option adds the workaround for ARM Neoverse-N2 erratum 2139208.
Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
Work around the issue by always making sure we move the TRBPTR_EL1 by
256 bytes before enabling the buffer and filling the first 256 bytes of
the buffer with ETM ignore packets upon disabling.
If unsure, say Y.
config ARM64_WORKAROUND_TSB_FLUSH_FAILURE
bool
config ARM64_ERRATUM_2054223
bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace"
default y
select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
help
Enable workaround for ARM Cortex-A710 erratum 2054223.
Affected cores may fail to flush the trace data on a TSB instruction, when
the PE is in trace prohibited state. This will cause a few bytes of the
cached trace to be lost.
The workaround is to issue two TSB instructions consecutively on affected cores.
If unsure, say Y.
config ARM64_ERRATUM_2067961
bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace"
default y
select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
help
Enable workaround for ARM Neoverse-N2 erratum 2067961.
Affected cores may fail to flush the trace data on a TSB instruction, when
the PE is in trace prohibited state. This will cause a few bytes of the
cached trace to be lost.
The workaround is to issue two TSB instructions consecutively on affected cores.
If unsure, say Y.
config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
bool
config ARM64_ERRATUM_2253138
bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range"
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
This option adds the workaround for ARM Neoverse-N2 erratum 2253138.
Affected Neoverse-N2 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
Work around this in the driver by always making sure that there is a
page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
If unsure, say Y.
config ARM64_ERRATUM_2224489
bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
This option adds the workaround for ARM Cortex-A710 erratum 2224489.
Affected Cortex-A710 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
Work around this in the driver by always making sure that there is a
page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
If unsure, say Y.
config ARM64_ERRATUM_2441009
bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
@@ -1639,6 +1772,21 @@ config ARM64_TLB_RANGE
The feature introduces new assembly instructions, and they are
supported when binutils >= 2.30.
config ARM64_MPAM
bool "Enable support for MPAM"
help
Memory Partitioning and Monitoring is an optional extension
that allows the CPUs to mark load and store transactions with
labels for partition-id and performance-monitoring-group.
System components, such as the caches, can use the partition-id
to apply a performance policy. MPAM monitors can use the
partition-id and performance-monitoring-group to measure the
cache occupancy or data throughput.
Use of this extension requires CPU support, support in the
memory system components (MSC), and a description from firmware
of where the MSC are in the address space.
endmenu
menu "ARMv8.5 architectural features"
@@ -1727,6 +1875,7 @@ config ARM64_MTE
depends on AS_HAS_LSE_ATOMICS
# Required for tag checking in the uaccess routines
depends on ARM64_PAN
select ARCH_HAS_SUBPAGE_FAULTS
select ARCH_USES_HIGH_VMA_FLAGS
help
Memory Tagging (part of the ARMv8.5 Extensions) provides
@@ -1798,7 +1947,6 @@ config ARM64_SVE
config ARM64_MODULE_PLTS
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
select HAVE_MOD_ARCH_SPECIFIC
help
Allocate PLTs when loading modules so that jumps and calls whose
targets are too far away for their relative offsets to be encoded
@@ -1932,6 +2080,12 @@ config CMDLINE_FROM_BOOTLOADER
the boot loader doesn't provide any, the default kernel command
string provided in CMDLINE will be used.
config CMDLINE_EXTEND
bool "Extend bootloader kernel arguments"
help
The command-line arguments provided by the boot loader will be
appended to the default kernel command string.
config CMDLINE_FORCE
bool "Always use the default kernel command string"
help

View File

@@ -167,7 +167,6 @@ config ARCH_MEDIATEK
config ARCH_MESON
bool "Amlogic Platforms"
select COMMON_CLK
select MESON_IRQ_GPIO
help
This enables support for the arm64 based Amlogic SoCs
such as the s905, S905X/D, S912, A113X/D or S905X/D2

View File

@@ -148,7 +148,10 @@ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
boot := arch/arm64/boot
KBUILD_IMAGE := $(boot)/Image.gz
# Don't compile Image in mixed build with "all" target
ifndef KBUILD_MIXED_TREE
all: Image.gz
endif
Image: vmlinux
@@ -189,6 +192,11 @@ archclean:
$(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso
$(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso32
ifeq ($(CONFIG_KVM),y)
archscripts:
$(Q)$(MAKE) $(build)=arch/arm64/tools gen-hyprel
endif
ifeq ($(KBUILD_EXTMOD),)
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
# In order to do that, we should use the archprepare target, but we can't since

View File

@@ -0,0 +1,49 @@
# SPDX-License-Identifier: GPL-2.0
#
# This file is included by the generic Kbuild makefile to permit the
# architecture to perform postlink actions on vmlinux and any .ko module file.
# In this case, we only need it for fips140.ko, which needs some postprocessing
# for the integrity check mandated by FIPS. This involves making copies of the
# relocation sections so that the module will have access to them at
# initialization time, and calculating and injecting a HMAC digest into the
# module. All other targets are NOPs.
#
PHONY := __archpost
__archpost:
-include include/config/auto.conf
include scripts/Kbuild.include
# Tool invoked as the final step of gen_hmac below. Going by its name and the
# header comment of this file, it calculates the HMAC digest and injects it
# into the module.
CMD_FIPS140_GEN_HMAC = crypto/fips140_gen_hmac
# gen_hmac post-processes the target module ($@) in three chained steps; each
# step runs only if the previous one succeeded (&&):
#  1. dump the module's .rela.text* and .rela.rodata* relocation sections
#     (actual section names are looked up with readelf + grep) into the side
#     files $@.rela.text and $@.rela.rodata;
#  2. add those dumps back to the module as .init.rela.text and
#     .init.rela.rodata, flagged alloc,readonly;
#  3. run $(CMD_FIPS140_GEN_HMAC) on the module.
# The side files created in step 1 are the ones removed by the clean target.
quiet_cmd_gen_hmac = HMAC $@
cmd_gen_hmac = $(OBJCOPY) $@ \
--dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.text\S*')=$@.rela.text \
--dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.rodata\S*')=$@.rela.rodata && \
$(OBJCOPY) $@ \
--add-section=.init.rela.text=$@.rela.text \
--add-section=.init.rela.rodata=$@.rela.rodata \
--set-section-flags=.init.rela.text=alloc,readonly \
--set-section-flags=.init.rela.rodata=alloc,readonly && \
$(CMD_FIPS140_GEN_HMAC) $@
# `@true` prevents complaints when there is nothing to be done
vmlinux: FORCE
@true
$(objtree)/crypto/fips140.ko: FORCE
$(call cmd,gen_hmac)
%.ko: FORCE
@true
clean:
rm -f $(objtree)/crypto/fips140.ko.rela.*
PHONY += FORCE clean
FORCE:
.PHONY: $(PHONY)

4
arch/arm64/OWNERS Normal file
View File

@@ -0,0 +1,4 @@
per-file crypto/**=file:/crypto/OWNERS
per-file {include,kernel,kvm,lib}/**=mzyngier@google.com,willdeacon@google.com
per-file mm/**=file:/mm/OWNERS
per-file net/**=file:/net/OWNERS

View File

@@ -1,12 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
dtb-$(CONFIG_ARCH_MESON) += meson-axg-s400.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-sei510.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-sei510-android.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-u200.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-x96-max.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gsking-x.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gtking.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gtking-pro.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3-android.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-s922x-khadas-vim3.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-odroid-n2.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-odroid-n2-plus.dtb
@@ -50,7 +52,9 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxm-vega-s96.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-gxm-wetek-core2.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-bananapi-m5.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l-android.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-c4.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-hc4.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-sei610.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-sei610-android.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-a1-ad401.dtb

View File

@@ -0,0 +1,58 @@
// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (c) 2020 BayLibre SAS. All rights reserved.
*/
/dts-v1/;
/plugin/;
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/gpio/meson-g12a-gpio.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
/ {
compatible = "seirobotics,sei510", "amlogic,g12a";
model = "SEI Robotics SEI510";
fragment@101 {
target-path = "/";
__overlay__ {
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
ramoops@d000000 {
compatible = "ramoops";
reg = <0x0 0x0d000000 0x0 0x00100000>;
record-size = <0x8000>;
console-size = <0x8000>;
ftrace-size = <0x0>;
pmsg-size = <0x8000>;
};
};
adc_keys {
button-onoff {
linux,code = <BTN_0>;
};
};
cvbs-connector {
status = "disabled";
};
};
};
};
&vddao_3v3_t {
gpio-open-drain;
};
&uart_A {
bluetooth {
interrupt-parent = <&gpio_intc>;
interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "host-wakeup";
};
};

View File

@@ -0,0 +1,55 @@
// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (c) 2019 BayLibre SAS. All rights reserved.
*/
/dts-v1/;
/plugin/;
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/gpio/meson-g12a-gpio.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/interrupt-controller/irq.h>
/ {
compatible = "khadas,vim3", "amlogic,a311d", "amlogic,g12b";
model = "Khadas VIM3";
fragment@101 {
target-path = "/";
__overlay__ {
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
ramoops@d000000 {
compatible = "ramoops";
reg = <0x0 0x0d000000 0x0 0x00100000>;
record-size = <0x8000>;
console-size = <0x8000>;
ftrace-size = <0x0>;
pmsg-size = <0x8000>;
};
};
};
};
};
&vcc_5v {
gpio-open-drain;
};
&uart_C {
status = "okay";
pinctrl-0 = <&uart_c_pins>;
pinctrl-names = "default";
};
&emmc_pwrseq{
status = "okay";
};
&sd_emmc_a {
/* WiFi firmware requires power to be kept while in suspend */
keep-power-in-suspend;
};

View File

@@ -0,0 +1,133 @@
// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (c) 2019 BayLibre SAS. All rights reserved.
*/
/dts-v1/;
/plugin/;
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/gpio/meson-g12a-gpio.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/interrupt-controller/irq.h>
/ {
compatible = "khadas,vim3l", "amlogic,sm1";
model = "Khadas VIM3L";
fragment@101 {
target-path = "/";
__overlay__ {
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
ramoops@d000000 {
compatible = "ramoops";
reg = <0x0 0x0d000000 0x0 0x00100000>;
record-size = <0x8000>;
console-size = <0x8000>;
ftrace-size = <0x0>;
pmsg-size = <0x8000>;
};
};
};
};
};
&vcc_5v {
gpio-open-drain;
};
&uart_A {
bluetooth {
interrupt-parent = <&gpio_intc>;
interrupts = <95 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "host-wakeup";
};
};
&uart_C {
status = "disabled";
pinctrl-0 = <&uart_c_pins>;
pinctrl-names = "default";
};
&emmc_pwrseq{
status = "okay";
};
&sd_emmc_a {
/* WiFi firmware requires power to be kept while in suspend */
keep-power-in-suspend;
};
/*
 * Enable the spicc1 controller with a single GPIO chip-select (GPIOH_6,
 * active low). Three alternative child devices are described below, all at
 * unit-address 0 (reg = <0>), i.e. they share that one chip-select. Only
 * spidev@0 has status "okay"; the two "nanohub" variants are kept disabled.
 */
&spicc1 {
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&spicc1_pins>;
cs-gpios = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>;
#address-cells = <1>;
#size-cells = <0>;
/* Default, enabled child on chip-select 0. */
spidev@0 {
compatible = "rohm,dh2228fv";
reg = <0>;
spi-max-frequency = <500000>;
status = "okay";
};
/*
 * "nanohub" sensor hub, neonkey variant. Disabled: it uses the same
 * chip-select as spidev@0 above.
 */
neonkey@0 {
compatible = "nanohub";
reg = <0>;
spi-max-frequency = <500000>;
sensorhub,nreset-gpio = <&gpio GPIOA_0 0>;
sensorhub,boot0-gpio = <&gpio GPIOA_3 0>; /* Fake */
sensorhub,wakeup-gpio = <&gpio GPIOA_2 0>; /* A2 -> PB9 */
sensorhub,irq1-gpio = <&gpio GPIOA_1 0>; /* A1 -> PB5 */
interrupt-parent = <&gpio_intc>;
interrupts = <62 IRQ_TYPE_EDGE_RISING>; /* A1 */
/* sensorhub,spi-cs-gpio = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>; Optional */
sensorhub,bl-addr = <0x08000000>;
sensorhub,kernel-addr = <0x0800C000>;
sensorhub,shared-addr = <0x08040000>;
/*
 * Six entries, matching num-flash-banks below. Each triple is presumably
 * <bank-index base-address size> -- TODO confirm against the nanohub
 * driver binding.
 */
sensorhub,flash-banks = <0 0x08000000 0x04000>,
<3 0x0800C000 0x04000>,
<4 0x08010000 0x10000>,
<5 0x08020000 0x20000>,
<6 0x08040000 0x20000>,
<7 0x08060000 0x20000>;
sensorhub,num-flash-banks = <6>;
status = "disabled";
};
/*
 * "nanohub" sensor hub, argonkey variant. Also disabled. Compared with
 * neonkey@0 it sets spi-cpol, swaps the wakeup/irq1 GPIOs (A1/A2), uses
 * interrupt 63 instead of 62, and lists the shared flash banks separately.
 */
argonkey@0 {
compatible = "nanohub";
reg = <0>;
spi-max-frequency = <500000>;
spi-cpol;
sensorhub,nreset-gpio = <&gpio GPIOA_0 0>;
sensorhub,boot0-gpio = <&gpio GPIOA_3 0>;
sensorhub,wakeup-gpio = <&gpio GPIOA_1 0>; /* A1 -> PA0 */
sensorhub,irq1-gpio = <&gpio GPIOA_2 0>; /* A2 -> PA1 */
interrupt-parent = <&gpio_intc>;
interrupts = <63 IRQ_TYPE_EDGE_RISING>; /* A2 */
sensorhub,bl-addr = <0x08000000>;
sensorhub,kernel-addr = <0x0800C000>;
/* Four entries in flash-banks, matching num-flash-banks. */
sensorhub,num-flash-banks = <4>;
sensorhub,flash-banks = <0 0x08000000 0x04000>,
<3 0x0800C000 0x04000>,
<4 0x08010000 0x10000>,
<5 0x08020000 0x20000>;
sensorhub,shared-addr = <0x08040000>;
/* Six entries in shared-flash-banks, matching num-shared-flash-banks. */
sensorhub,num-shared-flash-banks = <6>;
sensorhub,shared-flash-banks = <6 0x08040000 0x20000>,
<7 0x08060000 0x20000>,
<8 0x08080000 0x20000>,
<9 0x080A0000 0x20000>,
<10 0x080C0000 0x20000>,
<11 0x080E0000 0x20000>;
status = "disabled";
};
};

View File

@@ -0,0 +1,71 @@
// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
/*
* Copyright (c) 2020 BayLibre SAS. All rights reserved.
*/
/dts-v1/;
/plugin/;
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/gpio/meson-g12a-gpio.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
/ {
compatible = "seirobotics,sei610", "amlogic,sm1";
model = "SEI Robotics SEI610";
fragment@101 {
target-path = "/";
__overlay__ {
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
ramoops@d000000 {
compatible = "ramoops";
reg = <0x0 0x0d000000 0x0 0x00100000>;
record-size = <0x8000>;
console-size = <0x8000>;
ftrace-size = <0x0>;
pmsg-size = <0x8000>;
};
};
};
};
};
&vddao_3v3_t {
gpio-open-drain;
};
&emmc_pwrseq {
status = "okay";
};
&sd_emmc_a {
/* WiFi firmware requires power to be kept while in suspend */
keep-power-in-suspend;
};
&uart_C {
status = "disabled";
pinctrl-0 = <&uart_c_pins>;
pinctrl-names = "default";
};
&spicc0 {
status = "disabled";
pinctrl-names = "default";
pinctrl-0 = <&spicc0_x_pins>;
cs-gpios = <&gpio GPIOX_10 GPIO_ACTIVE_LOW>;
#address-cells = <1>;
#size-cells = <0>;
spidev@0 {
compatible = "rohm,dh2228fv";
reg = <0>;
spi-max-frequency = <500000>;
status = "disabled";
};
};

View File

@@ -16,6 +16,8 @@
/ {
model = "Qualcomm Technologies, Inc. Robotics RB5";
compatible = "qcom,qrb5165-rb5", "qcom,sm8250";
qcom,msm-id = <455 0x20001>;
qcom,board-id = <11 3>;
aliases {
serial0 = &uart12;

View File

@@ -17,6 +17,8 @@
/ {
model = "Thundercomm Dragonboard 845c";
compatible = "thundercomm,db845c", "qcom,sdm845";
qcom,msm-id = <341 0x20001>;
qcom,board-id = <8 0>;
aliases {
serial0 = &uart9;

View File

@@ -31,7 +31,7 @@
};
&uart0 {
status = "okay";
status = "disabled";
bluetooth {
compatible = "brcm,bcm43438-bt";

View File

@@ -0,0 +1,3 @@
CONFIG_ARM64_16K_PAGES=y
# b/241785095
# CONFIG_INCREMENTAL_FS is not set

Some files were not shown because too many files have changed in this diff Show More