Merge tag 'v5.15.60' into rpi-5.15.y

This is the 5.15.60 stable release
Author: Dom Cobley
Date:   2022-08-23 14:07:41 +01:00
44 changed files with 519 additions and 127 deletions


@@ -422,6 +422,14 @@ The possible values in this file are:
   'RSB filling'   Protection of RSB on context switch enabled
   =============   ===========================================
 
+  - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
+
+  ===========================  =======================================================
+  'PBRSB-eIBRS: SW sequence'   CPU is affected and protection of RSB on VMEXIT enabled
+  'PBRSB-eIBRS: Vulnerable'    CPU is vulnerable
+  'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
+  ===========================  =======================================================
+
   Full mitigation might require a microcode update from the CPU
   vendor. When the necessary microcode is not available, the kernel will
   report vulnerability.
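These strings are appended to the single status line exposed in sysfs. A minimal userspace sketch that prints it (assuming the standard sysfs path on an x86 system; the snippet is illustrative, not part of the patch):

#include <stdio.h>

int main(void)
{
	/* The file holds one human-readable line, e.g.
	 * "Mitigation: ..., RSB filling, PBRSB-eIBRS: SW sequence". */
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");
	char line[512];

	if (!f)
		return 1;
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}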


@@ -23,6 +23,7 @@ properties:
       - brcm,bcm4345c5
       - brcm,bcm43540-bt
       - brcm,bcm4335a0
+      - brcm,bcm4349-bt
 
   shutdown-gpios:
     maxItems: 1


@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
 PATCHLEVEL = 15
-SUBLEVEL = 59
+SUBLEVEL = 60
 EXTRAVERSION =
 NAME = Trick or Treat


@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
 {
 	if (unlikely(!dctx->sset)) {
 		if (!dctx->rset) {
-			poly1305_init_arch(dctx, src);
+			poly1305_init_arm64(&dctx->h, src);
 			src += POLY1305_BLOCK_SIZE;
 			len -= POLY1305_BLOCK_SIZE;
 			dctx->rset = 1;


@@ -103,8 +103,8 @@
 /*
  * Initial memory map attributes.
  */
-#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
+#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
 
 #if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)


@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	subs	x1, x1, #64
 	b.ne	1b
 
-	mov	x7, SWAPPER_MM_MMUFLAGS
+	mov_q	x7, SWAPPER_MM_MMUFLAGS
 
 	/*
 	 * Create the identity mapping.


@@ -2427,7 +2427,7 @@ config RETPOLINE
 config RETHUNK
 	bool "Enable return-thunks"
 	depends on RETPOLINE && CC_HAS_RETURN_THUNK
-	default y
+	default y if X86_64
 	help
 	  Compile the kernel with the return-thunks compiler option to guard
 	  against kernel-to-user data leaks by avoiding return speculation.
@@ -2436,21 +2436,21 @@ config RETHUNK
 config CPU_UNRET_ENTRY
 	bool "Enable UNRET on kernel entry"
-	depends on CPU_SUP_AMD && RETHUNK
+	depends on CPU_SUP_AMD && RETHUNK && X86_64
 	default y
 	help
 	  Compile the kernel with support for the retbleed=unret mitigation.
 
 config CPU_IBPB_ENTRY
 	bool "Enable IBPB on kernel entry"
-	depends on CPU_SUP_AMD
+	depends on CPU_SUP_AMD && X86_64
 	default y
 	help
 	  Compile the kernel with support for the retbleed=ibpb mitigation.
 
 config CPU_IBRS_ENTRY
 	bool "Enable IBRS on kernel entry"
-	depends on CPU_SUP_INTEL
+	depends on CPU_SUP_INTEL && X86_64
 	default y
 	help
 	  Compile the kernel with support for the spectre_v2=ibrs mitigation.


@@ -301,6 +301,7 @@
 #define X86_FEATURE_RETHUNK		(11*32+14) /* "" Use REturn THUNK */
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
 #define X86_FEATURE_USE_IBPB_FW		(11*32+16) /* "" Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
@@ -446,5 +447,6 @@
 #define X86_BUG_SRBDS			X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
 #define X86_BUG_MMIO_STALE_DATA		X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
 #define X86_BUG_RETBLEED		X86_BUG(26) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB		X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 
 #endif /* _ASM_X86_CPUFEATURES_H */


@@ -642,6 +642,7 @@ struct kvm_vcpu_arch {
 	u64 ia32_misc_enable_msr;
 	u64 smbase;
 	u64 smi_count;
+	bool at_instruction_boundary;
 	bool tpr_access_reporting;
 	bool xsaves_enabled;
 	u64 ia32_xss;
@@ -1271,6 +1272,8 @@ struct kvm_vcpu_stat {
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
+	u64 preemption_reported;
+	u64 preemption_other;
 	u64 guest_mode;
 };


@@ -148,6 +148,10 @@
 						 * are restricted to targets in
 						 * kernel.
 						 */
+#define ARCH_CAP_PBRSB_NO		BIT(24)	/*
+						 * Not susceptible to Post-Barrier
+						 * Return Stack Buffer Predictions.
+						 */
 
 #define MSR_IA32_FLUSH_CMD		0x0000010b
 #define L1D_FLUSH			BIT(0)	/*
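On a live system the new capability bit can be probed through the msr driver. A hedged userspace sketch (assumes root and CONFIG_X86_MSR; 0x10a is MSR_IA32_ARCH_CAPABILITIES, and bit 24 is the ARCH_CAP_PBRSB_NO bit defined above):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	/* The msr driver exposes MSRs as a file; the MSR number is the
	 * read offset. Fails cleanly if the MSR is unsupported. */
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), 0x10a) != sizeof(val))
		return 1;
	printf("PBRSB_NO: %s\n", (val & (1ULL << 24)) ? "yes" : "no");
	close(fd);
	return 0;
}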


@@ -60,7 +60,9 @@
 774:						\
 	add	$(BITS_PER_LONG/8) * 2, sp;	\
 	dec	reg;				\
-	jnz	771b;
+	jnz	771b;				\
+	/* barrier for jnz misprediction */	\
+	lfence;
 
 #ifdef __ASSEMBLY__
@@ -118,13 +120,28 @@
 #endif
 .endm
 
+.macro ISSUE_UNBALANCED_RET_GUARD
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call .Lunbalanced_ret_guard_\@
+	int3
+.Lunbalanced_ret_guard_\@:
+	add $(BITS_PER_LONG/8), %_ASM_SP
+	lfence
+.endm
+
 /*
  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
  * monstrosity above, manually.
  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+.ifb \ftr2
 	ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+.else
+	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+.endif
 	__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+.Lunbalanced_\@:
+	ISSUE_UNBALANCED_RET_GUARD
 .Lskip_rsb_\@:
 .endm


@@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
 	}
 }
 
+static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
+{
+	/*
+	 * Similar to context switches, there are two types of RSB attacks
+	 * after VM exit:
+	 *
+	 * 1) RSB underflow
+	 *
+	 * 2) Poisoned RSB entry
+	 *
+	 * When retpoline is enabled, both are mitigated by filling/clearing
+	 * the RSB.
+	 *
+	 * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+	 * prediction isolation protections, RSB still needs to be cleared
+	 * because of #2. Note that SMEP provides no protection here, unlike
+	 * user-space-poisoned RSB entries.
+	 *
+	 * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
+	 * bug is present then a LITE version of RSB protection is required,
+	 * just a single call needs to retire before a RET is executed.
+	 */
+	switch (mode) {
+	case SPECTRE_V2_NONE:
+		return;
+
+	case SPECTRE_V2_EIBRS_LFENCE:
+	case SPECTRE_V2_EIBRS:
+		if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
+			setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
+			pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
+		}
+		return;
+
+	case SPECTRE_V2_EIBRS_RETPOLINE:
+	case SPECTRE_V2_RETPOLINE:
+	case SPECTRE_V2_LFENCE:
+	case SPECTRE_V2_IBRS:
+		setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+		pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
+		return;
+	}
+
+	pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
+	dump_stack();
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void)
 	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
 	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
-	/*
-	 * Similar to context switches, there are two types of RSB attacks
-	 * after vmexit:
-	 *
-	 * 1) RSB underflow
-	 *
-	 * 2) Poisoned RSB entry
-	 *
-	 * When retpoline is enabled, both are mitigated by filling/clearing
-	 * the RSB.
-	 *
-	 * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
-	 * prediction isolation protections, RSB still needs to be cleared
-	 * because of #2. Note that SMEP provides no protection here, unlike
-	 * user-space-poisoned RSB entries.
-	 *
-	 * eIBRS, on the other hand, has RSB-poisoning protections, so it
-	 * doesn't need RSB clearing after vmexit.
-	 */
-	if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
-	    boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
-		setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+	spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
 
 	/*
 	 * Retpoline protects the kernel, but doesn't protect firmware. IBRS
@@ -2285,6 +2311,19 @@ static char *ibpb_state(void)
 	return "";
 }
 
+static char *pbrsb_eibrs_state(void)
+{
+	if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
+		if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
+		    boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
+			return ", PBRSB-eIBRS: SW sequence";
+		else
+			return ", PBRSB-eIBRS: Vulnerable";
+	} else {
+		return ", PBRSB-eIBRS: Not affected";
+	}
+}
+
 static ssize_t spectre_v2_show_state(char *buf)
 {
 	if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf)
 	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
 		return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-	return sprintf(buf, "%s%s%s%s%s%s\n",
+	return sprintf(buf, "%s%s%s%s%s%s%s\n",
 		       spectre_v2_strings[spectre_v2_enabled],
 		       ibpb_state(),
 		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
 		       stibp_state(),
 		       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+		       pbrsb_eibrs_state(),
 		       spectre_v2_module_string());
 }


@@ -1027,6 +1027,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SWAPGS		BIT(6)
 #define NO_ITLB_MULTIHIT	BIT(7)
 #define NO_SPECTRE_V2		BIT(8)
+#define NO_EIBRS_PBRSB		BIT(9)
 
 #define VULNWL(vendor, family, model, whitelist)	\
 	X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1067,7 +1068,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/*
 	 * Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1077,7 +1078,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	 * good enough for our purposes.
 	 */
-	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_TREMONT,		NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_TREMONT_L,		NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/* AMD Family 0xf - 0x12 */
 	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
@@ -1255,6 +1258,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		setup_force_cpu_bug(X86_BUG_RETBLEED);
 	}
 
+	if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
+	    !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
+	    !(ia32_cap & ARCH_CAP_PBRSB_NO))
+		setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;

@@ -832,7 +832,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 
 	/* If source buffer is not aligned then use an intermediate buffer */
 	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
-		src_tpage = alloc_page(GFP_KERNEL);
+		src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
 		if (!src_tpage)
 			return -ENOMEM;
 
@@ -853,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
 		int dst_offset;
 
-		dst_tpage = alloc_page(GFP_KERNEL);
+		dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
 		if (!dst_tpage) {
 			ret = -ENOMEM;
 			goto e_free;


@@ -4263,6 +4263,8 @@ out:
 
 static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
+	if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
+		vcpu->arch.at_instruction_boundary = true;
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)


@@ -197,11 +197,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	 * entries and (in some cases) RSB underflow.
 	 *
 	 * eIBRS has its own protection against poisoned RSB, so it doesn't
-	 * need the RSB filling sequence.  But it does need to be enabled
-	 * before the first unbalanced RET.
+	 * need the RSB filling sequence.  But it does need to be enabled, and a
+	 * single call to retire, before the first unbalanced RET.
 	 */
 
-	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
+			   X86_FEATURE_RSB_VMEXIT_LITE
 
 	pop %_ASM_ARG2	/* @flags */
 	pop %_ASM_ARG1	/* @vmx */


@@ -6471,6 +6471,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 		return;
 
 	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+	vcpu->arch.at_instruction_boundary = true;
 }
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)


@@ -277,6 +277,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, nested_run),
 	STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
 	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
+	STATS_DESC_COUNTER(VCPU, preemption_reported),
+	STATS_DESC_COUNTER(VCPU, preemption_other),
 	STATS_DESC_ICOUNTER(VCPU, guest_mode)
 };
 
@@ -4371,6 +4373,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	struct kvm_memslots *slots;
 	static const u8 preempted = KVM_VCPU_PREEMPTED;
 
+	/*
+	 * The vCPU can be marked preempted if and only if the VM-Exit was on
+	 * an instruction boundary and will not trigger guest emulation of any
+	 * kind (see vcpu_run). Vendor specific code controls (conservatively)
+	 * when this is true, for example allowing the vCPU to be marked
+	 * preempted if and only if the VM-Exit was due to a host interrupt.
+	 */
+	if (!vcpu->arch.at_instruction_boundary) {
+		vcpu->stat.preemption_other++;
+		return;
+	}
+
+	vcpu->stat.preemption_reported++;
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -4400,7 +4415,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
-	if (vcpu->preempted && !vcpu->arch.guest_state_protected)
-		vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+	if (vcpu->preempted) {
+		if (!vcpu->arch.guest_state_protected)
+			vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
 
 	/*
@@ -4413,6 +4429,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 		else
 			kvm_steal_time_set_preempted(vcpu);
 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	}
 
 	static_call(kvm_x86_vcpu_put)(vcpu);
 
 	vcpu->arch.last_host_tsc = rdtsc();
@@ -9934,6 +9951,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.l1tf_flush_l1d = true;
 
 	for (;;) {
+		/*
+		 * If another guest vCPU requests a PV TLB flush in the middle
+		 * of instruction emulation, the rest of the emulation could
+		 * use a stale page translation. Assume that any code after
+		 * this point can start executing an instruction.
+		 */
+		vcpu->arch.at_instruction_boundary = false;
 		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
 		} else {


@@ -97,7 +97,9 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
 	 * behalf of the vCPU. Only if the VMM does actually block
 	 * does it need to enter RUNSTATE_blocked.
 	 */
-	if (vcpu->preempted)
-		kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+	if (WARN_ON_ONCE(!vcpu->preempted))
+		return;
+
+	kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
 }


@@ -265,6 +265,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
 	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
 	INIT_HLIST_HEAD(&ioc->icq_list);
 	INIT_WORK(&ioc->release_work, ioc_release_fn);
+	ioc->ioprio = IOPRIO_DEFAULT;
 
 	/*
 	 * Try to install.  ioc shouldn't be installed if someone else


@@ -189,9 +189,9 @@ out:
 int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
 	if (!ioprio_valid(aprio))
-		aprio = IOPRIO_DEFAULT;
+		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
 	if (!ioprio_valid(bprio))
-		bprio = IOPRIO_DEFAULT;
+		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
 
 	return min(aprio, bprio);
 }
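The open-coded value matters because of how an I/O priority packs into 16 bits. A standalone sketch of the arithmetic (the macros mirror include/linux/ioprio.h; the printed values are worked out here, not taken from the patch):

#include <stdio.h>

/* Mirrors of the kernel's packing: class in the top 3 bits, data below. */
#define IOPRIO_CLASS_SHIFT	13
#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))
#define IOPRIO_CLASS_NONE	0
#define IOPRIO_CLASS_BE		2
#define IOPRIO_BE_NORM		4

int main(void)
{
	/* BE/norm packs to (2 << 13) | 4 = 0x4004, while CLASS_NONE packs
	 * to 0. With IOPRIO_DEFAULT redefined to CLASS_NONE (see the
	 * ioprio.h hunk below), min() in ioprio_best() would always pick 0,
	 * which is why the explicit BE/norm value is kept there. */
	printf("BE/norm = 0x%x\n", IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM));
	printf("none    = 0x%x\n", IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0));
	return 0;
}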


@@ -29,16 +29,26 @@
 
 #undef pr_fmt
 #define pr_fmt(fmt) "BERT: " fmt
+
+#define ACPI_BERT_PRINT_MAX_RECORDS 5
 #define ACPI_BERT_PRINT_MAX_LEN 1024
 
 static int bert_disable;
 
+/*
+ * Print "all" the error records in the BERT table, but avoid huge spam to
+ * the console if the BIOS included oversize records, or too many records.
+ * Skipping some records here does not lose anything because the full
+ * data is available to user tools in:
+ *	/sys/firmware/acpi/tables/data/BERT
+ */
 static void __init bert_print_all(struct acpi_bert_region *region,
 				  unsigned int region_len)
 {
 	struct acpi_hest_generic_status *estatus =
 		(struct acpi_hest_generic_status *)region;
 	int remain = region_len;
+	int printed = 0, skipped = 0;
 	u32 estatus_len;
 
 	while (remain >= sizeof(struct acpi_bert_region)) {
@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		if (remain < estatus_len) {
 			pr_err(FW_BUG "Truncated status block (length: %u).\n",
 			       estatus_len);
-			return;
+			break;
 		}
 
 		/* No more error records. */
 		if (!estatus->block_status)
-			return;
+			break;
 
 		if (cper_estatus_check(estatus)) {
 			pr_err(FW_BUG "Invalid error record.\n");
-			return;
+			break;
 		}
 
-		pr_info_once("Error records from previous boot:\n");
-		if (region_len < ACPI_BERT_PRINT_MAX_LEN)
+		if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
+		    printed < ACPI_BERT_PRINT_MAX_RECORDS) {
+			pr_info_once("Error records from previous boot:\n");
 			cper_estatus_print(KERN_INFO HW_ERR, estatus);
-		else
-			pr_info_once("Max print length exceeded, table data is available at:\n"
-				     "/sys/firmware/acpi/tables/data/BERT");
+			printed++;
+		} else {
+			skipped++;
+		}
 
 		/*
 		 * Because the boot error source is "one-time polled" type,
@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		estatus = (void *)estatus + estatus_len;
 		remain -= estatus_len;
 	}
+
+	if (skipped)
+		pr_info(HW_ERR "Skipped %d error records\n", skipped);
 }
 
 static int __init setup_bert_disable(char *str)


@@ -424,23 +424,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
	 .callback = video_detect_force_native,
	 .ident = "Clevo NL5xRU",
	 .matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
-		},
-	},
-	{
-	 .callback = video_detect_force_native,
-	 .ident = "Clevo NL5xRU",
-	 .matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
-		},
-	},
-	{
-	 .callback = video_detect_force_native,
-	 .ident = "Clevo NL5xRU",
-	 .matches = {
 		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
 		},
 	},
@@ -463,28 +446,60 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 	{
	 .callback = video_detect_force_native,
	 .ident = "Clevo NL5xNU",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
+
+	/*
+	 * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10,
+	 * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo
+	 * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description
+	 * above.
+	 */
+	{
+	 .callback = video_detect_force_native,
+	 .ident = "TongFang PF5PU1G",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"),
+		},
+	},
+	{
+	 .callback = video_detect_force_native,
+	 .ident = "TongFang PF4NU1F",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"),
+		},
+	},
+	{
+	 .callback = video_detect_force_native,
+	 .ident = "TongFang PF4NU1F",
	 .matches = {
 		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"),
 		},
 	},
 	{
	 .callback = video_detect_force_native,
-	 .ident = "Clevo NL5xNU",
+	 .ident = "TongFang PF5NU1G",
	 .matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"),
 		},
 	},
 	{
	 .callback = video_detect_force_native,
-	 .ident = "Clevo NL5xNU",
+	 .ident = "TongFang PF5NU1G",
	 .matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"),
+		},
+	},
+	{
+	 .callback = video_detect_force_native,
+	 .ident = "TongFang PF5LUXG",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"),
 		},
 	},
+
 	/*
 	 * Desktops which falsely report a backlight and which our heuristics
 	 * for this do not catch.


@@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
 	{ 0x6606, "BCM4345C5"	},	/* 003.006.006 */
 	{ 0x230f, "BCM4356A2"	},	/* 001.003.015 */
 	{ 0x220e, "BCM20702A1"	},	/* 001.002.014 */
+	{ 0x420d, "BCM4349B1"	},	/* 002.002.013 */
+	{ 0x420e, "BCM4349B1"	},	/* 002.002.014 */
 	{ 0x4217, "BCM4329B1"	},	/* 002.002.023 */
 	{ 0x6106, "BCM4359C0"	},	/* 003.001.006 */
 	{ 0x4106, "BCM4335A0"	},	/* 002.001.006 */


@@ -420,6 +420,18 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
						     BTUSB_WIDEBAND_SPEECH },
 
+	/* Realtek 8852CE Bluetooth devices */
+	{ USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+
 	/* Realtek Bluetooth devices */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
	  .driver_info = BTUSB_REALTEK },
@@ -459,6 +471,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK |
						     BTUSB_WIDEBAND_SPEECH |
						     BTUSB_VALID_LE_STATES },
+	{ USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK |
+						     BTUSB_WIDEBAND_SPEECH |
+						     BTUSB_VALID_LE_STATES },
 
 	/* Additional Realtek 8723AE Bluetooth devices */
 	{ USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },


@@ -1515,8 +1515,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = {
 	{ .compatible = "brcm,bcm4345c5" },
 	{ .compatible = "brcm,bcm4330-bt" },
 	{ .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
+	{ .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data },
 	{ .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
 	{ .compatible = "brcm,bcm4335a0" },
+	{ .compatible = "infineon,cyw55572-bt" },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);


@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req)
 
 	switch(req->data[1]) {
 	case ADB_QUERY_GETDEVINFO:
-		if (req->nbytes < 3)
+		if (req->nbytes < 3 || req->data[2] >= 16)
 			break;
 		mutex_lock(&adb_handler_mutex);
 		req->reply[0] = adb_handler[req->data[2]].original_address;
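The added clause rejects an out-of-range index: req->data[2] comes from userspace and indexes the driver's fixed-size handler table. A generic sketch of the validate-before-index pattern (NR_HANDLERS is a hypothetical stand-in for the driver's table size):

#include <stddef.h>
#include <stdint.h>

#define NR_HANDLERS 16	/* hypothetical table size mirroring the driver */

struct handler { uint8_t original_address; };
static struct handler handlers[NR_HANDLERS];

/* Validate an untrusted index before using it; returns -1 on bad input. */
static int query_devinfo(const uint8_t *data, size_t nbytes, uint8_t *reply)
{
	if (nbytes < 3 || data[2] >= NR_HANDLERS)
		return -1;	/* index out of range: reject, don't clamp */
	*reply = handlers[data[2]].original_address;
	return 0;
}

int main(void)
{
	const uint8_t good[] = { 0, 1, 3 };	/* index 3: in range */
	const uint8_t bad[]  = { 0, 1, 16 };	/* index 16: rejected */
	uint8_t reply;

	return query_devinfo(good, sizeof(good), &reply) == 0 &&
	       query_devinfo(bad, sizeof(bad), &reply) == -1 ? 0 : 1;
}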


@@ -98,6 +98,7 @@ struct btrfs_block_group {
 	unsigned int to_copy:1;
 	unsigned int relocating_repair:1;
 	unsigned int chunk_item_inserted:1;
+	unsigned int zoned_data_reloc_ongoing:1;
 
 	int disk_cache_state;


@@ -3804,7 +3804,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	       block_group->start == fs_info->data_reloc_bg ||
 	       fs_info->data_reloc_bg == 0);
 
-	if (block_group->ro) {
+	if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
 		ret = 1;
 		goto out;
 	}
@@ -3865,8 +3865,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 out:
 	if (ret && ffe_ctl->for_treelog)
 		fs_info->treelog_bg = 0;
-	if (ret && ffe_ctl->for_data_reloc)
+	if (ret && ffe_ctl->for_data_reloc &&
+	    fs_info->data_reloc_bg == block_group->start) {
+		/*
+		 * Do not allow further allocations from this block group.
+		 * Compared to increasing the ->ro, setting the
+		 * ->zoned_data_reloc_ongoing flag still allows nocow
+		 * writers to come in. See btrfs_inc_nocow_writers().
+		 *
+		 * We need to disable an allocation to avoid an allocation of
+		 * regular (non-relocation data) extent. With mix of relocation
+		 * extents and regular extents, we can dispatch WRITE commands
+		 * (for relocation extents) and ZONE APPEND commands (for
+		 * regular extents) at the same time to the same zone, which
+		 * easily break the write pointer.
+		 */
+		block_group->zoned_data_reloc_ongoing = 1;
 		fs_info->data_reloc_bg = 0;
+	}
 	spin_unlock(&fs_info->relocation_bg_lock);
 	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);


@@ -5152,13 +5152,14 @@ int extent_writepages(struct address_space *mapping,
 	 */
 	btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
 	ret = extent_write_cache_pages(mapping, wbc, &epd);
-	btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 	ASSERT(ret <= 0);
 	if (ret < 0) {
+		btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 		end_write_bio(&epd, ret);
 		return ret;
 	}
 	ret = flush_write_bio(&epd);
+	btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 	return ret;
 }


@@ -3069,6 +3069,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 						ordered_extent->file_offset,
 						ordered_extent->file_offset +
 						logical_len);
+		btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
+						  ordered_extent->disk_num_bytes);
 	} else {
 		BUG_ON(root == fs_info->tree_root);
 		ret = insert_ordered_extent_file_extent(trans, ordered_extent);


@@ -1623,3 +1623,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 }
+
+void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+				       u64 length)
+{
+	struct btrfs_block_group *block_group;
+
+	if (!btrfs_is_zoned(fs_info))
+		return;
+
+	block_group = btrfs_lookup_block_group(fs_info, logical);
+	/* It should be called on a previous data relocation block group. */
+	ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
+
+	spin_lock(&block_group->lock);
+	if (!block_group->zoned_data_reloc_ongoing)
+		goto out;
+
+	/* All relocation extents are written. */
+	if (block_group->start + block_group->alloc_offset == logical + length) {
+		/* Now, release this block group for further allocations. */
+		block_group->zoned_data_reloc_ongoing = 0;
+	}
+
+out:
+	spin_unlock(&block_group->lock);
+	btrfs_put_block_group(block_group);
+}
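The release test compares end offsets rather than flags. A worked example with made-up numbers (all constants below are illustrative, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical zoned block group: starts at 1 GiB, with 8 MiB
	 * allocated so far (alloc_offset is relative to start). */
	uint64_t start = 1ULL << 30;
	uint64_t alloc_offset = 8ULL << 20;

	/* An ordered extent covering the last 1 MiB of that allocation... */
	uint64_t logical = start + (7ULL << 20);
	uint64_t length = 1ULL << 20;

	/* ...ends exactly at start + alloc_offset, so the flag is cleared
	 * and the block group is released for further allocations. */
	printf("release: %d\n", start + alloc_offset == logical + length);
	return 0;
}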


@@ -70,6 +70,8 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 					    u64 logical, u64 length);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+				       u64 length);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -207,6 +209,9 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
 static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+
+static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
+						     u64 logical, u64 length) { }
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)


@@ -11,7 +11,7 @@
 /*
  * Default IO priority.
  */
-#define IOPRIO_DEFAULT	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
+#define IOPRIO_DEFAULT	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
 
 /*
  * Check that a priority value has a valid class.


@@ -300,6 +300,7 @@
 #define X86_FEATURE_RETPOLINE_LFENCE	(11*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_RETHUNK		(11*32+14) /* "" Use REturn THUNK */
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */


@@ -148,6 +148,10 @@
 						 * are restricted to targets in
 						 * kernel.
 						 */
+#define ARCH_CAP_PBRSB_NO		BIT(24)	/*
+						 * Not susceptible to Post-Barrier
+						 * Return Stack Buffer Predictions.
+						 */
 
 #define MSR_IA32_FLUSH_CMD		0x0000010b
 #define L1D_FLUSH			BIT(0)	/*


@@ -5347,7 +5347,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__be16 dst_port;	/* network byte order */
+	__u16 :16;		/* zero padding */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
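Narrowing dst_port to __be16 plus an explicit 16-bit pad keeps the old 4-byte slot, so later fields keep their offsets. A standalone sketch of why (hypothetical mirror structs, not the real UAPI header):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Mirrors of the old and new layouts: a __be16 plus explicit padding
 * occupies the same 4 bytes the __u32 did, so the ABI stays stable
 * while the verifier can now allow 2-byte and 1-byte narrow loads
 * only within the first, meaningful half of the slot. */
struct sock_old { uint32_t src_port; uint32_t dst_port; uint32_t dst_ip4; };
struct sock_new { uint32_t src_port; uint16_t dst_port; uint16_t pad; uint32_t dst_ip4; };

int main(void)
{
	assert(offsetof(struct sock_old, dst_ip4) ==
	       offsetof(struct sock_new, dst_ip4));
	assert(sizeof(struct sock_old) == sizeof(struct sock_new));
	return 0;
}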


@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
                     .format(values))
         if len(pids) > 1:
             sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
-                     ' to specify the desired pid'.format(" ".join(pids)))
+                     ' to specify the desired pid'
+                     .format(" ".join(map(str, pids))))
         namespace.pid = pids[0]
 
     argparser = argparse.ArgumentParser(description=description_text,


@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 
+#define _GNU_SOURCE
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <unistd.h>
+#include <sched.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
@@ -21,6 +23,7 @@
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
 	INGRESS_LINUM_IDX,
+	READ_SK_DST_PORT_LINUM_IDX,
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -43,8 +46,16 @@ static __u64 child_cg_id;
 static int linum_map_fd;
 static __u32 duration;
 
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+static bool create_netns(void)
+{
+	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+		return false;
+
+	if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+		return false;
+
+	return true;
+}
 
 static void print_sk(const struct bpf_sock *sk, const char *prefix)
 {
@@ -92,19 +103,24 @@ static void check_result(void)
 {
 	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
 	struct bpf_sock srv_sk, cli_sk, listen_sk;
-	__u32 ingress_linum, egress_linum;
+	__u32 idx, ingress_linum, egress_linum, linum;
 	int err;
 
-	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
-				  &egress_linum);
+	idx = EGRESS_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
-	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
-				  &ingress_linum);
+	idx = INGRESS_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
+	idx = READ_SK_DST_PORT_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
+	ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
+	ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
+
 	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
 	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
 	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
@@ -263,7 +279,7 @@ static void test(void)
 	char buf[DATA_LEN];
 
 	/* Prepare listen_fd */
-	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
 	/* start_server() has logged the error details */
 	if (CHECK_FAIL(listen_fd == -1))
 		goto done;
@@ -331,8 +347,12 @@ done:
 
 void test_sock_fields(void)
 {
-	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
 	int parent_cg_fd = -1, child_cg_fd = -1;
+	struct bpf_link *link;
+
+	/* Use a dedicated netns to have a fixed listen port */
+	if (!create_netns())
+		return;
 
 	/* Create a cgroup, get fd, and join it */
 	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
@@ -353,15 +373,20 @@ void test_sock_fields(void)
 	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
 		goto done;
 
-	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
-						 child_cg_fd);
-	if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
+	link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
 		goto done;
+	skel->links.egress_read_sock_fields = link;
 
-	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
-						  child_cg_fd);
-	if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
+	link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
 		goto done;
+	skel->links.ingress_read_sock_fields = link;
+
+	link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
+		goto done;
+	skel->links.read_sk_dst_port = link;
 
 	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
 	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
@@ -370,8 +395,7 @@ void test_sock_fields(void)
 	test();
 
 done:
-	bpf_link__destroy(egress_link);
-	bpf_link__destroy(ingress_link);
+	test_sock_fields__detach(skel);
 	test_sock_fields__destroy(skel);
 	if (child_cg_fd >= 0)
 		close(child_cg_fd);


@@ -12,6 +12,7 @@
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
 	INGRESS_LINUM_IDX,
+	READ_SK_DST_PORT_LINUM_IDX,
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -250,4 +251,48 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
 	return CG_OK;
 }
 
+static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
+{
+	__u32 *word = (__u32 *)&sk->dst_port;
+	return word[0] == bpf_htonl(0xcafe0000);
+}
+
+static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
+{
+	__u16 *half = (__u16 *)&sk->dst_port;
+	return half[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
+{
+	__u8 *byte = (__u8 *)&sk->dst_port;
+	return byte[0] == 0xca && byte[1] == 0xfe;
+}
+
+SEC("cgroup_skb/egress")
+int read_sk_dst_port(struct __sk_buff *skb)
+{
+	__u32 linum, linum_idx;
+	struct bpf_sock *sk;
+
+	linum_idx = READ_SK_DST_PORT_LINUM_IDX;
+
+	sk = skb->sk;
+	if (!sk)
+		RET_LOG();
+
+	/* Ignore everything but the SYN from the client socket */
+	if (sk->state != BPF_TCP_SYN_SENT)
+		return CG_OK;
+
+	if (!sk_dst_port__load_word(sk))
+		RET_LOG();
+	if (!sk_dst_port__load_half(sk))
+		RET_LOG();
+	if (!sk_dst_port__load_byte(sk))
+		RET_LOG();
+
+	return CG_OK;
+}
+
 char _license[] SEC("license") = "GPL";


@@ -121,7 +121,25 @@
 	.result = ACCEPT,
 },
 {
-	"sk_fullsock(skb->sk): sk->dst_port [narrow load]",
+	"sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [half load]",
 	.insns = {
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -139,7 +157,7 @@
 	.result = ACCEPT,
 },
 {
-	"sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
+	"sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)",
 	.insns = {
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -149,7 +167,64 @@
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
-	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "invalid sock access",
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [byte load]",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+	BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "invalid sock access",
+},
+{
+	"sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},

@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
 
 void ucall(uint64_t cmd, int nargs, ...)
 {
-	struct ucall uc = {
-		.cmd = cmd,
-	};
+	struct ucall uc = {};
 	va_list va;
 	int i;
 
+	WRITE_ONCE(uc.cmd, cmd);
 	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
 
 	va_start(va, nargs);
 	for (i = 0; i < nargs; ++i)
-		uc.args[i] = va_arg(va, uint64_t);
+		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
 	va_end(va);
 
-	*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
+	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
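WRITE_ONCE() is what guarantees the host sees a fully materialized struct before the exit-triggering store. A userspace stand-in showing the idea (the macro body mirrors the kernel's volatile-cast definition; the struct and function names here are hypothetical):

#include <stdint.h>

/* The volatile cast forces the compiler to emit exactly one store of
 * exactly this size, so a reader in another context cannot observe a
 * torn or dead-store-eliminated write. */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))

struct ucall_demo {
	uint64_t cmd;
	uint64_t args[6];
};

static void publish(struct ucall_demo *uc, volatile uint64_t *exit_addr,
		    uint64_t cmd, uint64_t arg0)
{
	WRITE_ONCE(uc->cmd, cmd);	/* fill the struct in place */
	WRITE_ONCE(uc->args[0], arg0);
	/* The address store is what triggers the exit; it must come last. */
	*exit_addr = (uint64_t)(uintptr_t)uc;
}

int main(void)
{
	struct ucall_demo uc;
	uint64_t mmio_word;

	publish(&uc, &mmio_word, 1 /* hypothetical UCALL_SYNC */, 42);
	return 0;
}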


@@ -44,7 +44,7 @@ static inline void nop_loop(void)
 {
 	int i;
 
-	for (i = 0; i < 1000000; i++)
+	for (i = 0; i < 100000000; i++)
 		asm volatile("nop");
 }
 
@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
 	tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
 	GUEST_ASSERT(tsc_freq > 0);
 
-	/* First, check MSR-based clocksource */
+	/* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
 	r1 = rdtsc();
 	t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
 	nop_loop();
 	r2 = rdtsc();
 	t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
 
 	GUEST_ASSERT(r2 > r1 && t2 > t1);
 
@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
 	tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
 	TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
 
-	/* First, check MSR-based clocksource */
+	/* For increased accuracy, take mean rdtsc() before and afrer ioctl */
 	r1 = rdtsc();
 	t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
 	nop_loop();
 	r2 = rdtsc();
 	t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
 
 	TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
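Averaging a TSC read taken before and after the slow clock read attributes roughly half of that read's latency to each side, tightening the bracket. A minimal sketch of the technique (x86-only; fake_clock() is a made-up stand-in for the MSR read):

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>	/* __rdtsc() */

/* Returns the mean of two TSC reads bracketing a slow clock read. */
static uint64_t bracketed_tsc(uint64_t (*read_clock)(void), uint64_t *clock)
{
	uint64_t r = __rdtsc();

	*clock = read_clock();
	return (r + __rdtsc()) / 2;
}

/* Stand-in for the MSR read; any slow clock source works for the demo. */
static uint64_t fake_clock(void)
{
	return __rdtsc() / 100;
}

int main(void)
{
	uint64_t t;
	uint64_t r = bracketed_tsc(fake_clock, &t);

	printf("tsc midpoint %llu, clock %llu\n",
	       (unsigned long long)r, (unsigned long long)t);
	return 0;
}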


@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
 	return l;
 }
 
+static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
+{
+	char x[128];
+	FILE *f;
+	size_t l;
+
+	snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
+	f = fopen(x, "r");
+	if (!f) {
+		buffer[0] = 0;
+		l = 0;
+	} else {
+		l = fread(buffer, 1, sizeof(buffer), f);
+		buffer[l] = 0;
+		fclose(f);
+	}
+	return l;
+}
 
 /*
  * Put a size string together
@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
 {
 	printf("\n%s: Kernel object allocation\n", s->name);
 	printf("-----------------------------------------------------------------------\n");
-	if (read_slab_obj(s, "alloc_calls"))
+	if (read_debug_slab_obj(s, "alloc_traces"))
+		printf("%s", buffer);
+	else if (read_slab_obj(s, "alloc_calls"))
 		printf("%s", buffer);
 	else
 		printf("No Data\n");
 
 	printf("\n%s: Kernel object freeing\n", s->name);
 	printf("------------------------------------------------------------------------\n");
-	if (read_slab_obj(s, "free_calls"))
+	if (read_debug_slab_obj(s, "free_traces"))
+		printf("%s", buffer);
+	else if (read_slab_obj(s, "free_calls"))
 		printf("%s", buffer);
 	else
 		printf("No Data\n");