Merge tag 'v5.15.60' into rpi-5.15.y
This is the 5.15.60 stable release
@@ -422,6 +422,14 @@ The possible values in this file are:
   'RSB filling'   Protection of RSB on context switch enabled
   ============= ===========================================
 
+- EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
+
+  =========================== =======================================================
+  'PBRSB-eIBRS: SW sequence'  CPU is affected and protection of RSB on VMEXIT enabled
+  'PBRSB-eIBRS: Vulnerable'   CPU is vulnerable
+  'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
+  =========================== =======================================================
+
 Full mitigation might require a microcode update from the CPU
 vendor. When the necessary microcode is not available, the kernel will
 report vulnerability.
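Illustrative only, not part of the release diff: the strings documented above are appended to the spectre_v2 line of the standard sysfs vulnerabilities interface, so they can be checked from user space. A minimal sketch in C (the path is the documented sysfs file; everything else is invented for illustration):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

	if (!f || !fgets(line, sizeof(line), f)) {
		perror("spectre_v2");
		return 1;
	}
	fclose(f);
	/* look for the new PBRSB-eIBRS status substring documented above */
	if (strstr(line, "PBRSB-eIBRS: Vulnerable"))
		printf("PBRSB mitigation missing: %s", line);
	else
		printf("status: %s", line);
	return 0;
}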
@@ -23,6 +23,7 @@ properties:
       - brcm,bcm4345c5
       - brcm,bcm43540-bt
       - brcm,bcm4335a0
+      - brcm,bcm4349-bt
 
   shutdown-gpios:
     maxItems: 1
Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
 PATCHLEVEL = 15
-SUBLEVEL = 59
+SUBLEVEL = 60
 EXTRAVERSION =
 NAME = Trick or Treat
@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
 {
 	if (unlikely(!dctx->sset)) {
 		if (!dctx->rset) {
-			poly1305_init_arch(dctx, src);
+			poly1305_init_arm64(&dctx->h, src);
 			src += POLY1305_BLOCK_SIZE;
 			len -= POLY1305_BLOCK_SIZE;
 			dctx->rset = 1;
@@ -103,8 +103,8 @@
 /*
  * Initial memory map attributes.
  */
-#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
+#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
 
 #if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	subs	x1, x1, #64
 	b.ne	1b
 
-	mov	x7, SWAPPER_MM_MMUFLAGS
+	mov_q	x7, SWAPPER_MM_MMUFLAGS
 
 	/*
 	 * Create the identity mapping.
@@ -2427,7 +2427,7 @@ config RETPOLINE
 config RETHUNK
 	bool "Enable return-thunks"
 	depends on RETPOLINE && CC_HAS_RETURN_THUNK
-	default y
+	default y if X86_64
 	help
 	  Compile the kernel with the return-thunks compiler option to guard
 	  against kernel-to-user data leaks by avoiding return speculation.
@@ -2436,21 +2436,21 @@ config RETHUNK
 
 config CPU_UNRET_ENTRY
 	bool "Enable UNRET on kernel entry"
-	depends on CPU_SUP_AMD && RETHUNK
+	depends on CPU_SUP_AMD && RETHUNK && X86_64
 	default y
 	help
 	  Compile the kernel with support for the retbleed=unret mitigation.
 
 config CPU_IBPB_ENTRY
 	bool "Enable IBPB on kernel entry"
-	depends on CPU_SUP_AMD
+	depends on CPU_SUP_AMD && X86_64
 	default y
 	help
 	  Compile the kernel with support for the retbleed=ibpb mitigation.
 
 config CPU_IBRS_ENTRY
 	bool "Enable IBRS on kernel entry"
-	depends on CPU_SUP_INTEL
+	depends on CPU_SUP_INTEL && X86_64
 	default y
 	help
 	  Compile the kernel with support for the spectre_v2=ibrs mitigation.
@@ -301,6 +301,7 @@
 #define X86_FEATURE_RETHUNK		(11*32+14) /* "" Use REturn THUNK */
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
 #define X86_FEATURE_USE_IBPB_FW		(11*32+16) /* "" Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
@@ -446,5 +447,6 @@
 #define X86_BUG_SRBDS			X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
 #define X86_BUG_MMIO_STALE_DATA		X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
 #define X86_BUG_RETBLEED		X86_BUG(26) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB		X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
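For readers unfamiliar with the cpufeatures numbering: each "word" is a 32-bit bitmap, so an expression like 11*32+17 is just a flat bit index. Not from the patch — a quick worked check of that arithmetic as a stand-alone C program:

#include <stdio.h>

int main(void)
{
	/* word 11, bit 17, as in X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) */
	unsigned int bit = 11 * 32 + 17;

	printf("flat bit index: %u (word %u, bit %u)\n",
	       bit, bit / 32, bit % 32);	/* prints 369, word 11, bit 17 */
	return 0;
}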
@@ -642,6 +642,7 @@ struct kvm_vcpu_arch {
 	u64 ia32_misc_enable_msr;
 	u64 smbase;
 	u64 smi_count;
+	bool at_instruction_boundary;
 	bool tpr_access_reporting;
 	bool xsaves_enabled;
 	u64 ia32_xss;
@@ -1271,6 +1272,8 @@ struct kvm_vcpu_stat {
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
+	u64 preemption_reported;
+	u64 preemption_other;
 	u64 guest_mode;
 };
 
@@ -148,6 +148,10 @@
 						 * are restricted to targets in
 						 * kernel.
 						 */
+#define ARCH_CAP_PBRSB_NO		BIT(24)	/*
+						 * Not susceptible to Post-Barrier
+						 * Return Stack Buffer Predictions.
+						 */
 
 #define MSR_IA32_FLUSH_CMD		0x0000010b
 #define L1D_FLUSH			BIT(0)	/*
@@ -60,7 +60,9 @@
 774:						\
 	add	$(BITS_PER_LONG/8) * 2, sp;	\
 	dec	reg;				\
-	jnz	771b;
+	jnz	771b;				\
+	/* barrier for jnz misprediction */	\
+	lfence;
 
 #ifdef __ASSEMBLY__
 
@@ -118,13 +120,28 @@
 #endif
 .endm
 
+.macro ISSUE_UNBALANCED_RET_GUARD
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call .Lunbalanced_ret_guard_\@
+	int3
+.Lunbalanced_ret_guard_\@:
+	add $(BITS_PER_LONG/8), %_ASM_SP
+	lfence
+.endm
+
 /*
  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
  * monstrosity above, manually.
  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+.ifb \ftr2
 	ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+.else
+	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+.endif
 	__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+.Lunbalanced_\@:
+	ISSUE_UNBALANCED_RET_GUARD
 .Lskip_rsb_\@:
 .endm
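ISSUE_UNBALANCED_RET_GUARD retires one CALL (planting a benign RSB entry), discards the pushed return address, and fences before any RET can execute. Not the kernel macro itself — a hedged user-space rendering of the same three steps as GNU C inline asm for x86-64, for illustration only:

/* Illustration: the call/add/lfence pattern the macro emits. The int3
 * occupies the speculation-trap slot at the call's return site. The
 * stack pointer is restored before the asm ends, so this is safe to
 * compile and call in ordinary user code. */
static inline void unbalanced_ret_guard(void)
{
	__asm__ volatile("call 1f\n\t"
			 "int3\n"
			 "1:\n\t"
			 "add $8, %%rsp\n\t"	/* drop the pushed return address */
			 "lfence"		/* wait for the CALL to retire */
			 ::: "memory");
}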
@@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
 	}
 }
 
+static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
+{
+	/*
+	 * Similar to context switches, there are two types of RSB attacks
+	 * after VM exit:
+	 *
+	 * 1) RSB underflow
+	 *
+	 * 2) Poisoned RSB entry
+	 *
+	 * When retpoline is enabled, both are mitigated by filling/clearing
+	 * the RSB.
+	 *
+	 * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+	 * prediction isolation protections, RSB still needs to be cleared
+	 * because of #2. Note that SMEP provides no protection here, unlike
+	 * user-space-poisoned RSB entries.
+	 *
+	 * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
+	 * bug is present then a LITE version of RSB protection is required,
+	 * just a single call needs to retire before a RET is executed.
+	 */
+	switch (mode) {
+	case SPECTRE_V2_NONE:
+		return;
+
+	case SPECTRE_V2_EIBRS_LFENCE:
+	case SPECTRE_V2_EIBRS:
+		if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
+			setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
+			pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
+		}
+		return;
+
+	case SPECTRE_V2_EIBRS_RETPOLINE:
+	case SPECTRE_V2_RETPOLINE:
+	case SPECTRE_V2_LFENCE:
+	case SPECTRE_V2_IBRS:
+		setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+		pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
+		return;
+	}
+
+	pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
+	dump_stack();
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void)
 	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
 	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
-	/*
-	 * Similar to context switches, there are two types of RSB attacks
-	 * after vmexit:
-	 *
-	 * 1) RSB underflow
-	 *
-	 * 2) Poisoned RSB entry
-	 *
-	 * When retpoline is enabled, both are mitigated by filling/clearing
-	 * the RSB.
-	 *
-	 * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
-	 * prediction isolation protections, RSB still needs to be cleared
-	 * because of #2. Note that SMEP provides no protection here, unlike
-	 * user-space-poisoned RSB entries.
-	 *
-	 * eIBRS, on the other hand, has RSB-poisoning protections, so it
-	 * doesn't need RSB clearing after vmexit.
-	 */
-	if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
-	    boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
-		setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+	spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
 
 	/*
 	 * Retpoline protects the kernel, but doesn't protect firmware. IBRS
@@ -2285,6 +2311,19 @@ static char *ibpb_state(void)
 	return "";
 }
 
+static char *pbrsb_eibrs_state(void)
+{
+	if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
+		if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
+		    boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
+			return ", PBRSB-eIBRS: SW sequence";
+		else
+			return ", PBRSB-eIBRS: Vulnerable";
+	} else {
+		return ", PBRSB-eIBRS: Not affected";
+	}
+}
+
 static ssize_t spectre_v2_show_state(char *buf)
 {
 	if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf)
 	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
 		return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-	return sprintf(buf, "%s%s%s%s%s%s\n",
+	return sprintf(buf, "%s%s%s%s%s%s%s\n",
 		       spectre_v2_strings[spectre_v2_enabled],
 		       ibpb_state(),
 		       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
 		       stibp_state(),
 		       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+		       pbrsb_eibrs_state(),
 		       spectre_v2_module_string());
 }
 
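Not part of the patch: the new per-mode vmexit RSB policy condensed into a stand-alone sketch (kernel symbols swapped for local stand-ins) so the three outcomes are easy to see at a glance. Here "eibrs_mode" covers SPECTRE_V2_EIBRS and SPECTRE_V2_EIBRS_LFENCE only; eIBRS+retpoline takes the full-fill path, as in the real switch above.

enum rsb_vmexit_action { RSB_NONE, RSB_FILL_LITE, RSB_FILL_FULL };

/* none: no spectre_v2 mitigation; pbrsb_bug: X86_BUG_EIBRS_PBRSB is set */
static enum rsb_vmexit_action pick_rsb_vmexit(int none, int eibrs_mode, int pbrsb_bug)
{
	if (none)
		return RSB_NONE;			/* nothing to protect */
	if (eibrs_mode)
		return pbrsb_bug ? RSB_FILL_LITE	/* retire one CALL on VMEXIT */
				 : RSB_NONE;		/* eIBRS alone suffices */
	return RSB_FILL_FULL;				/* retpoline/IBRS: full RSB fill */
}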
@@ -1027,6 +1027,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SWAPGS		BIT(6)
 #define NO_ITLB_MULTIHIT	BIT(7)
 #define NO_SPECTRE_V2		BIT(8)
+#define NO_EIBRS_PBRSB		BIT(9)
 
 #define VULNWL(vendor, family, model, whitelist)	\
 	X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1067,7 +1068,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 
 	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/*
 	 * Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1077,7 +1078,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	 * good enough for our purposes.
 	 */
 
-	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_TREMONT,		NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_TREMONT_L,		NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/* AMD Family 0xf - 0x12 */
 	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
@@ -1255,6 +1258,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		setup_force_cpu_bug(X86_BUG_RETBLEED);
 	}
 
+	if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
+	    !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
+	    !(ia32_cap & ARCH_CAP_PBRSB_NO))
+		setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;
 
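The bug bit is set only when all three conditions line up: eIBRS is present, the model is not whitelisted, and the microcode does not assert the new MSR bit. Not from the diff — the predicate pulled out into a self-contained sketch (names invented) for clarity:

#include <stdbool.h>
#include <stdint.h>

#define ARCH_CAP_PBRSB_NO (1u << 24)	/* matches the MSR bit added above */

/* has_eibrs: X86_FEATURE_IBRS_ENHANCED; whitelisted: NO_EIBRS_PBRSB match */
static bool affected_by_pbrsb(bool has_eibrs, bool whitelisted, uint64_t ia32_cap)
{
	return has_eibrs && !whitelisted && !(ia32_cap & ARCH_CAP_PBRSB_NO);
}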
@@ -832,7 +832,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 
 	/* If source buffer is not aligned then use an intermediate buffer */
 	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
-		src_tpage = alloc_page(GFP_KERNEL);
+		src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
 		if (!src_tpage)
 			return -ENOMEM;
 
@@ -853,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
 		int dst_offset;
 
-		dst_tpage = alloc_page(GFP_KERNEL);
+		dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
 		if (!dst_tpage) {
 			ret = -ENOMEM;
 			goto e_free;
@@ -4263,6 +4263,8 @@ out:
 
 static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
+	if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
+		vcpu->arch.at_instruction_boundary = true;
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@ -197,11 +197,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
	 * entries and (in some cases) RSB underflow.
	 *
	 * eIBRS has its own protection against poisoned RSB, so it doesn't
-	 * need the RSB filling sequence. But it does need to be enabled
-	 * before the first unbalanced RET.
+	 * need the RSB filling sequence. But it does need to be enabled, and a
+	 * single call to retire, before the first unbalanced RET.
 	 */
-	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
+			   X86_FEATURE_RSB_VMEXIT_LITE
+
 
 	pop %_ASM_ARG2	/* @flags */
 	pop %_ASM_ARG1	/* @vmx */
@@ -6471,6 +6471,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 		return;
 
 	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+	vcpu->arch.at_instruction_boundary = true;
 }
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
@@ -277,6 +277,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, nested_run),
 	STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
 	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
+	STATS_DESC_COUNTER(VCPU, preemption_reported),
+	STATS_DESC_COUNTER(VCPU, preemption_other),
 	STATS_DESC_ICOUNTER(VCPU, guest_mode)
 };
 
@@ -4371,6 +4373,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	struct kvm_memslots *slots;
 	static const u8 preempted = KVM_VCPU_PREEMPTED;
 
+	/*
+	 * The vCPU can be marked preempted if and only if the VM-Exit was on
+	 * an instruction boundary and will not trigger guest emulation of any
+	 * kind (see vcpu_run).  Vendor specific code controls (conservatively)
+	 * when this is true, for example allowing the vCPU to be marked
+	 * preempted if and only if the VM-Exit was due to a host interrupt.
+	 */
+	if (!vcpu->arch.at_instruction_boundary) {
+		vcpu->stat.preemption_other++;
+		return;
+	}
+
+	vcpu->stat.preemption_reported++;
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -4400,19 +4415,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
-	if (vcpu->preempted && !vcpu->arch.guest_state_protected)
-		vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+	if (vcpu->preempted) {
+		if (!vcpu->arch.guest_state_protected)
+			vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
 
-	/*
-	 * Take the srcu lock as memslots will be accessed to check the gfn
-	 * cache generation against the memslots generation.
-	 */
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	if (kvm_xen_msr_enabled(vcpu->kvm))
-		kvm_xen_runstate_set_preempted(vcpu);
-	else
-		kvm_steal_time_set_preempted(vcpu);
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		/*
+		 * Take the srcu lock as memslots will be accessed to check the gfn
+		 * cache generation against the memslots generation.
+		 */
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		if (kvm_xen_msr_enabled(vcpu->kvm))
+			kvm_xen_runstate_set_preempted(vcpu);
+		else
+			kvm_steal_time_set_preempted(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	}
 
 	static_call(kvm_x86_vcpu_put)(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
@@ -9934,6 +9951,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.l1tf_flush_l1d = true;
 
 	for (;;) {
+		/*
+		 * If another guest vCPU requests a PV TLB flush in the middle
+		 * of instruction emulation, the rest of the emulation could
+		 * use a stale page translation. Assume that any code after
+		 * this point can start executing an instruction.
+		 */
+		vcpu->arch.at_instruction_boundary = false;
 		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
 		} else {
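Not part of the diff: the life cycle of the new flag reduced to a self-contained sketch — cleared before each run-loop iteration, set by the vendor exit-irqoff handler only for host-interrupt exits, consulted when recording preemption. Types and names here are stand-ins, not KVM's.

#include <stdbool.h>

struct vcpu_sketch {
	bool at_instruction_boundary;
	unsigned long preemption_reported, preemption_other;
};

/* vendor exit-irqoff path: only a host-interrupt exit marks a boundary */
static void handle_exit_irqoff(struct vcpu_sketch *v, bool host_interrupt_exit)
{
	if (host_interrupt_exit)
		v->at_instruction_boundary = true;
}

/* preemption accounting: never mark a vCPU preempted mid-instruction */
static void on_preempt(struct vcpu_sketch *v)
{
	if (!v->at_instruction_boundary) {
		v->preemption_other++;		/* skip steal-time marking */
		return;
	}
	v->preemption_reported++;		/* safe to mark preempted */
}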
@@ -97,8 +97,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
 	 * behalf of the vCPU. Only if the VMM does actually block
 	 * does it need to enter RUNSTATE_blocked.
 	 */
-	if (vcpu->preempted)
-		kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+	if (WARN_ON_ONCE(!vcpu->preempted))
+		return;
+
+	kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
 }
 
 /* 32-bit compatibility definitions, also used natively in 32-bit build */
@@ -265,6 +265,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
 	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
 	INIT_HLIST_HEAD(&ioc->icq_list);
 	INIT_WORK(&ioc->release_work, ioc_release_fn);
+	ioc->ioprio = IOPRIO_DEFAULT;
 
 	/*
 	 * Try to install. ioc shouldn't be installed if someone else
@@ -189,9 +189,9 @@ out:
 int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
 	if (!ioprio_valid(aprio))
-		aprio = IOPRIO_DEFAULT;
+		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
 	if (!ioprio_valid(bprio))
-		bprio = IOPRIO_DEFAULT;
+		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
 
 	return min(aprio, bprio);
 }
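The substituted values are easy to compute by hand: the class sits above a 13-bit data field. Not from the patch — a worked check of the macro arithmetic using the uapi constants:

#include <stdio.h>

#define IOPRIO_CLASS_SHIFT	13
#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))
#define IOPRIO_CLASS_NONE	0
#define IOPRIO_CLASS_BE		2
#define IOPRIO_BE_NORM		4

int main(void)
{
	/* old IOPRIO_DEFAULT: (2 << 13) | 4 == 16388; new: (0 << 13) | 0 == 0 */
	printf("BE/NORM = %d, NONE/0 = %d\n",
	       IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM),
	       IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0));
	return 0;
}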
@@ -29,16 +29,26 @@
 
 #undef pr_fmt
 #define pr_fmt(fmt) "BERT: " fmt
 
+#define ACPI_BERT_PRINT_MAX_RECORDS 5
 #define ACPI_BERT_PRINT_MAX_LEN 1024
 
 static int bert_disable;
 
+/*
+ * Print "all" the error records in the BERT table, but avoid huge spam to
+ * the console if the BIOS included oversize records, or too many records.
+ * Skipping some records here does not lose anything because the full
+ * data is available to user tools in:
+ *	/sys/firmware/acpi/tables/data/BERT
+ */
 static void __init bert_print_all(struct acpi_bert_region *region,
 				  unsigned int region_len)
 {
 	struct acpi_hest_generic_status *estatus =
 		(struct acpi_hest_generic_status *)region;
 	int remain = region_len;
+	int printed = 0, skipped = 0;
 	u32 estatus_len;
 
 	while (remain >= sizeof(struct acpi_bert_region)) {
@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		if (remain < estatus_len) {
 			pr_err(FW_BUG "Truncated status block (length: %u).\n",
 			       estatus_len);
-			return;
+			break;
 		}
 
 		/* No more error records. */
 		if (!estatus->block_status)
-			return;
+			break;
 
 		if (cper_estatus_check(estatus)) {
 			pr_err(FW_BUG "Invalid error record.\n");
-			return;
+			break;
 		}
 
-		pr_info_once("Error records from previous boot:\n");
-		if (region_len < ACPI_BERT_PRINT_MAX_LEN)
+		if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
+		    printed < ACPI_BERT_PRINT_MAX_RECORDS) {
+			pr_info_once("Error records from previous boot:\n");
 			cper_estatus_print(KERN_INFO HW_ERR, estatus);
-		else
-			pr_info_once("Max print length exceeded, table data is available at:\n"
-				     "/sys/firmware/acpi/tables/data/BERT");
+			printed++;
+		} else {
+			skipped++;
+		}
 
 		/*
 		 * Because the boot error source is "one-time polled" type,
@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		estatus = (void *)estatus + estatus_len;
 		remain -= estatus_len;
 	}
+
+	if (skipped)
+		pr_info(HW_ERR "Skipped %d error records\n", skipped);
 }
 
 static int __init setup_bert_disable(char *str)
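Not part of the diff: the printed/skipped bookkeeping above as a generic stand-alone loop, showing why break (rather than return) matters — every exit path still reaches the final summary line:

#include <stdio.h>

#define MAX_RECORDS 5

static void print_capped(int nrecords)
{
	int printed = 0, skipped = 0;

	for (int i = 0; i < nrecords; i++) {
		if (printed < MAX_RECORDS) {
			printf("record %d\n", i);
			printed++;
		} else {
			skipped++;	/* full data stays available elsewhere */
		}
	}
	if (skipped)
		printf("Skipped %d records\n", skipped);
}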
@@ -424,23 +424,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 	.callback = video_detect_force_native,
 	.ident = "Clevo NL5xRU",
 	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
 		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
 		},
 	},
-	{
-	.callback = video_detect_force_native,
-	.ident = "Clevo NL5xRU",
-	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
-		},
-	},
-	{
-	.callback = video_detect_force_native,
-	.ident = "Clevo NL5xRU",
-	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
-		},
-	},
@@ -463,28 +446,60 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 	{
 	.callback = video_detect_force_native,
 	.ident = "Clevo NL5xNU",
 	.matches = {
 		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
 		},
 	},
+	/*
+	 * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10,
+	 * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo
+	 * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description
+	 * above.
+	 */
+	{
+	.callback = video_detect_force_native,
+	.ident = "TongFang PF5PU1G",
+	.matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "TongFang PF4NU1F",
+	.matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"),
+		},
+	},
 	{
 	.callback = video_detect_force_native,
 	.ident = "TongFang PF4NU1F",
 	.matches = {
 		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"),
 		},
 	},
 	{
 	.callback = video_detect_force_native,
-	.ident = "Clevo NL5xNU",
+	.ident = "TongFang PF5NU1G",
 	.matches = {
 		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"),
 		},
 	},
 	{
 	.callback = video_detect_force_native,
-	.ident = "Clevo NL5xNU",
+	.ident = "TongFang PF5NU1G",
 	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
-		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"),
 		},
 	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "TongFang PF5LUXG",
+	.matches = {
+		DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"),
+		},
+	},
 
 	/*
 	 * Desktops which falsely report a backlight and which our heuristics
	 * for this do not catch.
@@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
 	{ 0x6606, "BCM4345C5"	},	/* 003.006.006 */
 	{ 0x230f, "BCM4356A2"	},	/* 001.003.015 */
 	{ 0x220e, "BCM20702A1"	},	/* 001.002.014 */
+	{ 0x420d, "BCM4349B1"	},	/* 002.002.013 */
+	{ 0x420e, "BCM4349B1"	},	/* 002.002.014 */
 	{ 0x4217, "BCM4329B1"	},	/* 002.002.023 */
 	{ 0x6106, "BCM4359C0"	},	/* 003.001.006 */
 	{ 0x4106, "BCM4335A0"	},	/* 002.001.006 */
@@ -420,6 +420,18 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
 						     BTUSB_WIDEBAND_SPEECH },
 
+	/* Realtek 8852CE Bluetooth devices */
+	{ USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
+
 	/* Realtek Bluetooth devices */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
 	  .driver_info = BTUSB_REALTEK },
@@ -459,6 +471,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK |
 						     BTUSB_WIDEBAND_SPEECH |
 						     BTUSB_VALID_LE_STATES },
+	{ USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK |
+						     BTUSB_WIDEBAND_SPEECH |
+						     BTUSB_VALID_LE_STATES },
 
 	/* Additional Realtek 8723AE Bluetooth devices */
 	{ USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
@@ -1515,8 +1515,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = {
 	{ .compatible = "brcm,bcm4345c5" },
 	{ .compatible = "brcm,bcm4330-bt" },
 	{ .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
+	{ .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data },
 	{ .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
 	{ .compatible = "brcm,bcm4335a0" },
+	{ .compatible = "infineon,cyw55572-bt" },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req)
 
 	switch(req->data[1]) {
 	case ADB_QUERY_GETDEVINFO:
-		if (req->nbytes < 3)
+		if (req->nbytes < 3 || req->data[2] >= 16)
 			break;
 		mutex_lock(&adb_handler_mutex);
 		req->reply[0] = adb_handler[req->data[2]].original_address;
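The one-line change matters because req->data[2] indexes a fixed 16-entry handler table, so an unchecked value reads out of bounds. Not from the driver — a minimal sketch of the same validate-before-index pattern (names invented):

#include <errno.h>

#define NR_HANDLERS 16			/* the ADB handler table has 16 slots */

struct handler { int original_address; };
static struct handler handlers[NR_HANDLERS];

static int lookup(const unsigned char *data, int nbytes)
{
	if (nbytes < 3 || data[2] >= NR_HANDLERS)
		return -EINVAL;		/* reject before touching the array */
	return handlers[data[2]].original_address;
}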
@@ -98,6 +98,7 @@ struct btrfs_block_group {
 	unsigned int to_copy:1;
 	unsigned int relocating_repair:1;
 	unsigned int chunk_item_inserted:1;
+	unsigned int zoned_data_reloc_ongoing:1;
 
 	int disk_cache_state;
 
@@ -3804,7 +3804,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	       block_group->start == fs_info->data_reloc_bg ||
 	       fs_info->data_reloc_bg == 0);
 
-	if (block_group->ro) {
+	if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
 		ret = 1;
 		goto out;
 	}
@@ -3865,8 +3865,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 out:
 	if (ret && ffe_ctl->for_treelog)
 		fs_info->treelog_bg = 0;
-	if (ret && ffe_ctl->for_data_reloc)
+	if (ret && ffe_ctl->for_data_reloc &&
+	    fs_info->data_reloc_bg == block_group->start) {
+		/*
+		 * Do not allow further allocations from this block group.
+		 * Compared to increasing the ->ro, setting the
+		 * ->zoned_data_reloc_ongoing flag still allows nocow
+		 * writers to come in. See btrfs_inc_nocow_writers().
+		 *
+		 * We need to disable an allocation to avoid an allocation of
+		 * regular (non-relocation data) extent. With mix of relocation
+		 * extents and regular extents, we can dispatch WRITE commands
+		 * (for relocation extents) and ZONE APPEND commands (for
+		 * regular extents) at the same time to the same zone, which
+		 * easily break the write pointer.
+		 */
+		block_group->zoned_data_reloc_ongoing = 1;
 		fs_info->data_reloc_bg = 0;
+	}
 	spin_unlock(&fs_info->relocation_bg_lock);
 	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
@@ -5152,13 +5152,14 @@ int extent_writepages(struct address_space *mapping,
 	 */
 	btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
 	ret = extent_write_cache_pages(mapping, wbc, &epd);
+	btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 	ASSERT(ret <= 0);
 	if (ret < 0) {
-		btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 		end_write_bio(&epd, ret);
 		return ret;
 	}
 	ret = flush_write_bio(&epd);
-	btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 	return ret;
 }
 
@@ -3069,6 +3069,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 						ordered_extent->file_offset,
 						ordered_extent->file_offset +
 						logical_len);
+		btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
+						  ordered_extent->disk_num_bytes);
 	} else {
 		BUG_ON(root == fs_info->tree_root);
 		ret = insert_ordered_extent_file_extent(trans, ordered_extent);
@@ -1623,3 +1623,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 }
+
+void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+				       u64 length)
+{
+	struct btrfs_block_group *block_group;
+
+	if (!btrfs_is_zoned(fs_info))
+		return;
+
+	block_group = btrfs_lookup_block_group(fs_info, logical);
+	/* It should be called on a previous data relocation block group. */
+	ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
+
+	spin_lock(&block_group->lock);
+	if (!block_group->zoned_data_reloc_ongoing)
+		goto out;
+
+	/* All relocation extents are written. */
+	if (block_group->start + block_group->alloc_offset == logical + length) {
+		/* Now, release this block group for further allocations. */
+		block_group->zoned_data_reloc_ongoing = 0;
+	}
+
+out:
+	spin_unlock(&block_group->lock);
+	btrfs_put_block_group(block_group);
+}
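Not part of the diff: the completion test in btrfs_zoned_release_data_reloc_bg reduced to its arithmetic. On a zoned device, alloc_offset is the write pointer relative to the group start, so an extent ending exactly at start + alloc_offset must be the last relocation write queued to the zone. A sketch with generic types:

#include <stdbool.h>
#include <stdint.h>

/* e.g. start=0x100000, alloc_offset=0x40000: a write at logical=0x13c000
 * with length=0x4000 ends at 0x140000 == start + alloc_offset -> done. */
static bool reloc_writes_done(uint64_t start, uint64_t alloc_offset,
			      uint64_t logical, uint64_t length)
{
	return start + alloc_offset == logical + length;
}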
@@ -70,6 +70,8 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 					    u64 logical, u64 length);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+				       u64 length);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -207,6 +209,9 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
 static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+
+static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
+						     u64 logical, u64 length) { }
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
@@ -11,7 +11,7 @@
 /*
  * Default IO priority.
  */
-#define IOPRIO_DEFAULT	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
+#define IOPRIO_DEFAULT	IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
 
 /*
  * Check that a priority value has a valid class.
@@ -300,6 +300,7 @@
 #define X86_FEATURE_RETPOLINE_LFENCE	(11*32+13) /* "" Use LFENCE for Spectre variant 2 */
 #define X86_FEATURE_RETHUNK		(11*32+14) /* "" Use REturn THUNK */
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
@@ -148,6 +148,10 @@
 						 * are restricted to targets in
 						 * kernel.
 						 */
+#define ARCH_CAP_PBRSB_NO		BIT(24)	/*
+						 * Not susceptible to Post-Barrier
+						 * Return Stack Buffer Predictions.
+						 */
 
 #define MSR_IA32_FLUSH_CMD		0x0000010b
 #define L1D_FLUSH			BIT(0)	/*
@@ -5347,7 +5347,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__be16 dst_port;	/* network byte order */
+	__u16 :16;		/* zero padding */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
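With dst_port now a __be16 followed by explicit padding, 2- and 1-byte loads become legal while 4-byte loads stay accepted for backward compatibility, as the selftests further down exercise. Not from the patch — a hedged BPF-C fragment in the style of those selftests (program and section names invented):

/* Sketch of a cgroup_skb program body; assumes the usual libbpf headers. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup_skb/egress")
int dst_port_example(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;

	if (!sk)
		return 1;
	/* half load, network byte order -> convert before comparing */
	if (bpf_ntohs(sk->dst_port) == 443)
		return 1;	/* e.g. count or tag HTTPS traffic here */
	return 1;
}

char _license[] SEC("license") = "GPL";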
@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
                      .format(values))
         if len(pids) > 1:
             sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
-                     ' to specify the desired pid'.format(" ".join(pids)))
+                     ' to specify the desired pid'
+                     .format(" ".join(map(str, pids))))
         namespace.pid = pids[0]
 
     argparser = argparse.ArgumentParser(description=description_text,
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 
+#define _GNU_SOURCE
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <unistd.h>
+#include <sched.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
@@ -21,6 +23,7 @@
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
 	INGRESS_LINUM_IDX,
+	READ_SK_DST_PORT_LINUM_IDX,
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -43,8 +46,16 @@ static __u64 child_cg_id;
 static int linum_map_fd;
 static __u32 duration;
 
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+static bool create_netns(void)
+{
+	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+		return false;
+
+	if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+		return false;
+
+	return true;
+}
 
 static void print_sk(const struct bpf_sock *sk, const char *prefix)
 {
@@ -92,19 +103,24 @@ static void check_result(void)
 {
 	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
 	struct bpf_sock srv_sk, cli_sk, listen_sk;
-	__u32 ingress_linum, egress_linum;
+	__u32 idx, ingress_linum, egress_linum, linum;
 	int err;
 
-	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
-				  &egress_linum);
+	idx = EGRESS_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
-	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
-				  &ingress_linum);
+	idx = INGRESS_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
+	idx = READ_SK_DST_PORT_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
+	ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
+	ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
+
 	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
 	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
 	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
@@ -263,7 +279,7 @@ static void test(void)
 	char buf[DATA_LEN];
 
 	/* Prepare listen_fd */
-	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
 	/* start_server() has logged the error details */
 	if (CHECK_FAIL(listen_fd == -1))
 		goto done;
@@ -331,8 +347,12 @@ done:
 
 void test_sock_fields(void)
 {
-	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
 	int parent_cg_fd = -1, child_cg_fd = -1;
+	struct bpf_link *link;
+
+	/* Use a dedicated netns to have a fixed listen port */
+	if (!create_netns())
+		return;
 
 	/* Create a cgroup, get fd, and join it */
 	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
@@ -353,15 +373,20 @@ void test_sock_fields(void)
 	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
 		goto done;
 
-	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
-						 child_cg_fd);
-	if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
+	link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
 		goto done;
+	skel->links.egress_read_sock_fields = link;
 
-	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
-						  child_cg_fd);
-	if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
+	link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
 		goto done;
+	skel->links.ingress_read_sock_fields = link;
+
+	link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
+		goto done;
+	skel->links.read_sk_dst_port = link;
+
 
 	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
 	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
@@ -370,8 +395,7 @@ void test_sock_fields(void)
 	test();
 
 done:
-	bpf_link__destroy(egress_link);
-	bpf_link__destroy(ingress_link);
+	test_sock_fields__detach(skel);
 	test_sock_fields__destroy(skel);
 	if (child_cg_fd >= 0)
 		close(child_cg_fd);
@@ -12,6 +12,7 @@
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
 	INGRESS_LINUM_IDX,
+	READ_SK_DST_PORT_LINUM_IDX,
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -250,4 +251,48 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
 	return CG_OK;
 }
 
+static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
+{
+	__u32 *word = (__u32 *)&sk->dst_port;
+	return word[0] == bpf_htonl(0xcafe0000);
+}
+
+static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
+{
+	__u16 *half = (__u16 *)&sk->dst_port;
+	return half[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
+{
+	__u8 *byte = (__u8 *)&sk->dst_port;
+	return byte[0] == 0xca && byte[1] == 0xfe;
+}
+
+SEC("cgroup_skb/egress")
+int read_sk_dst_port(struct __sk_buff *skb)
+{
+	__u32 linum, linum_idx;
+	struct bpf_sock *sk;
+
+	linum_idx = READ_SK_DST_PORT_LINUM_IDX;
+
+	sk = skb->sk;
+	if (!sk)
+		RET_LOG();
+
+	/* Ignore everything but the SYN from the client socket */
+	if (sk->state != BPF_TCP_SYN_SENT)
+		return CG_OK;
+
+	if (!sk_dst_port__load_word(sk))
+		RET_LOG();
+	if (!sk_dst_port__load_half(sk))
+		RET_LOG();
+	if (!sk_dst_port__load_byte(sk))
+		RET_LOG();
+
+	return CG_OK;
+}
+
 char _license[] SEC("license") = "GPL";
@@ -121,7 +121,25 @@
 	.result = ACCEPT,
 },
 {
-	"sk_fullsock(skb->sk): sk->dst_port [narrow load]",
+	"sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)",
 	.insns = {
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [half load]",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -139,7 +157,7 @@
 	.result = ACCEPT,
 },
 {
-	"sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
+	"sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)",
 	.insns = {
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -149,7 +167,64 @@
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
-	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	.result = REJECT,
 	.errstr = "invalid sock access",
 },
+{
+	"sk_fullsock(skb->sk): sk->dst_port [byte load]",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+	BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "invalid sock access",
+},
+{
+	"sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
 
 void ucall(uint64_t cmd, int nargs, ...)
 {
-	struct ucall uc = {
-		.cmd = cmd,
-	};
+	struct ucall uc = {};
 	va_list va;
 	int i;
 
+	WRITE_ONCE(uc.cmd, cmd);
 	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
 
 	va_start(va, nargs);
 	for (i = 0; i < nargs; ++i)
-		uc.args[i] = va_arg(va, uint64_t);
+		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
 	va_end(va);
 
-	*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
+	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
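The switch to WRITE_ONCE keeps the compiler from tearing, caching, or reordering the stores that the host later reads back across the MMIO exit. Not from the harness itself — the conventional single-access definition, shown to make the guarantee concrete (this is the usual shape of the macro, stated here as background rather than quoted from the tree):

/* A volatile store the compiler must perform exactly once, in program
 * order, at the declared width. */
#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))

/* usage mirrors the patched ucall(): WRITE_ONCE(uc.cmd, cmd); */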
@@ -44,7 +44,7 @@ static inline void nop_loop(void)
 {
 	int i;
 
-	for (i = 0; i < 1000000; i++)
+	for (i = 0; i < 100000000; i++)
 		asm volatile("nop");
 }
 
@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
 	tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
 	GUEST_ASSERT(tsc_freq > 0);
 
-	/* First, check MSR-based clocksource */
+	/* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
 	r1 = rdtsc();
 	t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
 	nop_loop();
 	r2 = rdtsc();
 	t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
 
 	GUEST_ASSERT(r2 > r1 && t2 > t1);
 
@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
 	tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
 	TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
 
-	/* First, check MSR-based clocksource */
+	/* For increased accuracy, take mean rdtsc() before and afrer ioctl */
 	r1 = rdtsc();
 	t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
 	r1 = (r1 + rdtsc()) / 2;
 	nop_loop();
 	r2 = rdtsc();
 	t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
 
 	TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
 
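Not from the test: the averaging trick in isolation. Reading the TSC on both sides of a slow operation and taking the midpoint halves the worst-case error in attributing a timestamp to that operation. A small self-contained sketch for x86-64 (function names invented):

#include <stdint.h>

static inline uint64_t rdtsc_sketch(void)
{
	uint32_t lo, hi;
	__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t)hi << 32) | lo;
}

/* Midpoint estimate of when slow_read() sampled its clock. */
static uint64_t timestamp_midpoint(uint64_t (*slow_read)(void), uint64_t *value)
{
	uint64_t r = rdtsc_sketch();

	*value = slow_read();
	return (r + rdtsc_sketch()) / 2;
}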
@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
 	return l;
 }
 
+static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
+{
+	char x[128];
+	FILE *f;
+	size_t l;
+
+	snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
+	f = fopen(x, "r");
+	if (!f) {
+		buffer[0] = 0;
+		l = 0;
+	} else {
+		l = fread(buffer, 1, sizeof(buffer), f);
+		buffer[l] = 0;
+		fclose(f);
+	}
+	return l;
+}
+
 /*
  * Put a size string together
@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
 {
 	printf("\n%s: Kernel object allocation\n", s->name);
 	printf("-----------------------------------------------------------------------\n");
-	if (read_slab_obj(s, "alloc_calls"))
+	if (read_debug_slab_obj(s, "alloc_traces"))
+		printf("%s", buffer);
+	else if (read_slab_obj(s, "alloc_calls"))
 		printf("%s", buffer);
 	else
 		printf("No Data\n");
 
 	printf("\n%s: Kernel object freeing\n", s->name);
 	printf("------------------------------------------------------------------------\n");
-	if (read_slab_obj(s, "free_calls"))
+	if (read_debug_slab_obj(s, "free_traces"))
+		printf("%s", buffer);
+	else if (read_slab_obj(s, "free_calls"))
 		printf("%s", buffer);
 	else
 		printf("No Data\n");