ANDROID: KVM: arm64: Refactor the guest teardown path
The __pkvm_teardown_vm hypercall can take a long time: measured on Pixel 6, up to 150+ms. The vast majority of that time is spent walking the guest stage-2 page-table to put its pages in the 'pending reclaim' state, which was introduced to allow poisoning the pages asynchronously. Given that pKVM is fundamentally non-preemptible, those 150+ms are not acceptable.

In order to spread the work over multiple smaller sections, let's split the teardown procedure in two. A first hypercall places the VM in a 'dying' state once all the required sanity checks have passed (e.g. no vCPUs are currently loaded). While in the dying state, the hypervisor denies any attempt to load vCPUs or run the VM, but accepts requests to reclaim guest pages. Once all guest pages have been reclaimed, the host issues a second hypercall to finalize the teardown, which frees the handle and returns all pages used to store guest metadata at EL2 back to EL1.

This was tested on Pixel 6 with android14-6.1 while concurrently running a memory-intensive benchmark on the host and a large protected guest. The length of EL2 periods was measured by parsing pKVM traces, and all outliers of 200+us were entirely eliminated.

Bug: 229972313
Bug: 238945523
Change-Id: Iaa426a964e1f7a5e48e9365aaec4700a62b3b776
Signed-off-by: Quentin Perret <qperret@google.com>
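For illustration, the host-side teardown then boils down to the sequence below. This is a simplified sketch of the pkvm_destroy_hyp_vm() change in the diff that follows; the teardown_protected_vm() wrapper name is purely illustrative, and error handling, pinned-page rb-tree maintenance and locked-vm accounting are omitted.

/*
 * Simplified sketch of the new host-side flow (see the
 * pkvm_destroy_hyp_vm() hunk below). The wrapper name is illustrative.
 */
static void teardown_protected_vm(struct kvm *host_kvm)
{
	struct kvm_pinned_page *ppage;
	struct rb_node *node;

	/* 1. Mark the VM as dying: vCPUs can no longer be loaded or run. */
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm,
				  host_kvm->arch.pkvm.handle));

	/*
	 * 2. Reclaim guest pages one at a time, rescheduling in between,
	 *    so no single EL2 section walks the whole stage-2 table.
	 */
	for (node = rb_first(&host_kvm->arch.pkvm.pinned_pages); node;
	     node = rb_next(node)) {
		ppage = rb_entry(node, struct kvm_pinned_page, node);
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
					  host_kvm->arch.pkvm.handle,
					  page_to_pfn(ppage->page),
					  ppage->ipa));
		cond_resched();
	}

	/* 3. Free the handle and get the EL2 metadata pages back. */
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm,
				  host_kvm->arch.pkvm.handle));
}

Each __pkvm_reclaim_dying_guest_page call only touches a single page at EL2, so the host can reschedule between calls instead of spending one long non-preemptible section at EL2.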
@@ -79,7 +79,6 @@ enum __kvm_host_smccc_func {
 	/* Hypercalls available after pKVM finalisation */
 	__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
 	__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
-	__KVM_HOST_SMCCC_FUNC___pkvm_host_reclaim_page,
 	__KVM_HOST_SMCCC_FUNC___pkvm_host_map_guest,
 	__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
 	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
@@ -88,7 +87,9 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
-	__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+	__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
+	__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
+	__KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state,
@@ -70,7 +70,7 @@ extern unsigned long hyp_nr_cpus;
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
 int __pkvm_host_unshare_hyp(u64 pfn);
-int __pkvm_host_reclaim_page(u64 pfn);
+int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa);
 int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
@@ -98,13 +98,15 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 int hyp_register_host_perm_fault_handler(int (*cb)(struct kvm_cpu_context *ctxt, u64 esr, u64 addr));
 int hyp_pin_shared_mem(void *from, void *to);
 void hyp_unpin_shared_mem(void *from, void *to);
-void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
 int host_stage2_get_leaf(phys_addr_t phys, kvm_pte_t *ptep, u32 *level);
 int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
 		    struct kvm_hyp_memcache *host_mc);
 
 int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot);
 
+void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm);
+void drain_hyp_pool(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
+
 void psci_mem_protect_inc(u64 n);
 void psci_mem_protect_dec(u64 n);
 
@@ -64,6 +64,13 @@ struct pkvm_hyp_vm {
 	 */
 	unsigned int nr_vcpus;
 
+	/*
+	 * True when the guest is being torn down. When in this state, the
+	 * guest's vCPUs can't be loaded anymore, but its pages can be
+	 * reclaimed by the host.
+	 */
+	bool is_dying;
+
 	/* Array of the hyp vCPU structures for this VM. */
 	struct pkvm_hyp_vcpu *vcpus[];
 };
@@ -96,7 +103,9 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
 		   unsigned long pgd_hva, unsigned long last_ran_hva);
 int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
 		     unsigned long vcpu_hva);
-int __pkvm_teardown_vm(pkvm_handle_t handle);
+int __pkvm_start_teardown_vm(pkvm_handle_t handle);
+int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 pfn, u64 ipa);
 
 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
 					 unsigned int vcpu_idx);
@@ -1061,11 +1061,13 @@ static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
 }
 
-static void handle___pkvm_host_reclaim_page(struct kvm_cpu_context *host_ctxt)
+static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
 {
-	DECLARE_REG(u64, pfn, host_ctxt, 1);
+	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+	DECLARE_REG(u64, pfn, host_ctxt, 2);
+	DECLARE_REG(u64, ipa, host_ctxt, 3);
 
-	cpu_reg(host_ctxt, 1) = __pkvm_host_reclaim_page(pfn);
+	cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, pfn, ipa);
 }
 
 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
@@ -1120,13 +1122,19 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
 }
 
-static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
+static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
 
-	cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
+	cpu_reg(host_ctxt, 1) = __pkvm_start_teardown_vm(handle);
 }
 
+static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
+}
 static void handle___pkvm_iommu_driver_init(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(struct pkvm_iommu_driver*, drv, host_ctxt, 1);
@@ -1275,7 +1283,6 @@ static const hcall_t host_hcall[] = {
 
 	HANDLE_FUNC(__pkvm_host_share_hyp),
 	HANDLE_FUNC(__pkvm_host_unshare_hyp),
-	HANDLE_FUNC(__pkvm_host_reclaim_page),
 	HANDLE_FUNC(__pkvm_host_map_guest),
 	HANDLE_FUNC(__kvm_adjust_pc),
 	HANDLE_FUNC(__kvm_vcpu_run),
@@ -1284,7 +1291,9 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
 	HANDLE_FUNC(__pkvm_init_vm),
 	HANDLE_FUNC(__pkvm_init_vcpu),
-	HANDLE_FUNC(__pkvm_teardown_vm),
+	HANDLE_FUNC(__pkvm_start_teardown_vm),
+	HANDLE_FUNC(__pkvm_finalize_teardown_vm),
+	HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
 	HANDLE_FUNC(__pkvm_vcpu_load),
 	HANDLE_FUNC(__pkvm_vcpu_put),
 	HANDLE_FUNC(__pkvm_vcpu_sync_state),
@@ -284,61 +284,6 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
 	return 0;
 }
 
-static int reclaim_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			  enum kvm_pgtable_walk_flags flag, void * const arg)
-{
-	kvm_pte_t pte = *ptep;
-	struct hyp_page *page;
-
-	if (!kvm_pte_valid(pte))
-		return 0;
-
-	page = hyp_phys_to_page(kvm_pte_to_phys(pte));
-	switch (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte))) {
-	case PKVM_PAGE_OWNED:
-		page->flags |= HOST_PAGE_NEED_POISONING;
-		fallthrough;
-	case PKVM_PAGE_SHARED_BORROWED:
-	case PKVM_PAGE_SHARED_OWNED:
-		page->flags |= HOST_PAGE_PENDING_RECLAIM;
-		break;
-	default:
-		return -EPERM;
-	}
-
-	return 0;
-}
-
-void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
-{
-
-	struct kvm_pgtable_walker walker = {
-		.cb = reclaim_walker,
-		.flags = KVM_PGTABLE_WALK_LEAF
-	};
-	void *addr;
-
-	host_lock_component();
-	guest_lock_component(vm);
-
-	/* Reclaim all guest pages and dump all pgtable pages in the hyp_pool */
-	BUG_ON(kvm_pgtable_walk(&vm->pgt, 0, BIT(vm->pgt.ia_bits), &walker));
-	kvm_pgtable_stage2_destroy(&vm->pgt);
-	vm->kvm.arch.mmu.pgd_phys = 0ULL;
-
-	guest_unlock_component(vm);
-	host_unlock_component();
-
-	/* Drain the hyp_pool into the memcache */
-	addr = hyp_alloc_pages(&vm->pool, 0);
-	while (addr) {
-		memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
-		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
-		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
-		addr = hyp_alloc_pages(&vm->pool, 0);
-	}
-}
-
 struct relinquish_data {
 	enum pkvm_page_state expected_state;
 	u64 pa;
@@ -2123,40 +2068,69 @@ void hyp_poison_page(phys_addr_t phys)
 	hyp_fixmap_unmap();
 }
 
-int __pkvm_host_reclaim_page(u64 pfn)
+void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm)
 {
-	u64 addr = hyp_pfn_to_phys(pfn);
-	struct hyp_page *page;
+	guest_lock_component(vm);
+	kvm_pgtable_stage2_destroy(&vm->pgt);
+	guest_unlock_component(vm);
+}
+
+void drain_hyp_pool(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
+{
+	void *addr = hyp_alloc_pages(&vm->pool, 0);
+
+	while (addr) {
+		memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
+		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
+		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
+		addr = hyp_alloc_pages(&vm->pool, 0);
+	}
+}
+
+int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa)
+{
+	phys_addr_t phys = hyp_pfn_to_phys(pfn);
 	kvm_pte_t pte;
 	int ret;
 
 	host_lock_component();
+	guest_lock_component(vm);
 
-	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, NULL);
+	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, NULL);
 	if (ret)
 		goto unlock;
 
-	if (host_get_page_state(pte, addr) == PKVM_PAGE_OWNED)
+	if (!kvm_pte_valid(pte)) {
+		ret = -EINVAL;
 		goto unlock;
-
-	page = hyp_phys_to_page(addr);
-	if (!(page->flags & HOST_PAGE_PENDING_RECLAIM)) {
+	} else if (phys != kvm_pte_to_phys(pte)) {
 		ret = -EPERM;
 		goto unlock;
 	}
 
-	if (page->flags & HOST_PAGE_NEED_POISONING) {
-		hyp_poison_page(addr);
-		page->flags &= ~HOST_PAGE_NEED_POISONING;
+	/* We could avoid TLB inval, it is done per VMID on the finalize path */
+	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
+
+	switch(guest_get_page_state(pte, ipa)) {
+	case PKVM_PAGE_OWNED:
+		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
+		hyp_poison_page(phys);
 		psci_mem_protect_dec(1);
+		break;
+	case PKVM_PAGE_SHARED_BORROWED:
+		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
+		break;
+	case PKVM_PAGE_SHARED_OWNED:
+		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
+		break;
+	default:
+		BUG_ON(1);
 	}
 
-	ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, PKVM_ID_HOST);
-	if (ret)
-		goto unlock;
-	page->flags &= ~HOST_PAGE_PENDING_RECLAIM;
+	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
 
 unlock:
+	guest_unlock_component(vm);
 	host_unlock_component();
 
 	return ret;
@@ -268,6 +268,27 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
 	return vm_table[idx];
 }
 
+int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 pfn, u64 ipa)
+{
+	struct pkvm_hyp_vm *hyp_vm;
+	int ret = -EINVAL;
+
+	hyp_spin_lock(&vm_table_lock);
+	hyp_vm = get_vm_by_handle(handle);
+	if (!hyp_vm || !hyp_vm->is_dying)
+		goto unlock;
+
+	ret = __pkvm_host_reclaim_page(hyp_vm, pfn, ipa);
+	if (ret)
+		goto unlock;
+
+	drain_hyp_pool(hyp_vm, &hyp_vm->host_kvm->arch.pkvm.teardown_stage2_mc);
+unlock:
+	hyp_spin_unlock(&vm_table_lock);
+
+	return ret;
+}
+
 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
 					 unsigned int vcpu_idx)
 {
@@ -280,7 +301,7 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
 
 	hyp_spin_lock(&vm_table_lock);
 	hyp_vm = get_vm_by_handle(handle);
-	if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
+	if (!hyp_vm || hyp_vm->is_dying || hyp_vm->nr_vcpus <= vcpu_idx)
 		goto unlock;
 
 	hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
@@ -796,7 +817,33 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
 	unmap_donated_memory_noclear(addr, size);
 }
 
-int __pkvm_teardown_vm(pkvm_handle_t handle)
+int __pkvm_start_teardown_vm(pkvm_handle_t handle)
+{
+	struct pkvm_hyp_vm *hyp_vm;
+	int ret = 0;
+
+	hyp_spin_lock(&vm_table_lock);
+	hyp_vm = get_vm_by_handle(handle);
+	if (!hyp_vm) {
+		ret = -ENOENT;
+		goto unlock;
+	} else if (WARN_ON(hyp_page_count(hyp_vm))) {
+		ret = -EBUSY;
+		goto unlock;
+	} else if (hyp_vm->is_dying) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	hyp_vm->is_dying = true;
+
+unlock:
+	hyp_spin_unlock(&vm_table_lock);
+
+	return ret;
+}
+
+int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
 {
 	struct kvm_hyp_memcache *mc, *stage2_mc;
 	size_t vm_size, last_ran_size;
@@ -811,9 +858,7 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
 	if (!hyp_vm) {
 		err = -ENOENT;
 		goto err_unlock;
-	}
-
-	if (WARN_ON(hyp_page_count(hyp_vm))) {
+	} else if (!hyp_vm->is_dying) {
 		err = -EBUSY;
 		goto err_unlock;
 	}
@@ -828,8 +873,8 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
 	mc = &host_kvm->arch.pkvm.teardown_mc;
 	stage2_mc = &host_kvm->arch.pkvm.teardown_stage2_mc;
 
-	/* Reclaim guest pages (including page-table pages) */
-	reclaim_guest_pages(hyp_vm, stage2_mc);
+	destroy_hyp_vm_pgt(hyp_vm);
+	drain_hyp_pool(hyp_vm, stage2_mc);
 	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);
 
 	/* Push the metadata pages to the teardown memcache */
@@ -315,21 +315,18 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 	struct mm_struct *mm = current->mm;
 	struct rb_node *node;
 
-	if (host_kvm->arch.pkvm.handle) {
-		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
-					  host_kvm->arch.pkvm.handle));
-	}
+	if (!host_kvm->arch.pkvm.handle)
+		goto out_free;
 
-	host_kvm->arch.pkvm.handle = 0;
-	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc, host_kvm);
-	free_hyp_stage2_memcache(&host_kvm->arch.pkvm.teardown_stage2_mc,
-				 host_kvm);
+	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));
 
 	node = rb_first(&host_kvm->arch.pkvm.pinned_pages);
 	while (node) {
 		ppage = rb_entry(node, struct kvm_pinned_page, node);
-		WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_reclaim_page,
-					  page_to_pfn(ppage->page)));
+		WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
+					  host_kvm->arch.pkvm.handle,
+					  page_to_pfn(ppage->page),
+					  ppage->ipa));
 		cond_resched();
 
 		account_locked_vm(mm, 1, false);
@@ -338,6 +335,14 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 		rb_erase(&ppage->node, &host_kvm->arch.pkvm.pinned_pages);
 		kfree(ppage);
 	}
+
+	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));
+
+out_free:
+	host_kvm->arch.pkvm.handle = 0;
+	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc, host_kvm);
+	free_hyp_stage2_memcache(&host_kvm->arch.pkvm.teardown_stage2_mc,
+				 host_kvm);
 }
 
 int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)