KVM: x86/PVM: Add support for 5-level paging mode
When 5-level paging mode is enabled on the host, the guest can be in either
4-level or 5-level paging mode. For a guest in 4-level paging mode, only the
topmost 128TB of the address space is canonical. Therefore, the hypervisor
needs to reserve two ranges: one in the vmalloc area for 5-level paging mode
guests, and another in the topmost 128TB for 4-level paging mode guests. If
the allocation of the range for the 5-level paging mode guest fails, then
5-level paging mode is disabled for the guest.

Signed-off-by: Hou Wenlong <[email protected]>
Link: #6
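
As background for the reserved ranges: with 4KB pages and 9 index bits per
page-table level, the topmost 128TB begins at 0xffff800000000000, which is
PML4 index 256 and sits in PML5 entry 511 on a 5-level host. Below is a
minimal userspace sketch of that arithmetic; PT_INDEX is a stand-in for the
kernel's __PT_INDEX(), not the real macro.

#include <stdio.h>

/* Stand-in for __PT_INDEX(): index of 'addr' at a given paging level,
 * assuming 4KB pages and 9 index bits per level. */
#define PT_INDEX(addr, level) (((addr) >> (12 + 9 * ((level) - 1))) & 0x1ffUL)

int main(void)
{
        /* PVM_GUEST_MAPPING_START: base of the topmost 128TB, the only
         * kernel-half range a 4-level guest can address canonically. */
        unsigned long start = -1UL << 47;       /* 0xffff800000000000 */

        printf("PML4 index: %lu\n", PT_INDEX(start, 4));        /* 256 */
        printf("PML5 index: %lu\n", PT_INDEX(start, 5));        /* 511 */
        return 0;
}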
bysui committed Apr 25, 2024
1 parent 5ceb9c5 commit 7405002
Showing 2 changed files with 67 additions and 21 deletions.
arch/x86/kvm/pvm/host_mmu.c: 62 changes (44 additions & 18 deletions)
@@ -21,7 +21,12 @@
 #include "mmu/spte.h"
 #include "pvm.h"
 
-static struct vm_struct *pvm_va_range_l4;
+#define L4_PT_INDEX(address)    __PT_INDEX(address, 4, 9)
+#define L5_PT_INDEX(address)    __PT_INDEX(address, 5, 9)
+
+#define PVM_GUEST_MAPPING_START (-1UL << 47)
+
+static struct vm_struct *pvm_va_range;
 
 u32 pml4_index_start;
 u32 pml4_index_end;
@@ -35,15 +40,39 @@ static int __init guest_address_space_init(void)
                return -1;
        }
 
-       pvm_va_range_l4 = get_vm_area_align(DEFAULT_RANGE_L4_SIZE, PT_L4_SIZE,
-                                           VM_ALLOC|VM_NO_GUARD);
-       if (!pvm_va_range_l4)
-               return -1;
+       if (pgtable_l5_enabled()) {
+               if (IS_ENABLED(CONFIG_KASAN)) {
+                       pr_warn("CONFIG_KASAN is not compatible with PVM on 5-level paging mode\n");
+                       return -1;
+               }
+
+               BUILD_BUG_ON(PVM_GUEST_MAPPING_START != VADDR_END_L5);
+
+               pml4_index_start = L4_PT_INDEX(PVM_GUEST_MAPPING_START);
+               pml4_index_end = L4_PT_INDEX(RAW_CPU_ENTRY_AREA_BASE);
+
+               pvm_va_range = get_vm_area_align(DEFAULT_RANGE_L5_SIZE, PT_L5_SIZE,
+                                                VM_ALLOC|VM_NO_GUARD);
+               if (!pvm_va_range) {
+                       pml5_index_start = 0x1ff;
+                       pml5_index_end = 0x1ff;
+               } else {
+                       pml5_index_start = L5_PT_INDEX((u64)pvm_va_range->addr);
+                       pml5_index_end = L5_PT_INDEX((u64)pvm_va_range->addr +
+                                                    (u64)pvm_va_range->size);
+               }
+       } else {
+               pvm_va_range = get_vm_area_align(DEFAULT_RANGE_L4_SIZE, PT_L4_SIZE,
+                                                VM_ALLOC|VM_NO_GUARD);
+               if (!pvm_va_range)
+                       return -1;
+
+               pml4_index_start = L4_PT_INDEX((u64)pvm_va_range->addr);
+               pml4_index_end = L4_PT_INDEX((u64)pvm_va_range->addr + (u64)pvm_va_range->size);
+               pml5_index_start = 0x1ff;
+               pml5_index_end = 0x1ff;
+       }
 
-       pml4_index_start = __PT_INDEX((u64)pvm_va_range_l4->addr, 4, 9);
-       pml4_index_end = __PT_INDEX((u64)pvm_va_range_l4->addr + (u64)pvm_va_range_l4->size, 4, 9);
-       pml5_index_start = 0x1ff;
-       pml5_index_end = 0x1ff;
        return 0;
 }
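
A note on the hunk above: the area is requested with get_vm_area_align() so
that it starts on the span of one top-level entry, which makes the reserved
range occupy whole PML4 or PML5 entries and the computed index pair exact.
Here is a hypothetical illustration, assuming the usual per-entry spans (the
actual PT_L4_SIZE and PT_L5_SIZE definitions are not visible in this diff).

#include <assert.h>

/* Assumed per-entry spans with 4KB pages and 9 index bits per level. */
#define L4_ENTRY_SPAN   (1UL << 39)     /* one PML4 entry maps 512GB */
#define L5_ENTRY_SPAN   (1UL << 48)     /* one PML5 entry maps 256TB */

int main(void)
{
        /* The 4-level vmalloc base: aligned to one PML4 entry's span, so
         * its top-level index boundary is exact (index 402, offset 0). */
        unsigned long addr = 0xffffc90000000000UL;

        assert(addr % L4_ENTRY_SPAN == 0);
        assert(((addr >> 39) & 0x1ff) == 402);  /* PML4 index */
        assert(((addr >> 48) & 0x1ff) == 511);  /* PML5 entry it lives in */
        return 0;
}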

@@ -92,28 +121,25 @@ int __init host_mmu_init(void)
                clone_host_mmu(host_mmu_root_pgd, host_pgd, pml5_index_start, pml5_index_end);
                clone_host_mmu(host_mmu_la57_top_p4d, __va(host_pgd[511] & SPTE_BASE_ADDR_MASK),
                               pml4_index_start, pml4_index_end);
+               host_mmu_root_pgd[511] = (host_pgd[511] & ~SPTE_BASE_ADDR_MASK) |
+                                        __pa(host_mmu_la57_top_p4d);
+               host_mmu_root_pgd[511] &= ~(_PAGE_USER | SPTE_MMU_PRESENT_MASK);
        } else {
                clone_host_mmu(host_mmu_root_pgd, host_pgd, pml4_index_start, pml4_index_end);
        }
 
-       if (pgtable_l5_enabled()) {
-               pr_warn("Supporting for LA57 host is not fully implemented yet.\n");
-               host_mmu_destroy();
-               return -EOPNOTSUPP;
-       }
-
        return 0;
 }
 
 void host_mmu_destroy(void)
 {
-       if (pvm_va_range_l4)
-               free_vm_area(pvm_va_range_l4);
+       if (pvm_va_range)
+               free_vm_area(pvm_va_range);
        if (host_mmu_root_pgd)
                free_page((unsigned long)(void *)host_mmu_root_pgd);
        if (host_mmu_la57_top_p4d)
                free_page((unsigned long)(void *)host_mmu_la57_top_p4d);
-       pvm_va_range_l4 = NULL;
+       pvm_va_range = NULL;
        host_mmu_root_pgd = NULL;
        host_mmu_la57_top_p4d = NULL;
 }
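
A recurring detail above is the 0x1ff value: it is a sentinel, and a range
that was not reserved is encoded with both its start and end index set to
0x1ff. A hypothetical helper spelling out that convention (not part of the
patch; u32 and bool as in <linux/types.h>):

/* Hypothetical helper: under the PVM convention above, an absent range
 * is encoded as start == end == 0x1ff. */
static inline bool pvm_index_range_absent(u32 start, u32 end)
{
        return start == 0x1ff && end == 0x1ff;
}
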
arch/x86/kvm/pvm/pvm.c: 26 changes (23 additions & 3 deletions)
@@ -953,9 +953,26 @@ static bool pvm_check_and_set_msr_linear_address_range(struct vcpu_pvm *pvm, u64 msr)
        if ((msr & 0xff00ff00ff00ff00) != 0xff00ff00ff00ff00)
                return false;
 
+       if (pml4_i_s > pml4_i_e || pml5_i_s > pml5_i_e)
+               return false;
+
+       /*
+        * The PVM specification requires the index to be set to '0x1ff' if
+        * the size of the range is 0.
+        */
+       if ((pml4_i_s == pml4_i_e && pml4_i_s != 0x1ff) ||
+           (pml5_i_s == pml5_i_e && pml5_i_s != 0x1ff))
+               return false;
+
        /* Guest ranges should be inside what the hypervisor can provide. */
-       if (pml4_i_s < pml4_index_start || pml4_i_e > pml4_index_end ||
-           pml5_i_s < pml5_index_start || pml5_i_e > pml5_index_end)
+       if (pml4_i_s < pml4_index_start || pml4_i_e > pml4_index_end)
                return false;
 
+       /*
+        * Allow migration of a guest in 4-level paging mode between hosts in
+        * both 4-level and 5-level paging mode.
+        */
+       if (pml5_i_s != 0x1ff && (pml5_i_s < pml5_index_start || pml5_i_e > pml5_index_end))
+               return false;
+
        pvm_set_msr_linear_address_range(pvm, pml4_i_s, pml4_i_e, pml5_i_s, pml5_i_e);
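
To see the migration rule in action, here is a small userspace re-statement
of the checks above. The host-side index values are made up for the example
(a host whose 4-level range covers PML4 indices 402..404 and which has no
5-level range); the field extraction from the real MSR is not modeled.

#include <stdbool.h>
#include <stdio.h>

/* Example host-provided ranges; values are illustrative only. */
static unsigned int pml4_index_start = 402, pml4_index_end = 404;
static unsigned int pml5_index_start = 0x1ff, pml5_index_end = 0x1ff;

static bool check_linear_address_range(unsigned int p4s, unsigned int p4e,
                                       unsigned int p5s, unsigned int p5e)
{
        if (p4s > p4e || p5s > p5e)
                return false;
        /* An empty range must use the 0x1ff sentinel. */
        if ((p4s == p4e && p4s != 0x1ff) || (p5s == p5e && p5s != 0x1ff))
                return false;
        if (p4s < pml4_index_start || p4e > pml4_index_end)
                return false;
        /* A 4-level guest (sentinel PML5 range) is accepted on any host. */
        if (p5s != 0x1ff && (p5s < pml5_index_start || p5e > pml5_index_end))
                return false;
        return true;
}

int main(void)
{
        printf("%d\n", check_linear_address_range(402, 404, 0x1ff, 0x1ff)); /* 1: accepted */
        printf("%d\n", check_linear_address_range(402, 404, 0x100, 0x101)); /* 0: host has no PML5 range */
        return 0;
}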
@@ -2979,8 +2996,11 @@ static __init void pvm_set_cpu_caps(void)
        /*
         * Unlike VMX/SVM, which can switch paging mode atomically, PVM
         * implements guest LA57 through host LA57 shadow paging.
+        *
+        * If the allocation of the reserved range fails, disable support for
+        * 5-level paging.
         */
-       if (!pgtable_l5_enabled())
+       if (!pgtable_l5_enabled() || pml5_index_start == 0x1ff)
                kvm_cpu_cap_clear(X86_FEATURE_LA57);
 
        /*
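
Tying the two files together: on a 5-level host where the vmalloc reservation
failed, pml5_index_start stays at the 0x1ff sentinel, so the hunk above clears
X86_FEATURE_LA57 and the guest falls back to the 4-level range reserved in the
topmost 128TB. A toy model of that decision, with names invented for
illustration:

#include <stdbool.h>

/* Toy model of the LA57 fallback; struct and names are invented. */
struct host_paging_state {
        bool l5_enabled;                /* pgtable_l5_enabled() */
        unsigned int pml5_index_start;  /* 0x1ff: no 5-level range reserved */
};

static bool guest_may_use_la57(const struct host_paging_state *h)
{
        /* Mirrors: if (!pgtable_l5_enabled() || pml5_index_start == 0x1ff)
         *                  kvm_cpu_cap_clear(X86_FEATURE_LA57); */
        return h->l5_enabled && h->pml5_index_start != 0x1ff;
}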
