From 7405002d81d07b86beba160a83a9f224906d5ce3 Mon Sep 17 00:00:00 2001
From: Hou Wenlong
Date: Tue, 16 Apr 2024 10:44:48 +0800
Subject: [PATCH] KVM: x86/PVM: Add support for 5-level paging mode

When 5-level paging is enabled on the host, the guest can run in either
4-level or 5-level paging mode. In 4-level paging mode, only the topmost
128TB of the address space is canonical, so the hypervisor needs to
reserve two ranges: one in the vmalloc area for 5-level paging mode
guests, and another in the topmost 128TB for 4-level paging mode guests.
If the allocation of the range for 5-level paging mode guests fails,
5-level paging mode is disabled for the guest.

Signed-off-by: Hou Wenlong
Link: https://github.com/virt-pvm/linux/issues/6
---
 arch/x86/kvm/pvm/host_mmu.c | 62 ++++++++++++++++++++++++++-----------
 arch/x86/kvm/pvm/pvm.c      | 26 ++++++++++++++--
 2 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/pvm/host_mmu.c b/arch/x86/kvm/pvm/host_mmu.c
index 35e97f4f7055b7..d3ce4c556457dd 100644
--- a/arch/x86/kvm/pvm/host_mmu.c
+++ b/arch/x86/kvm/pvm/host_mmu.c
@@ -21,7 +21,12 @@
 #include "mmu/spte.h"
 #include "pvm.h"
 
-static struct vm_struct *pvm_va_range_l4;
+#define L4_PT_INDEX(address)	__PT_INDEX(address, 4, 9)
+#define L5_PT_INDEX(address)	__PT_INDEX(address, 5, 9)
+
+#define PVM_GUEST_MAPPING_START	(-1UL << 47)
+
+static struct vm_struct *pvm_va_range;
 
 u32 pml4_index_start;
 u32 pml4_index_end;
@@ -35,15 +40,39 @@ static int __init guest_address_space_init(void)
 		return -1;
 	}
 
-	pvm_va_range_l4 = get_vm_area_align(DEFAULT_RANGE_L4_SIZE, PT_L4_SIZE,
-					    VM_ALLOC|VM_NO_GUARD);
-	if (!pvm_va_range_l4)
-		return -1;
+	if (pgtable_l5_enabled()) {
+		if (IS_ENABLED(CONFIG_KASAN)) {
+			pr_warn("CONFIG_KASAN is not compatible with PVM in 5-level paging mode\n");
+			return -1;
+		}
+
+		BUILD_BUG_ON(PVM_GUEST_MAPPING_START != VADDR_END_L5);
+
+		pml4_index_start = L4_PT_INDEX(PVM_GUEST_MAPPING_START);
+		pml4_index_end = L4_PT_INDEX(RAW_CPU_ENTRY_AREA_BASE);
+
+		pvm_va_range = get_vm_area_align(DEFAULT_RANGE_L5_SIZE, PT_L5_SIZE,
+						 VM_ALLOC|VM_NO_GUARD);
+		if (!pvm_va_range) {
+			pml5_index_start = 0x1ff;
+			pml5_index_end = 0x1ff;
+		} else {
+			pml5_index_start = L5_PT_INDEX((u64)pvm_va_range->addr);
+			pml5_index_end = L5_PT_INDEX((u64)pvm_va_range->addr +
						     (u64)pvm_va_range->size);
+		}
+	} else {
+		pvm_va_range = get_vm_area_align(DEFAULT_RANGE_L4_SIZE, PT_L4_SIZE,
+						 VM_ALLOC|VM_NO_GUARD);
+		if (!pvm_va_range)
+			return -1;
+
+		pml4_index_start = L4_PT_INDEX((u64)pvm_va_range->addr);
+		pml4_index_end = L4_PT_INDEX((u64)pvm_va_range->addr + (u64)pvm_va_range->size);
+		pml5_index_start = 0x1ff;
+		pml5_index_end = 0x1ff;
+	}
 
-	pml4_index_start = __PT_INDEX((u64)pvm_va_range_l4->addr, 4, 9);
-	pml4_index_end = __PT_INDEX((u64)pvm_va_range_l4->addr + (u64)pvm_va_range_l4->size, 4, 9);
-	pml5_index_start = 0x1ff;
-	pml5_index_end = 0x1ff;
 	return 0;
 }
 
@@ -92,28 +121,25 @@ int __init host_mmu_init(void)
 		clone_host_mmu(host_mmu_root_pgd, host_pgd, pml5_index_start, pml5_index_end);
 		clone_host_mmu(host_mmu_la57_top_p4d, __va(host_pgd[511] & SPTE_BASE_ADDR_MASK),
			       pml4_index_start, pml4_index_end);
+		host_mmu_root_pgd[511] = (host_pgd[511] & ~SPTE_BASE_ADDR_MASK) |
					 __pa(host_mmu_la57_top_p4d);
+		host_mmu_root_pgd[511] &= ~(_PAGE_USER | SPTE_MMU_PRESENT_MASK);
 	} else {
 		clone_host_mmu(host_mmu_root_pgd, host_pgd, pml4_index_start, pml4_index_end);
 	}
 
-	if (pgtable_l5_enabled()) {
-		pr_warn("Supporting for LA57 host is not fully implemented yet.\n");
-		host_mmu_destroy();
-		return -EOPNOTSUPP;
-	}
-
 	return 0;
 }
 
 void host_mmu_destroy(void)
 {
-	if (pvm_va_range_l4)
-		free_vm_area(pvm_va_range_l4);
+	if (pvm_va_range)
+		free_vm_area(pvm_va_range);
 	if (host_mmu_root_pgd)
 		free_page((unsigned long)(void *)host_mmu_root_pgd);
 	if (host_mmu_la57_top_p4d)
 		free_page((unsigned long)(void *)host_mmu_la57_top_p4d);
-	pvm_va_range_l4 = NULL;
+	pvm_va_range = NULL;
 	host_mmu_root_pgd = NULL;
 	host_mmu_la57_top_p4d = NULL;
 }
diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 37e8a19bc0649c..14a362c672f6ca 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -953,9 +953,26 @@ static bool pvm_check_and_set_msr_linear_address_range(struct vcpu_pvm *pvm, u64
 	if ((msr & 0xff00ff00ff00ff00) != 0xff00ff00ff00ff00)
 		return false;
 
+	if (pml4_i_s > pml4_i_e || pml5_i_s > pml5_i_e)
+		return false;
+
+	/*
+	 * The PVM specification requires the index to be set to 0x1ff if
+	 * the size of the range is 0.
+	 */
+	if ((pml4_i_s == pml4_i_e && pml4_i_s != 0x1ff) ||
+	    (pml5_i_s == pml5_i_e && pml5_i_s != 0x1ff))
+		return false;
+
 	/* Guest ranges should be inside what the hypervisor can provide. */
-	if (pml4_i_s < pml4_index_start || pml4_i_e > pml4_index_end ||
-	    pml5_i_s < pml5_index_start || pml5_i_e > pml5_index_end)
+	if (pml4_i_s < pml4_index_start || pml4_i_e > pml4_index_end)
+		return false;
+
+	/*
+	 * Allow a guest in 4-level paging mode to migrate between hosts
+	 * in 4-level paging mode and hosts in 5-level paging mode.
+	 */
+	if (pml5_i_s != 0x1ff && (pml5_i_s < pml5_index_start || pml5_i_e > pml5_index_end))
 		return false;
 
 	pvm_set_msr_linear_address_range(pvm, pml4_i_s, pml4_i_e, pml5_i_s, pml5_i_e);
@@ -2979,8 +2996,11 @@ static __init void pvm_set_cpu_caps(void)
 	/*
 	 * Unlike VMX/SVM which can switches paging mode atomically, PVM
 	 * implements guest LA57 through host LA57 shadow paging.
+	 *
+	 * If the allocation of the reserved range fails, disable support
+	 * for 5-level paging in the guest.
 	 */
-	if (!pgtable_l5_enabled())
+	if (!pgtable_l5_enabled() || pml5_index_start == 0x1ff)
 		kvm_cpu_cap_clear(X86_FEATURE_LA57);
 
 	/*
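
For reference, the index arithmetic above can be checked with the small
standalone program below. It mirrors the kernel's __PT_INDEX() definition
(PAGE_SHIFT plus 9 index bits per paging level); the local macro copies and
the main() harness are illustrative only, not part of the patch. It prints
0x100 and 0x1ff for PVM_GUEST_MAPPING_START, confirming that the topmost
128TB spans PML4 entries 0x100-0x1ff under the top PML5 entry 0x1ff, the
same value the patch uses as the "empty range" sentinel.

/*
 * Standalone sketch (not part of the patch): mirrors the kernel's
 * __PT_INDEX() math to show where the PVM guest ranges land.
 */
#include <stdio.h>

#define PAGE_SHIFT	12

/* Index of the page-table entry that covers @address at @level. */
#define __PT_INDEX(address, level, bits_per_level) \
	(((address) >> (PAGE_SHIFT + ((level) - 1) * (bits_per_level))) & \
	 ((1UL << (bits_per_level)) - 1))

#define L4_PT_INDEX(address)	__PT_INDEX(address, 4, 9)
#define L5_PT_INDEX(address)	__PT_INDEX(address, 5, 9)

/* Base of the topmost 128TB, i.e. the 4-level guest kernel range. */
#define PVM_GUEST_MAPPING_START	(-1UL << 47)

int main(void)
{
	unsigned long start = PVM_GUEST_MAPPING_START;

	/* Prints 0x100: first PML4 slot of the topmost 128TB. */
	printf("PML4 index: 0x%03lx\n", L4_PT_INDEX(start));
	/* Prints 0x1ff: the whole range sits under the top PML5 entry. */
	printf("PML5 index: 0x%03lx\n", L5_PT_INDEX(start));
	return 0;
}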