Skip to content

Commit

Permalink
64-bit-vms: Add 64-bit VM support to VMM
Browse files Browse the repository at this point in the history
This large commit combines a number of smaller commits in order to add
64-bit VM support to the VMM. The commits do the following:

    * Load ELFs of the same architecture size
    * Support reading 64-bit vaddrs from elf files
    * Create initial address space based on guest architecture
    * Allow instruction decoding of 4-level paging scheme
    * Do not exit VMX state based on CR3 load/store in 64-bit mode
    * Use seL4_Word for vmcs and user context fields
    * Handle additional general purpose registers
    * Handle 64-bit MSRs
    * Configure 64-bit guests to boot in long mode
    * Let guest know about 64-bit hardware features in 64-bit mode
    * Add FADT and DSDT tables
    * Track guest state for additional 64-bit registers
    * Add new function to print 64-bit guest context
    * Properly emulate 64-bit trampoline code
    * Define access rights macros for vmcs initialization
    * Hardcode FADT table information
    * Set an initial stack pointer before running guest
    * Handle fetching cross-page instructions
    * Add additional x86 instruction prefixes

CCDC-GVSC DISTRIBUTION A.  Approved for public release; distribution
unlimited. OPSEC#4481.

Co-authored-by: Chris Guikema <[email protected]>
Signed-off-by: Damon Lee <[email protected]>
  • Loading branch information
2 people authored and abrandnewusername committed May 2, 2023
1 parent e13b3fa commit ee86c5f
Show file tree
Hide file tree
Showing 24 changed files with 1,141 additions and 224 deletions.
24 changes: 17 additions & 7 deletions libsel4vm/arch_include/x86/sel4vm/arch/guest_x86_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ typedef enum vcpu_context_reg {
VCPU_CONTEXT_EDX,
VCPU_CONTEXT_ESI,
VCPU_CONTEXT_EDI,
VCPU_CONTEXT_EBP
VCPU_CONTEXT_EBP,
#ifdef CONFIG_ARCH_X86_64
VCPU_CONTEXT_R8,
VCPU_CONTEXT_R9,
VCPU_CONTEXT_R10,
VCPU_CONTEXT_R11,
VCPU_CONTEXT_R12,
VCPU_CONTEXT_R13,
VCPU_CONTEXT_R14,
VCPU_CONTEXT_R15
#endif
} vcpu_context_reg_t;

/***
Expand All @@ -40,10 +50,10 @@ int vm_set_thread_context(vm_vcpu_t *vcpu, seL4_VCPUContext context);
* Set a single VCPU's thread register in a seL4_VCPUContext
* @param {vm_vcpu_t *} vcpu Handle to the vcpu
* @param {vcpu_context_reg_t} reg Register enumerated by vcpu_context_reg
* @param {uint32_t} value Value to set register with
* @param {seL4_Word} value Value to set register with
* @return 0 on success, otherwise -1 for error
*/
int vm_set_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t value);
int vm_set_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, seL4_Word value);

/***
* @function vm_get_thread_context(vcpu, context)
Expand All @@ -59,10 +69,10 @@ int vm_get_thread_context(vm_vcpu_t *vcpu, seL4_VCPUContext *context);
* Get a single VCPU's thread register
* @param {vm_vcpu_t *} vcpu Handle to the vcpu
* @param {vcpu_context_reg_t} reg Register enumerated by vcpu_context_reg
* @param {uint32_t *} value Pointer to user supplied variable to populate register value with
* @param {seL4_Word *} value Pointer to user supplied variable to populate register value with
* @return 0 on success, otherwise -1 for error
*/
int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t *value);
int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, seL4_Word *value);

/* VMCS Getters and Setters */

Expand All @@ -74,7 +84,7 @@ int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t
* @param {uint32_t} value Value to set VMCS field with
* @return 0 on success, otherwise -1 for error
*/
int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t value);
int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, seL4_Word value);

/***
* @function vm_get_vmcs_field(vcpu, field, value)
Expand All @@ -84,4 +94,4 @@ int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t value);
* @param {uint32_t *} value Pointer to user supplied variable to populate VMCS field value with
* @return 0 on success, otherwise -1 for error
*/
int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t *value);
int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, seL4_Word *value);
20 changes: 20 additions & 0 deletions libsel4vm/arch_include/x86/sel4vm/arch/vmcs_fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,23 @@
#define VMX_CONTROL_EXIT_CONTROLS 0x0000400C
#define VMX_CONTROL_ENTRY_INTERRUPTION_INFO 0x00004016
#define VMX_CONTROL_ENTRY_EXCEPTION_ERROR_CODE 0x00004018

/* Access rights bits */

/* Helpers to build VMCS segment access-rights values from the short
 * bit/type names below (token-pasted onto the ACCESS_RIGHTS_ prefix). */
#define AR(x) (ACCESS_RIGHTS_##x)
#define AR_T(x) (ACCESS_RIGHTS_TYPE_##x)

#define ACCESS_RIGHTS_S BIT(4)          /* descriptor type (code/data) */
#define ACCESS_RIGHTS_P BIT(7)          /* segment present */
#define ACCESS_RIGHTS_AVL BIT(12)       /* available for system software */
#define ACCESS_RIGHTS_L BIT(13)         /* 64-bit (long mode) code segment */
#define ACCESS_RIGHTS_DB BIT(14)        /* default operation size */
#define ACCESS_RIGHTS_G BIT(15)         /* granularity */
#define ACCESS_RIGHTS_UNUSABLE BIT(16)  /* segment unusable */

#define ACCESS_RIGHTS_TYPE_BUSY_TSS 0xB
#define ACCESS_RIGHTS_TYPE_BUSY_16_TSS 0x3
#define ACCESS_RIGHTS_TYPE_LDT 0x2

/* Parenthesize the full expansion so these are safe inside larger
 * expressions (the unparenthesized form breaks under ==, &, ?: etc.). */
#define DEFAULT_TR_ACCESS_RIGHTS (AR(P) | AR_T(BUSY_TSS))
#define DEFAULT_ACCESS_RIGHTS (AR(G) | AR(L) | AR(P) | AR(S) | AR_T(BUSY_16_TSS))
158 changes: 132 additions & 26 deletions libsel4vm/src/arch/x86/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,30 @@
/* We need to own the PSE and PAE bits up until the guest has actually turned on paging,
* then it can control them
*/
#ifdef CONFIG_ARCH_X86_64
/* 64-bit guests: only VMXE stays under VMM control, and the initial value
 * pre-sets PAE — presumably because long mode requires PAE; confirm. */
#define VM_VMCS_CR4_MASK (X86_CR4_VMXE)
#define VM_VMCS_CR4_VALUE (X86_CR4_PAE)
#else
/* 32-bit guests: VMM owns PSE/PAE until the guest turns on paging (see
 * the comment above). */
#define VM_VMCS_CR4_MASK (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_VMXE)
#define VM_VMCS_CR4_VALUE (X86_CR4_PSE | X86_CR4_VMXE)
#endif

#define GUEST_PAGE_DIR 0x10000000
/* x86 page-table entry bits used for the guest's initial 1-1 mappings. */
#define PAGE_PRESENT BIT(0)
#define PAGE_WRITE BIT(1)
/* NOTE(review): bit 2 is the x86 U/S bit (set => user-accessible); the name
 * SUPERVISOR suggests the opposite of what setting it grants — confirm. */
#define PAGE_SUPERVISOR BIT(2)
#define PAGE_SET_SIZE BIT(7)

static int make_guest_page_dir_continued(void *access_addr, void *vaddr, void *cookie)
{
/* Write into this frame as the init page directory: 4M pages, 1 to 1 mapping. */
uint32_t *pd = vaddr;
for (int i = 0; i < 1024; i++) {
/* Present, write, user, page size 4M */
pd[i] = (i << PAGE_BITS_4M) | 0x87;
}
return 0;
}
/* Parenthesize the composite flags so the expansions are safe inside larger
 * expressions (the unparenthesized forms break under ==, &, ?: etc.). */
#define PAGE_DEFAULT (PAGE_PRESENT | PAGE_WRITE | PAGE_SUPERVISOR)
#define PAGE_ENTRY (PAGE_DEFAULT | PAGE_SET_SIZE)
#define PAGE_REFERENCE (PAGE_DEFAULT)

/* Physical-address bits (12-50) of a 64-bit page-table entry. */
#define PAGE_MASK 0x7FFFFFFFFF000ULL

static vm_frame_t pd_alloc_iterator(uintptr_t addr, void *cookie)
/* Guest-physical addresses of the initial paging structures, one 4K frame
 * each. The PDPT/PD frames are only populated on the 64-bit (4-level) path. */
#define GUEST_VSPACE_ROOT 0x10000000
#define GUEST_VSPACE_PDPT 0x10001000
#define GUEST_VSPACE_PD 0x10002000

static vm_frame_t vspace_alloc_iterator(uintptr_t addr, void *cookie)
{
int ret;
vka_object_t object;
Expand All @@ -76,25 +83,119 @@ static vm_frame_t pd_alloc_iterator(uintptr_t addr, void *cookie)
return frame_result;
}

static int make_guest_pd_continued(void *access_addr, void *vaddr, void *cookie)
{
uint64_t *pd = vaddr;
int num_entries = BIT(seL4_PageBits) / sizeof(pd[0]);

static int make_guest_page_dir(vm_t *vm)
/* Brute force 1:1 entries. */
for (int i = 0; i < num_entries; i++) {
/* Present, write, user, page size 2M */
pd[i] = (uint64_t)((uint64_t)((i) << PAGE_BITS_2M) | PAGE_ENTRY);
}

return 0;
}

static int make_guest_pdpt_continued(void *access_addr, void *vaddr, void *cookie)
{
/* Create a 4K Page to be our 1-1 pd */
/* This is constructed with magical new memory that we will not tell Linux about */
vm_memory_reservation_t *pd_reservation = vm_reserve_memory_at(vm, GUEST_PAGE_DIR, BIT(seL4_PageBits),
default_error_fault_callback, NULL);
vm_t *vm = (vm_t *)cookie;

vm_memory_reservation_t *pd_reservation = vm_reserve_memory_at(vm, GUEST_VSPACE_PD,
BIT(seL4_PageBits),
default_error_fault_callback,
NULL);

if (!pd_reservation) {
ZF_LOGE("Failed to reserve page for initial guest pd");
ZF_LOGE("Failed to reserve page for initial guest PD");
return -1;
}
int err = map_vm_memory_reservation(vm, pd_reservation, vspace_alloc_iterator, (void *)vm);
if (err) {
ZF_LOGE("Failed to map page for initial guest PD");
}

uint64_t *pdpt = vaddr;
pdpt[0] = (GUEST_VSPACE_PD & PAGE_MASK) | PAGE_REFERENCE;

return vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace,
(void *)GUEST_VSPACE_PD,
seL4_PageBits, seL4_AllRights, 1,
make_guest_pd_continued, NULL);
}

static int make_guest_root_pd_continued(void *access_addr, void *vaddr, void *cookie)
{
#ifdef CONFIG_ARCH_X86_64
assert(NULL != cookie);

vm_t *vm = (vm_t *)cookie;

vm_memory_reservation_t *pdpt_reservation = vm_reserve_memory_at(vm, GUEST_VSPACE_PDPT,
BIT(seL4_PageBits),
default_error_fault_callback,
NULL);

if (!pdpt_reservation) {
ZF_LOGE("Failed to reserve page for initial guest PDPT");
return -1;
}
int err = map_vm_memory_reservation(vm, pd_reservation, pd_alloc_iterator, (void *)vm);
int err = map_vm_memory_reservation(vm, pdpt_reservation, vspace_alloc_iterator, (void *)vm);
if (err) {
ZF_LOGE("Failed to map page for initial guest pd");
ZF_LOGE("Failed to map page for initial guest PDPT");
return -1;
}
printf("Guest page dir allocated at 0x%x. Creating 1-1 entries\n", (unsigned int)GUEST_PAGE_DIR);
vm->arch.guest_pd = GUEST_PAGE_DIR;
return vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace, (void *)GUEST_PAGE_DIR,
seL4_PageBits, seL4_AllRights, 1, make_guest_page_dir_continued, NULL);

uint64_t *pml4 = vaddr;
pml4[0] = (GUEST_VSPACE_PDPT & PAGE_MASK) | PAGE_REFERENCE;

int error = vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace,
(void *)GUEST_VSPACE_PDPT,
seL4_PageBits, seL4_AllRights, 1,
make_guest_pdpt_continued, vm);
if (error) {
return error;
}
#else
/* Write into this frame as the init page directory: 4M pages, 1 to 1 mapping. */
uint32_t *pd = vaddr;
for (int i = 0; i < 1024; i++) {
/* Present, write, user, page size 4M */
pd[i] = (i << PAGE_BITS_4M) | PAGE_ENTRY;
}
#endif
return 0;
}

/* Build the guest's initial 1-1 address space root.
 *
 * Reserves and maps a single 4K frame at GUEST_VSPACE_ROOT (memory the guest
 * OS is never told about), records it as the guest page directory, then
 * populates it via make_guest_root_pd_continued. On x86_64 the continuation
 * needs the vm handle to build the lower paging levels, so it is passed as
 * the callback cookie; otherwise no cookie is supplied.
 *
 * @param vm Handle to the VM
 * @return 0 on success, otherwise -1 for error
 */
static int make_guest_address_space(vm_t *vm)
{
    /* Create a 4K Page to be our 1-1 vspace */
    /* This is constructed with magical new memory that we will not tell Linux about */
    vm_memory_reservation_t *reservation = vm_reserve_memory_at(vm, GUEST_VSPACE_ROOT,
                                                                BIT(seL4_PageBits),
                                                                default_error_fault_callback,
                                                                NULL);
    if (reservation == NULL) {
        ZF_LOGE("Failed to reserve page for initial guest vspace");
        return -1;
    }

    if (map_vm_memory_reservation(vm, reservation, vspace_alloc_iterator, (void *)vm)) {
        ZF_LOGE("Failed to map page for initial guest vspace");
        return -1;
    }

    printf("Guest address space root allocated at 0x%x. Creating 1-1 entries\n", (unsigned int)GUEST_VSPACE_ROOT);
    vm->arch.guest_pd = GUEST_VSPACE_ROOT;

#ifdef CONFIG_ARCH_X86_64
    void *cookie = (void *)vm;
#else
    void *cookie = NULL;
#endif
    return vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace,
                                            (void *)GUEST_VSPACE_ROOT,
                                            seL4_PageBits, seL4_AllRights, 1,
                                            make_guest_root_pd_continued, cookie);
}

int vm_init_arch(vm_t *vm)
Expand Down Expand Up @@ -150,16 +251,21 @@ int vm_create_vcpu_arch(vm_t *vm, vm_vcpu_t *vcpu)
return -1;
}

/* Create our 4K page 1-1 pd */
err = make_guest_page_dir(vm);
/* Create the guest root vspace */
err = make_guest_address_space(vm);
if (err) {
return -1;
}

vm_guest_state_initialise(vcpu->vcpu_arch.guest_state);
/* Set the initial CR state */
vcpu->vcpu_arch.guest_state->virt.cr.cr0_mask = VM_VMCS_CR0_MASK;
#ifdef CONFIG_ARCH_X86_64
/* In 64-bit mode, PG and PE always need to be enabled, otherwise a fault will occur. */
vcpu->vcpu_arch.guest_state->virt.cr.cr0_shadow = VM_VMCS_CR0_MASK;
#else
vcpu->vcpu_arch.guest_state->virt.cr.cr0_shadow = 0;
#endif
vcpu->vcpu_arch.guest_state->virt.cr.cr0_host_bits = VM_VMCS_CR0_VALUE;
vcpu->vcpu_arch.guest_state->virt.cr.cr4_mask = VM_VMCS_CR4_MASK;
vcpu->vcpu_arch.guest_state->virt.cr.cr4_shadow = 0;
Expand Down
70 changes: 66 additions & 4 deletions libsel4vm/src/arch/x86/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,70 @@
#include "guest_state.h"
#include "vmcs.h"

#ifdef CONFIG_ARCH_X86_64

/* Print the 64-bit guest context: exit information, instruction pointer,
 * general purpose registers and control registers.
 *
 * Reads the VMCS interrupt info/error fields first and returns silently if
 * either read fails (nothing sensible to print in that case).
 *
 * @param vcpu Handle to the vcpu whose state is printed
 */
void vm_print_guest_context(vm_vcpu_t *vcpu)
{
    seL4_Word data_exit_info, data_exit_error;
    if (vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_INFO, &data_exit_info) ||
        vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_ERROR, &data_exit_error)) {
        return;
    }

    printf("================== GUEST OS CONTEXT =================\n");

    /* data_exit_error is a seL4_Word, so print it with %lx: the previous %x
     * mismatched the argument width, which is undefined behaviour for printf
     * and garbles the remaining output on LP64. */
    printf("exit info : reason 0x%lx qualification 0x%lx instruction len 0x%lx interrupt info 0x%lx interrupt error 0x%lx\n",
           vm_guest_exit_get_reason(vcpu->vcpu_arch.guest_state),
           vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state),
           vm_guest_exit_get_int_len(vcpu->vcpu_arch.guest_state), data_exit_info, data_exit_error);
    printf(" guest physical 0x%lx rflags 0x%lx\n",
           vm_guest_exit_get_physical(vcpu->vcpu_arch.guest_state),
           vm_guest_state_get_rflags(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr));
    printf(" guest interruptibility 0x%lx control entry 0x%lx\n",
           vm_guest_state_get_interruptibility(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr),
           vm_guest_state_get_control_entry(vcpu->vcpu_arch.guest_state));

    printf("rip 0x%lx\n",
           vm_guest_state_get_eip(vcpu->vcpu_arch.guest_state));
    /* The VCPU_CONTEXT_E* slots hold the corresponding full-width R*
     * registers here; the enum is extended with R8-R15 under
     * CONFIG_ARCH_X86_64. Read errors are ignored (best-effort debug dump). */
    seL4_Word rax, rbx, rcx;
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &rax);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBX, &rbx);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &rcx);
    printf("rax 0x%lx rbx 0x%lx rcx 0x%lx\n", rax, rbx, rcx);
    seL4_Word rdx, rsi, rdi;
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &rdx);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ESI, &rsi);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDI, &rdi);
    printf("rdx 0x%lx rsi 0x%lx rdi 0x%lx\n", rdx, rsi, rdi);
    seL4_Word rbp;
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBP, &rbp);
    printf("rbp 0x%lx\n", rbp);
    seL4_Word r8, r9, r10;
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R8, &r8);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R9, &r9);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R10, &r10);
    printf("r8 0x%lx r9 0x%lx r10 0x%lx\n", r8, r9, r10);
    seL4_Word r11, r12, r13;
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R11, &r11);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R12, &r12);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R13, &r13);
    printf("r11 0x%lx r12 0x%lx r13 0x%lx\n", r11, r12, r13);
    seL4_Word r14, r15;
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R14, &r14);
    vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R15, &r15);
    printf("r14 0x%lx r15 0x%lx\n", r14, r15);
    printf("cr0 0x%lx cr3 0x%lx cr4 0x%lx\n",
           vm_guest_state_get_cr0(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr),
           vm_guest_state_get_cr3(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr),
           vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr));
}

#else

/* Print out the context of a guest OS thread. */
void vm_print_guest_context(vm_vcpu_t *vcpu)
{
unsigned int data_exit_info, data_exit_error;
seL4_Word data_exit_info, data_exit_error;
if (vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_INFO, &data_exit_info) ||
vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_ERROR, &data_exit_error)) {
return;
Expand All @@ -43,21 +103,23 @@ void vm_print_guest_context(vm_vcpu_t *vcpu)

printf("eip 0x%8x\n",
vm_guest_state_get_eip(vcpu->vcpu_arch.guest_state));
unsigned int eax, ebx, ecx;
seL4_Word eax, ebx, ecx;
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax);
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBX, &ebx);
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &ecx);
printf("eax 0x%8x ebx 0x%8x ecx 0x%8x\n", eax, ebx, ecx);
unsigned int edx, esi, edi;
seL4_Word edx, esi, edi;
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &edx);
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ESI, &esi);
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDI, &edi);
printf("edx 0x%8x esi 0x%8x edi 0x%8x\n", edx, esi, edi);
unsigned int ebp;
seL4_Word ebp;
vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBP, &ebp);
printf("ebp 0x%8x\n", ebp);

printf("cr0 0x%x cr3 0x%x cr4 0x%x\n", vm_guest_state_get_cr0(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr),
vm_guest_state_get_cr3(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr),
vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr));
}

#endif
Loading

0 comments on commit ee86c5f

Please sign in to comment.