diff --git a/libsel4vm/arch_include/x86/sel4vm/arch/guest_x86_context.h b/libsel4vm/arch_include/x86/sel4vm/arch/guest_x86_context.h
index 6f85d5e7c..16a84b773 100644
--- a/libsel4vm/arch_include/x86/sel4vm/arch/guest_x86_context.h
+++ b/libsel4vm/arch_include/x86/sel4vm/arch/guest_x86_context.h
@@ -23,7 +23,17 @@ typedef enum vcpu_context_reg {
     VCPU_CONTEXT_EDX,
     VCPU_CONTEXT_ESI,
     VCPU_CONTEXT_EDI,
-    VCPU_CONTEXT_EBP
+    VCPU_CONTEXT_EBP,
+#ifdef CONFIG_ARCH_X86_64
+    VCPU_CONTEXT_R8,
+    VCPU_CONTEXT_R9,
+    VCPU_CONTEXT_R10,
+    VCPU_CONTEXT_R11,
+    VCPU_CONTEXT_R12,
+    VCPU_CONTEXT_R13,
+    VCPU_CONTEXT_R14,
+    VCPU_CONTEXT_R15
+#endif
 } vcpu_context_reg_t;
 
 /***
@@ -40,10 +50,10 @@ int vm_set_thread_context(vm_vcpu_t *vcpu, seL4_VCPUContext context);
  * Set a single VCPU's thread register in a seL4_VCPUContext
  * @param {vm_vcpu_t *} vcpu Handle to the vcpu
  * @param {vcpu_context_reg_t} reg Register enumerated by vcpu_context_reg
- * @param {uint32_t} value Value to set register with
+ * @param {seL4_Word} value Value to set register with
  * @return 0 on success, otherwise -1 for error
  */
-int vm_set_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t value);
+int vm_set_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, seL4_Word value);
 
 /***
  * @function vm_get_thread_context(vcpu, context)
@@ -59,10 +69,10 @@ int vm_get_thread_context(vm_vcpu_t *vcpu, seL4_VCPUContext *context);
  * Get a single VCPU's thread register
  * @param {vm_vcpu_t *} vcpu Handle to the vcpu
  * @param {vcpu_context_reg_t} reg Register enumerated by vcpu_context_reg
- * @param {uint32_t *} value Pointer to user supplied variable to populate register value with
+ * @param {seL4_Word *} value Pointer to user supplied variable to populate register value with
  * @return 0 on success, otherwise -1 for error
  */
-int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t *value);
+int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, seL4_Word *value);
 
 /* VMCS Getters and Setters */
 
@@ -74,7 +84,7 @@ int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t
- * @param {uint32_t} value Value to set VMCS field with
+ * @param {seL4_Word} value Value to set VMCS field with
  * @return 0 on success, otherwise -1 for error
  */
-int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t value);
+int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, seL4_Word value);
 
 /***
  * @function vm_get_vmcs_field(vcpu, field, value)
@@ -84,4 +94,4 @@ int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t value);
- * @param {uint32_t *} value Pointer to user supplied variable to populate VMCS field value with
+ * @param {seL4_Word *} value Pointer to user supplied variable to populate VMCS field value with
  * @return 0 on success, otherwise -1 for error
  */
-int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t *value);
+int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, seL4_Word *value);
diff --git a/libsel4vm/arch_include/x86/sel4vm/arch/vmcs_fields.h b/libsel4vm/arch_include/x86/sel4vm/arch/vmcs_fields.h
index d8e723915..9d97cbf20 100644
--- a/libsel4vm/arch_include/x86/sel4vm/arch/vmcs_fields.h
+++ b/libsel4vm/arch_include/x86/sel4vm/arch/vmcs_fields.h
@@ -95,3 +95,23 @@
 #define VMX_CONTROL_EXIT_CONTROLS 0x0000400C
 #define VMX_CONTROL_ENTRY_INTERRUPTION_INFO 0x00004016
 #define VMX_CONTROL_ENTRY_EXCEPTION_ERROR_CODE 0x00004018
+
+/* Access rights bits */
+
+#define AR(x) (ACCESS_RIGHTS_##x)
+#define AR_T(x) (ACCESS_RIGHTS_TYPE_##x)
+
+#define ACCESS_RIGHTS_S BIT(4)
+#define ACCESS_RIGHTS_P BIT(7)
+#define ACCESS_RIGHTS_AVL BIT(12)
+#define ACCESS_RIGHTS_L BIT(13)
+#define ACCESS_RIGHTS_DB BIT(14)
+#define ACCESS_RIGHTS_G BIT(15)
+#define ACCESS_RIGHTS_UNUSABLE BIT(16)
+
+#define ACCESS_RIGHTS_TYPE_BUSY_TSS 0xB
+#define ACCESS_RIGHTS_TYPE_BUSY_16_TSS 0x3
+#define ACCESS_RIGHTS_TYPE_LDT 0x2
+
+#define DEFAULT_TR_ACCESS_RIGHTS (AR(P) | AR_T(BUSY_TSS))
+#define DEFAULT_ACCESS_RIGHTS (AR(G) | AR(L) | AR(P) | AR(S) | AR_T(BUSY_16_TSS))
diff --git a/libsel4vm/src/arch/x86/boot.c b/libsel4vm/src/arch/x86/boot.c
index 45c1ab7f9..0a2d6d385 100644
--- a/libsel4vm/src/arch/x86/boot.c
+++ b/libsel4vm/src/arch/x86/boot.c
@@ -37,23 +37,30 @@
 /* We need to own the PSE and PAE bits up until the guest has actually turned on paging,
  * then it can control them */
+#ifdef CONFIG_ARCH_X86_64
+#define VM_VMCS_CR4_MASK (X86_CR4_VMXE)
+#define VM_VMCS_CR4_VALUE (X86_CR4_PAE)
+#else
 #define VM_VMCS_CR4_MASK (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_VMXE)
 #define VM_VMCS_CR4_VALUE (X86_CR4_PSE | X86_CR4_VMXE)
+#endif
 
-#define GUEST_PAGE_DIR 0x10000000
+#define PAGE_PRESENT BIT(0)
+#define PAGE_WRITE BIT(1)
+#define PAGE_SUPERVISOR BIT(2)
+#define PAGE_SET_SIZE BIT(7)
 
-static int make_guest_page_dir_continued(void *access_addr, void *vaddr, void *cookie)
-{
-    /* Write into this frame as the init page directory: 4M pages, 1 to 1 mapping. */
-    uint32_t *pd = vaddr;
-    for (int i = 0; i < 1024; i++) {
-        /* Present, write, user, page size 4M */
-        pd[i] = (i << PAGE_BITS_4M) | 0x87;
-    }
-    return 0;
-}
+#define PAGE_DEFAULT (PAGE_PRESENT | PAGE_WRITE | PAGE_SUPERVISOR)
+#define PAGE_ENTRY (PAGE_DEFAULT | PAGE_SET_SIZE)
+#define PAGE_REFERENCE PAGE_DEFAULT
+
+#define PAGE_MASK 0x7FFFFFFFFF000ULL
 
-static vm_frame_t pd_alloc_iterator(uintptr_t addr, void *cookie)
+#define GUEST_VSPACE_ROOT 0x10000000
+#define GUEST_VSPACE_PDPT 0x10001000
+#define GUEST_VSPACE_PD 0x10002000
+
+static vm_frame_t vspace_alloc_iterator(uintptr_t addr, void *cookie)
 {
     int ret;
     vka_object_t object;
@@ -76,25 +83,119 @@ static vm_frame_t pd_alloc_iterator(uintptr_t addr, void *cookie)
     return frame_result;
 }
 
+static int make_guest_pd_continued(void *access_addr, void *vaddr, void *cookie)
+{
+    uint64_t *pd = vaddr;
+    int num_entries = BIT(seL4_PageBits) / sizeof(pd[0]);
 
-static int make_guest_page_dir(vm_t *vm)
+    /* Brute force 1:1 entries.
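+     * Each 8-byte entry maps a 2MiB superpage, so one 4KiB frame of 512
+     * entries identity-maps the first 1GiB of guest-physical memory; for
+     * example, entry 1 maps guest-virtual 0x200000 to guest-physical
+     * 0x200000 with the present/write/user bits set.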
+     */
+    for (int i = 0; i < num_entries; i++) {
+        /* Present, write, user, page size 2M */
+        pd[i] = (((uint64_t)i) << PAGE_BITS_2M) | PAGE_ENTRY;
+    }
+
+    return 0;
+}
+
+static int make_guest_pdpt_continued(void *access_addr, void *vaddr, void *cookie)
 {
-    /* Create a 4K Page to be our 1-1 pd */
-    /* This is constructed with magical new memory that we will not tell Linux about */
-    vm_memory_reservation_t *pd_reservation = vm_reserve_memory_at(vm, GUEST_PAGE_DIR, BIT(seL4_PageBits),
-                                                                   default_error_fault_callback, NULL);
+    vm_t *vm = (vm_t *)cookie;
+
+    vm_memory_reservation_t *pd_reservation = vm_reserve_memory_at(vm, GUEST_VSPACE_PD,
+                                                                   BIT(seL4_PageBits),
+                                                                   default_error_fault_callback,
+                                                                   NULL);
+
     if (!pd_reservation) {
-        ZF_LOGE("Failed to reserve page for initial guest pd");
+        ZF_LOGE("Failed to reserve page for initial guest PD");
+        return -1;
+    }
+    int err = map_vm_memory_reservation(vm, pd_reservation, vspace_alloc_iterator, (void *)vm);
+    if (err) {
+        ZF_LOGE("Failed to map page for initial guest PD");
+        return -1;
+    }
+
+    uint64_t *pdpt = vaddr;
+    pdpt[0] = (GUEST_VSPACE_PD & PAGE_MASK) | PAGE_REFERENCE;
+
+    return vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace,
+                                            (void *)GUEST_VSPACE_PD,
+                                            seL4_PageBits, seL4_AllRights, 1,
+                                            make_guest_pd_continued, NULL);
+}
+
+static int make_guest_root_pd_continued(void *access_addr, void *vaddr, void *cookie)
+{
+#ifdef CONFIG_ARCH_X86_64
+    assert(NULL != cookie);
+
+    vm_t *vm = (vm_t *)cookie;
+
+    vm_memory_reservation_t *pdpt_reservation = vm_reserve_memory_at(vm, GUEST_VSPACE_PDPT,
+                                                                     BIT(seL4_PageBits),
+                                                                     default_error_fault_callback,
+                                                                     NULL);
+
+    if (!pdpt_reservation) {
+        ZF_LOGE("Failed to reserve page for initial guest PDPT");
         return -1;
     }
-    int err = map_vm_memory_reservation(vm, pd_reservation, pd_alloc_iterator, (void *)vm);
+    int err = map_vm_memory_reservation(vm, pdpt_reservation, vspace_alloc_iterator, (void *)vm);
     if (err) {
-        ZF_LOGE("Failed to map page for initial guest pd");
+        ZF_LOGE("Failed to map page for initial guest PDPT");
+        return -1;
     }
-    printf("Guest page dir allocated at 0x%x. Creating 1-1 entries\n", (unsigned int)GUEST_PAGE_DIR);
-    vm->arch.guest_pd = GUEST_PAGE_DIR;
-    return vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace, (void *)GUEST_PAGE_DIR,
-                                            seL4_PageBits, seL4_AllRights, 1, make_guest_page_dir_continued, NULL);
+
+    uint64_t *pml4 = vaddr;
+    pml4[0] = (GUEST_VSPACE_PDPT & PAGE_MASK) | PAGE_REFERENCE;
+
+    int error = vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace,
+                                                 (void *)GUEST_VSPACE_PDPT,
+                                                 seL4_PageBits, seL4_AllRights, 1,
+                                                 make_guest_pdpt_continued, vm);
+    if (error) {
+        return error;
+    }
+#else
+    /* Write into this frame as the init page directory: 4M pages, 1 to 1 mapping.
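+     * With CR4.PSE enabled, each of the 1024 entries maps a 4MiB superpage,
+     * so this single 4KiB page directory identity-maps the full 4GiB of
+     * guest address space.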
*/ + uint32_t *pd = vaddr; + for (int i = 0; i < 1024; i++) { + /* Present, write, user, page size 4M */ + pd[i] = (i << PAGE_BITS_4M) | PAGE_ENTRY; + } +#endif + return 0; +} + +static int make_guest_address_space(vm_t *vm) +{ + /* Create a 4K Page to be our 1-1 vspace */ + /* This is constructed with magical new memory that we will not tell Linux about */ + vm_memory_reservation_t *vspace_reservation = vm_reserve_memory_at(vm, GUEST_VSPACE_ROOT, + BIT(seL4_PageBits), + default_error_fault_callback, + NULL); + if (!vspace_reservation) { + ZF_LOGE("Failed to reserve page for initial guest vspace"); + return -1; + } + int err = map_vm_memory_reservation(vm, vspace_reservation, vspace_alloc_iterator, (void *)vm); + if (err) { + ZF_LOGE("Failed to map page for initial guest vspace"); + return -1; + } + printf("Guest address space root allocated at 0x%x. Creating 1-1 entries\n", (unsigned int)GUEST_VSPACE_ROOT); + vm->arch.guest_pd = GUEST_VSPACE_ROOT; + + void *cookie = NULL; + +#ifdef CONFIG_ARCH_X86_64 + cookie = (void *) vm; +#endif + return vspace_access_page_with_callback(&vm->mem.vm_vspace, &vm->mem.vmm_vspace, + (void *)GUEST_VSPACE_ROOT, + seL4_PageBits, seL4_AllRights, 1, + make_guest_root_pd_continued, cookie); } int vm_init_arch(vm_t *vm) @@ -150,8 +251,8 @@ int vm_create_vcpu_arch(vm_t *vm, vm_vcpu_t *vcpu) return -1; } - /* Create our 4K page 1-1 pd */ - err = make_guest_page_dir(vm); + /* Create the guest root vspace */ + err = make_guest_address_space(vm); if (err) { return -1; } @@ -159,7 +260,12 @@ int vm_create_vcpu_arch(vm_t *vm, vm_vcpu_t *vcpu) vm_guest_state_initialise(vcpu->vcpu_arch.guest_state); /* Set the initial CR state */ vcpu->vcpu_arch.guest_state->virt.cr.cr0_mask = VM_VMCS_CR0_MASK; +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + /* In 64-bit mode, PG and PE always need to be enabled, otherwise a fault will occur. 
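+     * Setting the shadow equal to the mask makes the guest read these
+     * host-owned CR0 bits back as 1, matching the long-mode requirement
+     * that CR0.PG and CR0.PE never appear cleared.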
*/ + vcpu->vcpu_arch.guest_state->virt.cr.cr0_shadow = VM_VMCS_CR0_MASK; +#else vcpu->vcpu_arch.guest_state->virt.cr.cr0_shadow = 0; +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ vcpu->vcpu_arch.guest_state->virt.cr.cr0_host_bits = VM_VMCS_CR0_VALUE; vcpu->vcpu_arch.guest_state->virt.cr.cr4_mask = VM_VMCS_CR4_MASK; vcpu->vcpu_arch.guest_state->virt.cr.cr4_shadow = 0; diff --git a/libsel4vm/src/arch/x86/debug.c b/libsel4vm/src/arch/x86/debug.c index b46ef843c..8a4f5ebb7 100644 --- a/libsel4vm/src/arch/x86/debug.c +++ b/libsel4vm/src/arch/x86/debug.c @@ -21,10 +21,71 @@ #include "guest_state.h" #include "vmcs.h" +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + +void vm_print_guest_context(vm_vcpu_t *vcpu) +{ + seL4_Word data_exit_info, data_exit_error; + if (vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_INFO, &data_exit_info) || + vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_ERROR, &data_exit_error)) { + return; + } + + printf("================== GUEST OS CONTEXT =================\n"); + + printf("exit info : reason 0x"SEL4_PRIx_word" qualification 0x"SEL4_PRIx_word" " + "instruction len 0x"SEL4_PRIx_word" interrupt info 0x"SEL4_PRIx_word" interrupt error 0x"SEL4_PRIx_word"\n", + vm_guest_exit_get_reason(vcpu->vcpu_arch.guest_state), + vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state), + vm_guest_exit_get_int_len(vcpu->vcpu_arch.guest_state), data_exit_info, data_exit_error); + printf(" guest physical 0x"SEL4_PRIx_word" rflags 0x"SEL4_PRIx_word"\n", + vm_guest_exit_get_physical(vcpu->vcpu_arch.guest_state), + vm_guest_state_get_rflags(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr)); + printf(" guest interruptibility 0x"SEL4_PRIx_word" control entry 0x"SEL4_PRIx_word"\n", + vm_guest_state_get_interruptibility(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr), + vm_guest_state_get_control_entry(vcpu->vcpu_arch.guest_state)); + + printf("rip 0x"SEL4_PRIx_word"\n", + vm_guest_state_get_eip(vcpu->vcpu_arch.guest_state)); + seL4_Word rax, rbx, rcx; + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &rax); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBX, &rbx); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &rcx); + printf("rax 0x"SEL4_PRIx_word" rbx 0x"SEL4_PRIx_word" rcx 0x"SEL4_PRIx_word"\n", rax, rbx, rcx); + seL4_Word rdx, rsi, rdi; + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &rdx); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ESI, &rsi); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDI, &rdi); + printf("rdx 0x"SEL4_PRIx_word" rsi 0x"SEL4_PRIx_word" rdi 0x"SEL4_PRIx_word"\n", rdx, rsi, rdi); + seL4_Word rbp; + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBP, &rbp); + printf("rbp 0x"SEL4_PRIx_word"\n", rbp); + seL4_Word r8, r9, r10; + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R8, &r8); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R9, &r9); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R10, &r10); + printf("r8 0x"SEL4_PRIx_word" r9 0x"SEL4_PRIx_word" r10 0x"SEL4_PRIx_word"\n", r8, r9, r10); + seL4_Word r11, r12, r13; + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R11, &r11); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R12, &r12); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R13, &r13); + printf("r11 0x"SEL4_PRIx_word" r12 0x"SEL4_PRIx_word" r13 0x"SEL4_PRIx_word"\n", r11, r12, r13); + seL4_Word r14, r15; + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R14, &r14); + vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_R15, &r15); + printf("r14 0x"SEL4_PRIx_word" r15 0x"SEL4_PRIx_word"\n", r14, r15); + printf("cr0 0x"SEL4_PRIx_word" cr3 0x"SEL4_PRIx_word" cr4 
0x"SEL4_PRIx_word"\n", + vm_guest_state_get_cr0(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr), + vm_guest_state_get_cr3(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr), + vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr)); +} + +#else /* not CONFIG_X86_64_VTX_64BIT_GUESTS */ + /* Print out the context of a guest OS thread. */ void vm_print_guest_context(vm_vcpu_t *vcpu) { - unsigned int data_exit_info, data_exit_error; + seL4_Word data_exit_info, data_exit_error; if (vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_INFO, &data_exit_info) || vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_EXIT_INTERRUPT_ERROR, &data_exit_error)) { return; @@ -43,17 +104,17 @@ void vm_print_guest_context(vm_vcpu_t *vcpu) printf("eip 0x%8x\n", vm_guest_state_get_eip(vcpu->vcpu_arch.guest_state)); - unsigned int eax, ebx, ecx; + seL4_Word eax, ebx, ecx; vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax); vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBX, &ebx); vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &ecx); printf("eax 0x%8x ebx 0x%8x ecx 0x%8x\n", eax, ebx, ecx); - unsigned int edx, esi, edi; + seL4_Word edx, esi, edi; vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &edx); vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ESI, &esi); vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDI, &edi); printf("edx 0x%8x esi 0x%8x edi 0x%8x\n", edx, esi, edi); - unsigned int ebp; + seL4_Word ebp; vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EBP, &ebp); printf("ebp 0x%8x\n", ebp); @@ -61,3 +122,5 @@ void vm_print_guest_context(vm_vcpu_t *vcpu) vm_guest_state_get_cr3(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr), vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr)); } + +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ diff --git a/libsel4vm/src/arch/x86/ept.c b/libsel4vm/src/arch/x86/ept.c index 3a4e2f0ab..7666b7882 100644 --- a/libsel4vm/src/arch/x86/ept.c +++ b/libsel4vm/src/arch/x86/ept.c @@ -28,13 +28,15 @@ void print_ept_violation(vm_vcpu_t *vcpu) { /* Read linear address that guest is trying to access. */ - unsigned int linear_address; + seL4_Word linear_address; vm_vmcs_read(vcpu->vcpu.cptr, VMX_DATA_GUEST_LINEAR_ADDRESS, &linear_address); printf(COLOUR_R "!!!!!!!! 
ALERT :: GUEST OS PAGE FAULT !!!!!!!!\n"); printf(" Guest OS VMExit due to EPT Violation:\n"); - printf(" Linear address 0x%x.\n", linear_address); - printf(" Guest-Physical address 0x%x.\n", vm_guest_exit_get_physical(vcpu->vcpu_arch.guest_state)); - printf(" Instruction pointer 0x%x.\n", vm_guest_state_get_eip(vcpu->vcpu_arch.guest_state)); + printf(" Linear address 0x"SEL4_PRIx_word".\n", linear_address); + printf(" Guest-Physical address 0x"SEL4_PRIx_word".\n", + (seL4_Word) vm_guest_exit_get_physical(vcpu->vcpu_arch.guest_state)); + printf(" Instruction pointer 0x"SEL4_PRIx_word".\n", + (seL4_Word) vm_guest_state_get_eip(vcpu->vcpu_arch.guest_state)); printf(" This is most likely due to a bug or misconfiguration.\n" COLOUR_RESET); } @@ -59,7 +61,7 @@ int vm_ept_violation_handler(vm_vcpu_t *vcpu) { int err; uintptr_t guest_phys = vm_guest_exit_get_physical(vcpu->vcpu_arch.guest_state); - unsigned int qualification = vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state); + seL4_Word qualification = vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state); int read = EPT_VIOL_READ(qualification); int write = EPT_VIOL_WRITE(qualification); @@ -74,7 +76,7 @@ int vm_ept_violation_handler(vm_vcpu_t *vcpu) } int reg; - uint32_t imm; + seL4_Word imm; int size; vm_decode_ept_violation(vcpu, ®, &imm, &size); memory_fault_result_t fault_result = vm_memory_handle_fault(vcpu->vm, vcpu, guest_phys, size); diff --git a/libsel4vm/src/arch/x86/guest_state.h b/libsel4vm/src/arch/x86/guest_state.h index 2d48650bb..694504d31 100644 --- a/libsel4vm/src/arch/x86/guest_state.h +++ b/libsel4vm/src/arch/x86/guest_state.h @@ -16,27 +16,27 @@ typedef struct guest_cr_virt_state { /* mask represents bits that are owned by us, the host */ - unsigned int cr0_mask; - unsigned int cr4_mask; + seL4_Word cr0_mask; + seL4_Word cr4_mask; /* the shadow represents what the values of our owned bits should be seen as by the guest. * i.e. the value they set it to */ - unsigned int cr0_shadow; - unsigned int cr4_shadow; + seL4_Word cr0_shadow; + seL4_Word cr4_shadow; /* for any bits owned by us, this represents what those bits should actually be set to */ - unsigned int cr0_host_bits; - unsigned int cr4_host_bits; + seL4_Word cr0_host_bits; + seL4_Word cr4_host_bits; /* the raw cr3 is only valid if we're trapping guest accesses to cr3, which we * only do if the guest has not yet enabled paging for itself. 
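     * (See vm_cr_get_cr3() in cr.c, which chooses between this cached value
     * and the value held in the VMCS.)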
If the guest has * enabled paging then the value should be retrieved from the guest machine state */ - uint32_t cr3_guest; + seL4_Word cr3_guest; } guest_cr_virt_state_t; typedef struct guest_exit_information { bool in_exit; - unsigned int reason; - unsigned int qualification; - unsigned int instruction_length; - unsigned int guest_physical; + seL4_Word reason; + seL4_Word qualification; + seL4_Word instruction_length; + seL4_Word guest_physical; } guest_exit_information_t; typedef enum machine_state_status { @@ -65,32 +65,53 @@ typedef enum machine_state_status { typedef struct guest_machine_state { MACHINE_STATE(seL4_VCPUContext, context); - MACHINE_STATE(unsigned int, cr0); - MACHINE_STATE(unsigned int, cr3); - MACHINE_STATE(unsigned int, cr4); - MACHINE_STATE(unsigned int, rflags); - MACHINE_STATE(unsigned int, guest_interruptibility); - MACHINE_STATE(unsigned int, idt_base); - MACHINE_STATE(unsigned int, idt_limit); - MACHINE_STATE(unsigned int, gdt_base); - MACHINE_STATE(unsigned int, gdt_limit); - MACHINE_STATE(unsigned int, cs_selector); - MACHINE_STATE(unsigned int, entry_exception_error_code); + MACHINE_STATE(seL4_Word, cr0); + MACHINE_STATE(seL4_Word, cr3); + MACHINE_STATE(seL4_Word, cr4); + MACHINE_STATE(seL4_Word, rflags); + MACHINE_STATE(seL4_Word, guest_interruptibility); + MACHINE_STATE(seL4_Word, idt_base); + MACHINE_STATE(seL4_Word, idt_limit); + MACHINE_STATE(seL4_Word, gdt_base); + MACHINE_STATE(seL4_Word, gdt_limit); + MACHINE_STATE(seL4_Word, cs_selector); +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + MACHINE_STATE(seL4_Word, ss_selector); + MACHINE_STATE(seL4_Word, ds_selector); + MACHINE_STATE(seL4_Word, es_selector); + MACHINE_STATE(seL4_Word, fs_selector); + MACHINE_STATE(seL4_Word, gs_selector); + MACHINE_STATE(seL4_Word, esp); +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + MACHINE_STATE(seL4_Word, entry_exception_error_code); /* This is state that we set on VMentry and get back on * a vmexit, therefore it is always valid and correct */ - unsigned int eip; - unsigned int control_entry; - unsigned int control_ppc; + seL4_Word eip; + seL4_Word control_entry; + seL4_Word control_ppc; } guest_machine_state_t; /* Define the seL4_UserContext layout so we can treat it as an array */ -#define USER_CONTEXT_EAX 0 -#define USER_CONTEXT_EBX 1 -#define USER_CONTEXT_ECX 2 -#define USER_CONTEXT_EDX 3 -#define USER_CONTEXT_ESI 4 -#define USER_CONTEXT_EDI 5 -#define USER_CONTEXT_EBP 6 +typedef enum guest_user_context { + USER_CONTEXT_EAX = 0, + USER_CONTEXT_EBX, + USER_CONTEXT_ECX, + USER_CONTEXT_EDX, + USER_CONTEXT_ESI, + USER_CONTEXT_EDI, + USER_CONTEXT_EBP, +#ifdef CONFIG_ARCH_X86_64 + USER_CONTEXT_R8, + USER_CONTEXT_R9, + USER_CONTEXT_R10, + USER_CONTEXT_R11, + USER_CONTEXT_R12, + USER_CONTEXT_R13, + USER_CONTEXT_R14, + USER_CONTEXT_R15, +#endif + NUM_USER_CONTEXT_REGS +} guest_user_context_t; typedef struct guest_virt_state { guest_cr_virt_state_t cr; @@ -124,6 +145,14 @@ static inline bool vm_guest_state_no_modified(guest_state_t *gs) IS_MACHINE_STATE_MODIFIED(gs->machine.gdt_base) || IS_MACHINE_STATE_MODIFIED(gs->machine.gdt_limit) || IS_MACHINE_STATE_MODIFIED(gs->machine.cs_selector) || +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + IS_MACHINE_STATE_MODIFIED(gs->machine.ss_selector) || + IS_MACHINE_STATE_MODIFIED(gs->machine.ds_selector) || + IS_MACHINE_STATE_MODIFIED(gs->machine.es_selector) || + IS_MACHINE_STATE_MODIFIED(gs->machine.fs_selector) || + IS_MACHINE_STATE_MODIFIED(gs->machine.gs_selector) || + IS_MACHINE_STATE_MODIFIED(gs->machine.esp) || +#endif /* 
CONFIG_X86_64_VTX_64BIT_GUESTS */ IS_MACHINE_STATE_MODIFIED(gs->machine.entry_exception_error_code) ); } @@ -142,6 +171,14 @@ static inline void vm_guest_state_initialise(guest_state_t *gs) MACHINE_STATE_INIT(gs->machine.gdt_base); MACHINE_STATE_INIT(gs->machine.gdt_limit); MACHINE_STATE_INIT(gs->machine.cs_selector); +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + MACHINE_STATE_INIT(gs->machine.ss_selector); + MACHINE_STATE_INIT(gs->machine.ds_selector); + MACHINE_STATE_INIT(gs->machine.es_selector); + MACHINE_STATE_INIT(gs->machine.fs_selector); + MACHINE_STATE_INIT(gs->machine.gs_selector); + MACHINE_STATE_INIT(gs->machine.esp); +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ MACHINE_STATE_INIT(gs->machine.entry_exception_error_code); } @@ -158,19 +195,27 @@ static inline void vm_guest_state_invalidate_all(guest_state_t *gs) MACHINE_STATE_INVAL(gs->machine.gdt_base); MACHINE_STATE_INVAL(gs->machine.gdt_limit); MACHINE_STATE_INVAL(gs->machine.cs_selector); +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + MACHINE_STATE_INVAL(gs->machine.ss_selector); + MACHINE_STATE_INVAL(gs->machine.ds_selector); + MACHINE_STATE_INVAL(gs->machine.es_selector); + MACHINE_STATE_INVAL(gs->machine.fs_selector); + MACHINE_STATE_INVAL(gs->machine.gs_selector); + MACHINE_STATE_INVAL(gs->machine.esp); +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ MACHINE_STATE_INVAL(gs->machine.entry_exception_error_code); } /* get */ -static inline unsigned int vm_guest_state_get_eip(guest_state_t *gs) +static inline seL4_Word vm_guest_state_get_eip(guest_state_t *gs) { return gs->machine.eip; } -static inline unsigned int vm_guest_state_get_cr0(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_cr0(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.cr0)) { err = vm_vmcs_read(vcpu, VMX_GUEST_CR0, &value); assert(!err); @@ -179,10 +224,10 @@ static inline unsigned int vm_guest_state_get_cr0(guest_state_t *gs, seL4_CPtr v return gs->machine.cr0; } -static inline unsigned int vm_guest_state_get_cr3(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_cr3(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.cr3)) { err = vm_vmcs_read(vcpu, VMX_GUEST_CR3, &value); assert(!err); @@ -191,10 +236,10 @@ static inline unsigned int vm_guest_state_get_cr3(guest_state_t *gs, seL4_CPtr v return gs->machine.cr3; } -static inline unsigned int vm_guest_state_get_cr4(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_cr4(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.cr4)) { err = vm_vmcs_read(vcpu, VMX_GUEST_CR4, &value); assert(!err); @@ -203,10 +248,10 @@ static inline unsigned int vm_guest_state_get_cr4(guest_state_t *gs, seL4_CPtr v return gs->machine.cr4; } -static inline unsigned int vm_guest_state_get_rflags(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_rflags(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.rflags)) { err = vm_vmcs_read(vcpu, VMX_GUEST_RFLAGS, &value); assert(!err); @@ -215,10 +260,10 @@ static inline unsigned int vm_guest_state_get_rflags(guest_state_t *gs, seL4_CPt return gs->machine.rflags; } -static inline unsigned int vm_guest_state_get_interruptibility(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word 
vm_guest_state_get_interruptibility(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.guest_interruptibility)) { err = vm_vmcs_read(vcpu, VMX_GUEST_INTERRUPTABILITY, &value); assert(!err); @@ -227,20 +272,20 @@ static inline unsigned int vm_guest_state_get_interruptibility(guest_state_t *gs return gs->machine.guest_interruptibility; } -static inline unsigned int vm_guest_state_get_control_entry(guest_state_t *gs) +static inline seL4_Word vm_guest_state_get_control_entry(guest_state_t *gs) { return gs->machine.control_entry; } -static inline unsigned int vm_guest_state_get_control_ppc(guest_state_t *gs) +static inline seL4_Word vm_guest_state_get_control_ppc(guest_state_t *gs) { return gs->machine.control_ppc; } -static inline unsigned int vm_guest_state_get_idt_base(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_idt_base(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.idt_base)) { err = vm_vmcs_read(vcpu, VMX_GUEST_IDTR_BASE, &value); assert(!err); @@ -249,10 +294,10 @@ static inline unsigned int vm_guest_state_get_idt_base(guest_state_t *gs, seL4_C return gs->machine.idt_base; } -static inline unsigned int vm_guest_state_get_idt_limit(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_idt_limit(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.idt_limit)) { err = vm_vmcs_read(vcpu, VMX_GUEST_IDTR_LIMIT, &value); assert(!err); @@ -261,10 +306,10 @@ static inline unsigned int vm_guest_state_get_idt_limit(guest_state_t *gs, seL4_ return gs->machine.idt_limit; } -static inline unsigned int vm_guest_state_get_gdt_base(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_gdt_base(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.gdt_base)) { err = vm_vmcs_read(vcpu, VMX_GUEST_GDTR_BASE, &value); assert(!err); @@ -273,10 +318,10 @@ static inline unsigned int vm_guest_state_get_gdt_base(guest_state_t *gs, seL4_C return gs->machine.gdt_base; } -static inline unsigned int vm_guest_state_get_gdt_limit(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_gdt_limit(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.gdt_limit)) { err = vm_vmcs_read(vcpu, VMX_GUEST_GDTR_LIMIT, &value); assert(!err); @@ -285,10 +330,10 @@ static inline unsigned int vm_guest_state_get_gdt_limit(guest_state_t *gs, seL4_ return gs->machine.gdt_limit; } -static inline unsigned int vm_guest_state_get_cs_selector(guest_state_t *gs, seL4_CPtr vcpu) +static inline seL4_Word vm_guest_state_get_cs_selector(guest_state_t *gs, seL4_CPtr vcpu) { int err; - unsigned int value; + seL4_Word value; if (IS_MACHINE_STATE_UNKNOWN(gs->machine.cs_selector)) { err = vm_vmcs_read(vcpu, VMX_GUEST_CS_SELECTOR, &value); assert(!err); @@ -297,77 +342,189 @@ static inline unsigned int vm_guest_state_get_cs_selector(guest_state_t *gs, seL return gs->machine.cs_selector; } +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS +static inline seL4_Word vm_guest_state_get_ss_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + int err; + seL4_Word value; + if (IS_MACHINE_STATE_UNKNOWN(gs->machine.ss_selector)) { + err = vm_vmcs_read(vcpu, VMX_GUEST_SS_SELECTOR, &value); + 
assert(!err); + MACHINE_STATE_READ(gs->machine.ss_selector, value); + } + return gs->machine.ss_selector; +} + +static inline seL4_Word vm_guest_state_get_ds_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + int err; + seL4_Word value; + if (IS_MACHINE_STATE_UNKNOWN(gs->machine.ds_selector)) { + err = vm_vmcs_read(vcpu, VMX_GUEST_DS_SELECTOR, &value); + assert(!err); + MACHINE_STATE_READ(gs->machine.ds_selector, value); + } + return gs->machine.ds_selector; +} + +static inline seL4_Word vm_guest_state_get_es_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + int err; + seL4_Word value; + if (IS_MACHINE_STATE_UNKNOWN(gs->machine.es_selector)) { + err = vm_vmcs_read(vcpu, VMX_GUEST_ES_SELECTOR, &value); + assert(!err); + MACHINE_STATE_READ(gs->machine.es_selector, value); + } + return gs->machine.es_selector; +} + +static inline seL4_Word vm_guest_state_get_fs_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + int err; + seL4_Word value; + if (IS_MACHINE_STATE_UNKNOWN(gs->machine.fs_selector)) { + err = vm_vmcs_read(vcpu, VMX_GUEST_FS_SELECTOR, &value); + assert(!err); + MACHINE_STATE_READ(gs->machine.fs_selector, value); + } + return gs->machine.fs_selector; +} + +static inline seL4_Word vm_guest_state_get_gs_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + int err; + seL4_Word value; + if (IS_MACHINE_STATE_UNKNOWN(gs->machine.gs_selector)) { + err = vm_vmcs_read(vcpu, VMX_GUEST_GS_SELECTOR, &value); + assert(!err); + MACHINE_STATE_READ(gs->machine.gs_selector, value); + } + return gs->machine.gs_selector; +} + +static inline seL4_Word vm_guest_state_get_esp(guest_state_t *gs, seL4_CPtr vcpu) +{ + int err; + seL4_Word value; + if (IS_MACHINE_STATE_UNKNOWN(gs->machine.esp)) { + err = vm_vmcs_read(vcpu, VMX_GUEST_RSP, &value); + assert(!err); + MACHINE_STATE_READ(gs->machine.esp, value); + } + return gs->machine.esp; +} +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + /* set */ -static inline void vm_guest_state_set_eip(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_eip(guest_state_t *gs, seL4_Word val) { gs->machine.eip = val; } -static inline void vm_guest_state_set_cr0(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_cr0(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.cr0); gs->machine.cr0 = val; } -static inline void vm_guest_state_set_cr3(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_cr3(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.cr3); gs->machine.cr3 = val; } -static inline void vm_guest_state_set_cr4(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_cr4(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.cr4); gs->machine.cr4 = val; } -static inline void vm_guest_state_set_rflags(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_rflags(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.rflags); gs->machine.rflags = val; } -static inline void vm_guest_state_set_control_entry(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_control_entry(guest_state_t *gs, seL4_Word val) { gs->machine.control_entry = val; } -static inline void vm_guest_state_set_control_ppc(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_control_ppc(guest_state_t *gs, seL4_Word val) { gs->machine.control_ppc = val; } -static inline void vm_guest_state_set_idt_base(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_idt_base(guest_state_t 
*gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.idt_base); gs->machine.idt_base = val; } -static inline void vm_guest_state_set_idt_limit(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_idt_limit(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.idt_limit); gs->machine.idt_limit = val; } -static inline void vm_guest_state_set_gdt_base(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_gdt_base(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.gdt_base); gs->machine.gdt_base = val; } -static inline void vm_guest_state_set_gdt_limit(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_gdt_limit(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.gdt_limit); gs->machine.gdt_limit = val; } -static inline void vm_guest_state_set_cs_selector(guest_state_t *gs, unsigned int val) +static inline void vm_guest_state_set_cs_selector(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.cs_selector); gs->machine.cs_selector = val; } -static inline void vm_guest_state_set_entry_exception_error_code(guest_state_t *gs, unsigned int val) +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS +static inline void vm_guest_state_set_ss_selector(guest_state_t *gs, seL4_Word val) +{ + MACHINE_STATE_DIRTY(gs->machine.ss_selector); + gs->machine.ss_selector = val; +} + +static inline void vm_guest_state_set_ds_selector(guest_state_t *gs, seL4_Word val) +{ + MACHINE_STATE_DIRTY(gs->machine.ds_selector); + gs->machine.ds_selector = val; +} + +static inline void vm_guest_state_set_es_selector(guest_state_t *gs, seL4_Word val) +{ + MACHINE_STATE_DIRTY(gs->machine.es_selector); + gs->machine.es_selector = val; +} + +static inline void vm_guest_state_set_fs_selector(guest_state_t *gs, seL4_Word val) +{ + MACHINE_STATE_DIRTY(gs->machine.fs_selector); + gs->machine.fs_selector = val; +} + +static inline void vm_guest_state_set_gs_selector(guest_state_t *gs, seL4_Word val) +{ + MACHINE_STATE_DIRTY(gs->machine.gs_selector); + gs->machine.gs_selector = val; +} + +static inline void vm_guest_state_set_esp(guest_state_t *gs, seL4_Word val) +{ + MACHINE_STATE_DIRTY(gs->machine.esp); + gs->machine.esp = val; +} +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + +static inline void vm_guest_state_set_entry_exception_error_code(guest_state_t *gs, seL4_Word val) { MACHINE_STATE_DIRTY(gs->machine.entry_exception_error_code); gs->machine.entry_exception_error_code = val; @@ -455,6 +612,62 @@ static inline void vm_guest_state_sync_cs_selector(guest_state_t *gs, seL4_CPtr } } +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS +static inline void vm_guest_state_sync_ss_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + if (IS_MACHINE_STATE_MODIFIED(gs->machine.ss_selector)) { + int err = vm_vmcs_write(vcpu, VMX_GUEST_SS_SELECTOR, gs->machine.ss_selector); + assert(!err); + MACHINE_STATE_SYNC(gs->machine.ss_selector); + } +} + +static inline void vm_guest_state_sync_ds_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + if (IS_MACHINE_STATE_MODIFIED(gs->machine.ds_selector)) { + int err = vm_vmcs_write(vcpu, VMX_GUEST_DS_SELECTOR, gs->machine.ds_selector); + assert(!err); + MACHINE_STATE_SYNC(gs->machine.ds_selector); + } +} + +static inline void vm_guest_state_sync_es_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + if (IS_MACHINE_STATE_MODIFIED(gs->machine.es_selector)) { + int err = vm_vmcs_write(vcpu, VMX_GUEST_ES_SELECTOR, gs->machine.es_selector); + assert(!err); + MACHINE_STATE_SYNC(gs->machine.es_selector); + } +} + 
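+/* The remaining sync helpers follow the same write-back pattern as the ones
+ * above: flush the cached value to the VMCS only when it has been marked
+ * dirty, then mark it clean again. Roughly (field and VMX_GUEST_FIELD stand
+ * in for the specific member and its VMCS encoding):
+ *
+ *     if (IS_MACHINE_STATE_MODIFIED(gs->machine.field)) {
+ *         int err = vm_vmcs_write(vcpu, VMX_GUEST_FIELD, gs->machine.field);
+ *         assert(!err);
+ *         MACHINE_STATE_SYNC(gs->machine.field);
+ *     }
+ */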
+static inline void vm_guest_state_sync_fs_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + if (IS_MACHINE_STATE_MODIFIED(gs->machine.fs_selector)) { + int err = vm_vmcs_write(vcpu, VMX_GUEST_FS_SELECTOR, gs->machine.fs_selector); + assert(!err); + MACHINE_STATE_SYNC(gs->machine.fs_selector); + } +} + +static inline void vm_guest_state_sync_gs_selector(guest_state_t *gs, seL4_CPtr vcpu) +{ + if (IS_MACHINE_STATE_MODIFIED(gs->machine.gs_selector)) { + int err = vm_vmcs_write(vcpu, VMX_GUEST_GS_SELECTOR, gs->machine.gs_selector); + assert(!err); + MACHINE_STATE_SYNC(gs->machine.gs_selector); + } +} + +static inline void vm_guest_state_sync_esp(guest_state_t *gs, seL4_CPtr vcpu) +{ + if (IS_MACHINE_STATE_MODIFIED(gs->machine.esp)) { + int err = vm_vmcs_write(vcpu, VMX_GUEST_RSP, gs->machine.esp); + assert(!err); + MACHINE_STATE_SYNC(gs->machine.esp); + } +} +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + static inline void vm_guest_state_sync_entry_exception_error_code(guest_state_t *gs, seL4_CPtr vcpu) { if (IS_MACHINE_STATE_MODIFIED(gs->machine.entry_exception_error_code)) { @@ -475,6 +688,14 @@ static inline void vm_sync_guest_vmcs_state(vm_vcpu_t *vcpu) vm_guest_state_sync_gdt_base(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); vm_guest_state_sync_gdt_limit(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); vm_guest_state_sync_cs_selector(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + vm_guest_state_sync_ss_selector(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); + vm_guest_state_sync_ds_selector(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); + vm_guest_state_sync_es_selector(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); + vm_guest_state_sync_fs_selector(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); + vm_guest_state_sync_gs_selector(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); + vm_guest_state_sync_esp(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ vm_guest_state_sync_entry_exception_error_code(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); } @@ -486,25 +707,25 @@ static inline void vm_sync_guest_vmcs_state(vm_vcpu_t *vcpu) int vm_sync_guest_context(vm_vcpu_t *vcpu); /* Exit */ -static inline unsigned int vm_guest_exit_get_reason(guest_state_t *gs) +static inline seL4_Word vm_guest_exit_get_reason(guest_state_t *gs) { assert(gs->exit.in_exit); return gs->exit.reason; } -static inline unsigned int vm_guest_exit_get_physical(guest_state_t *gs) +static inline seL4_Word vm_guest_exit_get_physical(guest_state_t *gs) { assert(gs->exit.in_exit); return gs->exit.guest_physical; } -static inline unsigned int vm_guest_exit_get_int_len(guest_state_t *gs) +static inline seL4_Word vm_guest_exit_get_int_len(guest_state_t *gs) { assert(gs->exit.in_exit); return gs->exit.instruction_length; } -static inline unsigned int vm_guest_exit_get_qualification(guest_state_t *gs) +static inline seL4_Word vm_guest_exit_get_qualification(guest_state_t *gs) { assert(gs->exit.in_exit); return gs->exit.qualification; diff --git a/libsel4vm/src/arch/x86/guest_vcpu_fault_arch.c b/libsel4vm/src/arch/x86/guest_vcpu_fault_arch.c index 7c0915dbe..dd65f9267 100644 --- a/libsel4vm/src/arch/x86/guest_vcpu_fault_arch.c +++ b/libsel4vm/src/arch/x86/guest_vcpu_fault_arch.c @@ -24,11 +24,11 @@ seL4_Word get_vcpu_fault_ip(vm_vcpu_t *vcpu) seL4_Word get_vcpu_fault_data(vm_vcpu_t *vcpu) { int reg; - uint32_t imm; + seL4_Word imm; int size; vm_decode_ept_violation(vcpu, ®, &imm, &size); int vcpu_reg = vm_decoder_reg_mapw[reg]; - unsigned int data; + 
seL4_Word data;
    vm_get_thread_context_reg(vcpu, vcpu_reg, &data);
    return data;
 }
 
@@ -36,7 +36,7 @@ seL4_Word get_vcpu_fault_data(vm_vcpu_t *vcpu)
 size_t get_vcpu_fault_size(vm_vcpu_t *vcpu)
 {
     int reg;
-    uint32_t imm;
+    seL4_Word imm;
     int size;
     vm_decode_ept_violation(vcpu, &reg, &imm, &size);
     return size;
@@ -74,7 +74,7 @@ bool is_vcpu_read_fault(vm_vcpu_t *vcpu)
 int set_vcpu_fault_data(vm_vcpu_t *vcpu, seL4_Word data)
 {
     int reg;
-    uint32_t imm;
+    seL4_Word imm;
     int size;
     vm_decode_ept_violation(vcpu, &reg, &imm, &size);
     int vcpu_reg = vm_decoder_reg_mapw[reg];
diff --git a/libsel4vm/src/arch/x86/guest_x86_context.c b/libsel4vm/src/arch/x86/guest_x86_context.c
index ee75eb193..02732f663 100644
--- a/libsel4vm/src/arch/x86/guest_x86_context.c
+++ b/libsel4vm/src/arch/x86/guest_x86_context.c
@@ -4,6 +4,13 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+/* In release mode this file is built with -O3. However, on gcc 8.4 the -O3
+ * optimisation level is too aggressive and causes issues for the guest VM,
+ * so we drop to -O2 to avoid the problem. */
+#ifdef NDEBUG
+#pragma GCC optimize ("O2")
+#endif
+
 #include
 #include
 
@@ -19,7 +26,7 @@ int vm_set_thread_context(vm_vcpu_t *vcpu, seL4_VCPUContext context)
     return 0;
 }
 
-int vm_set_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, uint32_t value)
+int vm_set_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, seL4_Word value)
 {
     MACHINE_STATE_DIRTY(vcpu->vcpu_arch.guest_state->machine.context);
     (&vcpu->vcpu_arch.guest_state->machine.context.eax)[reg] = value;
@@ -36,7 +43,7 @@ int vm_get_thread_context(vm_vcpu_t *vcpu, seL4_VCPUContext *context)
     return 0;
 }
 
-int vm_get_thread_context_reg(vm_vcpu_t *vcpu, unsigned int reg, uint32_t *value)
+int vm_get_thread_context_reg(vm_vcpu_t *vcpu, vcpu_context_reg_t reg, seL4_Word *value)
 {
     if (IS_MACHINE_STATE_UNKNOWN(vcpu->vcpu_arch.guest_state->machine.context)) {
         ZF_LOGE("Failed to get thread context register: Context is unsynchronised. 
The VCPU hasn't exited?"); @@ -46,7 +53,7 @@ int vm_get_thread_context_reg(vm_vcpu_t *vcpu, unsigned int reg, uint32_t *value return 0; } -int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t value) +int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, seL4_Word value) { int err = 0; switch (field) { @@ -93,10 +100,10 @@ int vm_set_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t value) return err; } -int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t *value) +int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, seL4_Word *value) { int err = 0; - uint32_t val; + seL4_Word val; switch (field) { case VMX_GUEST_CR0: val = vm_guest_state_get_cr0(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); @@ -142,7 +149,7 @@ int vm_get_vmcs_field(vm_vcpu_t *vcpu, seL4_Word field, uint32_t *value) return err; } -int vm_vmcs_read(seL4_CPtr vcpu, seL4_Word field, unsigned int *value) +int vm_vmcs_read(seL4_CPtr vcpu, seL4_Word field, seL4_Word *value) { seL4_X86_VCPU_ReadVMCS_t UNUSED result; diff --git a/libsel4vm/src/arch/x86/interrupt.c b/libsel4vm/src/arch/x86/interrupt.c index 4bab8f8a1..f08b72d6e 100644 --- a/libsel4vm/src/arch/x86/interrupt.c +++ b/libsel4vm/src/arch/x86/interrupt.c @@ -47,7 +47,8 @@ void vm_inject_exception(vm_vcpu_t *vcpu, int exception, int has_error, uint32_t ZF_LOGF("Cannot inject exception"); } if (has_error) { - vm_guest_state_set_entry_exception_error_code(vcpu->vcpu_arch.guest_state, error_code); + seL4_Word code_to_set = error_code; + vm_guest_state_set_entry_exception_error_code(vcpu->vcpu_arch.guest_state, code_to_set); } vm_guest_state_set_control_entry(vcpu->vcpu_arch.guest_state, BIT(31) | exception | 3 << 8 | (has_error ? BIT(11) : 0)); } @@ -137,7 +138,18 @@ void vm_start_ap_vcpu(vm_vcpu_t *vcpu, unsigned int sipi_vector) TRAMPOLINE_LENGTH, instr); eip = vm_emulate_realmode(vcpu, instr, &segment, eip, - TRAMPOLINE_LENGTH, gs); + TRAMPOLINE_LENGTH, gs, 0); + +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + /* 64-bit guests go from realmode to 32-bit emulation mode to longmode */ + memset(instr, 0, TRAMPOLINE_LENGTH); + + vm_fetch_instruction(vcpu, eip, vm_guest_state_get_cr3(gs, vcpu->vcpu.cptr), + TRAMPOLINE_LENGTH, instr); + + eip = vm_emulate_realmode(vcpu, instr, &segment, eip, + TRAMPOLINE_LENGTH, gs, 1); +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ vm_guest_state_set_eip(vcpu->vcpu_arch.guest_state, eip); diff --git a/libsel4vm/src/arch/x86/ioports.c b/libsel4vm/src/arch/x86/ioports.c index aa7cc6290..bd79d9101 100644 --- a/libsel4vm/src/arch/x86/ioports.c +++ b/libsel4vm/src/arch/x86/ioports.c @@ -51,7 +51,7 @@ static vm_ioport_entry_t *search_port(vm_io_port_list_t *ioports, unsigned int p static void set_io_in_unhandled(vm_vcpu_t *vcpu, unsigned int size) { - uint32_t eax; + seL4_Word eax; if (size < 4) { vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax); eax |= MASK(size * 8); @@ -63,7 +63,7 @@ static void set_io_in_unhandled(vm_vcpu_t *vcpu, unsigned int size) static void set_io_in_value(vm_vcpu_t *vcpu, unsigned int value, unsigned int size) { - uint32_t eax; + seL4_Word eax; if (size < 4) { vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax); eax &= ~MASK(size * 8); @@ -124,12 +124,13 @@ int vm_enable_passthrough_ioport(vm_vcpu_t *vcpu, uint16_t port_start, uint16_t int vm_io_instruction_handler(vm_vcpu_t *vcpu) { - unsigned int exit_qualification = vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state); + seL4_Word exit_qualification = vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state); unsigned int 
string, rep; int ret; unsigned int port_no; unsigned int size; - unsigned int value; + unsigned int port_value; + seL4_Word eax_value; int is_in; ioport_fault_result_t res; @@ -146,25 +147,26 @@ int vm_io_instruction_handler(vm_vcpu_t *vcpu) } if (!is_in) { - ret = vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &value); + ret = vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax_value); if (ret) { return VM_EXIT_HANDLE_ERROR; } if (size < 4) { - value &= MASK(size * 8); + eax_value &= MASK(size * 8); } + port_value = eax_value; } /* Search internal ioport list */ vm_ioport_entry_t *port = search_port(&vcpu->vm->arch.ioport_list, port_no); if (port) { if (is_in) { - res = port->interface.port_in(vcpu, port->interface.cookie, port_no, size, &value); + res = port->interface.port_in(vcpu, port->interface.cookie, port_no, size, &port_value); } else { - res = port->interface.port_out(vcpu, port->interface.cookie, port_no, size, value); + res = port->interface.port_out(vcpu, port->interface.cookie, port_no, size, port_value); } } else if (vcpu->vm->arch.unhandled_ioport_callback) { - res = vcpu->vm->arch.unhandled_ioport_callback(vcpu, port_no, is_in, &value, size, + res = vcpu->vm->arch.unhandled_ioport_callback(vcpu, port_no, is_in, &port_value, size, vcpu->vm->arch.unhandled_ioport_callback_cookie); } else { /* No means of handling ioport instruction */ @@ -182,7 +184,7 @@ int vm_io_instruction_handler(vm_vcpu_t *vcpu) if (res == IO_FAULT_UNHANDLED) { set_io_in_unhandled(vcpu, size); } else { - set_io_in_value(vcpu, value, size); + set_io_in_value(vcpu, port_value, size); } } diff --git a/libsel4vm/src/arch/x86/processor/cpuid.c b/libsel4vm/src/arch/x86/processor/cpuid.c index 7b342b876..4d56877dd 100644 --- a/libsel4vm/src/arch/x86/processor/cpuid.c +++ b/libsel4vm/src/arch/x86/processor/cpuid.c @@ -49,8 +49,13 @@ static int vm_cpuid_virt(unsigned int function, unsigned int index, struct cpuid /* cpuid 1.edx */ const unsigned int kvm_supported_word0_x86_features = +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + F(FPU) | 0 /*F(VME)*/ | 0 /*F(DE)*/ | F(PSE) | + F(TSC) | F(MSR) | F(PAE) | 0/*F(MCE)*/ | +#else F(FPU) | 0 /*F(VME)*/ | 0 /*F(DE)*/ | 0/*F(PSE)*/ | F(TSC) | 0/*F(MSR)*/ | 0 /*F(PAE)*/ | 0/*F(MCE)*/ | +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ 0 /*F(CX8)*/ | F(APIC) | 0 /* Reserved */ | F(SEP) | /*F(MTRR)*/ 0 | F(PGE) | 0/*F(MCA)*/ | F(CMOV) | 0 /*F(PAT)*/ | 0 /* F(PSE36)*/ | 0 /* PSN */ | 0/*F(CLFLSH)*/ | @@ -71,10 +76,19 @@ static int vm_cpuid_virt(unsigned int function, unsigned int index, struct cpuid /* cpuid 0x80000001.edx */ const unsigned int kvm_supported_word1_x86_features = - 0 /*F(NX)*/ | 0/*F(RDTSCP)*/; /*not support x86 64*/ +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + 0 /*F(NX)*/ | F(LM) | F(GBPAGES) | F(SYSCALL) | 0/*F(RDTSCP)*/; /*support x86 64*/ +#else + 0 /*F(NX)*/ | 0/*F(RDTSCP)*/; /*do not support x86 64*/ +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ /* cpuid 0x80000001.ecx */ - const unsigned int kvm_supported_word6_x86_features = 0; + const unsigned int kvm_supported_word6_x86_features = +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + F(LAHF_LM); +#else + 0; +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ #if 0 /* cpuid 0xC0000001.edx */ @@ -380,7 +394,7 @@ int vm_cpuid_handler(vm_vcpu_t *vcpu) struct cpuid_val val; /* Read parameter information. 
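     * CPUID takes the leaf number in EAX (the "function") and, for leaves
     * with sub-leaves, an index in ECX; results come back in EAX/EBX/ECX/EDX.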
*/ - unsigned int function, index; + seL4_Word function, index; if (vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &function) || vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &index)) { return VM_EXIT_HANDLE_ERROR; diff --git a/libsel4vm/src/arch/x86/processor/cr.c b/libsel4vm/src/arch/x86/processor/cr.c index 5b5d2e5e6..aa786959a 100644 --- a/libsel4vm/src/arch/x86/processor/cr.c +++ b/libsel4vm/src/arch/x86/processor/cr.c @@ -21,7 +21,7 @@ #include "vmcs.h" #include "processor/platfeature.h" -static inline unsigned int apply_cr_bits(unsigned int cr, unsigned int mask, unsigned int host_bits) +static inline seL4_Word apply_cr_bits(seL4_Word cr, seL4_Word mask, seL4_Word host_bits) { /* force any bit in the mask to be the value from the shadow (both enabled and disabled) */ cr |= (mask & host_bits); @@ -29,7 +29,7 @@ static inline unsigned int apply_cr_bits(unsigned int cr, unsigned int mask, uns return cr; } -static int vm_cr_set_cr0(vm_vcpu_t *vcpu, unsigned int value) +static int vm_cr_set_cr0(vm_vcpu_t *vcpu, seL4_Word value) { int err; if (value & CR0_RESERVED_BITS) { @@ -40,8 +40,8 @@ static int vm_cr_set_cr0(vm_vcpu_t *vcpu, unsigned int value) if ((value & X86_CR0_PG) && !(vcpu->vcpu_arch.guest_state->virt.cr.cr0_shadow & X86_CR0_PG)) { /* guest is taking over paging. So we can no longer care about some of our CR4 values, and * we don't need cr3 load/store exiting anymore */ - unsigned int new_mask = vcpu->vcpu_arch.guest_state->virt.cr.cr4_mask & ~(X86_CR4_PSE | X86_CR4_PAE); - unsigned int cr4_value = vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); + seL4_Word new_mask = vcpu->vcpu_arch.guest_state->virt.cr.cr4_mask & ~(X86_CR4_PSE | X86_CR4_PAE); + seL4_Word cr4_value = vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); /* for any bits that have changed in the mask, grab them from the shadow */ cr4_value = apply_cr_bits(cr4_value, new_mask ^ vcpu->vcpu_arch.guest_state->virt.cr.cr4_mask, vcpu->vcpu_arch.guest_state->virt.cr.cr4_shadow); @@ -53,7 +53,7 @@ static int vm_cr_set_cr0(vm_vcpu_t *vcpu, unsigned int value) } vm_guest_state_set_cr4(vcpu->vcpu_arch.guest_state, cr4_value); /* now turn of cr3 load/store exiting */ - unsigned int ppc = vm_guest_state_get_control_ppc(vcpu->vcpu_arch.guest_state); + seL4_Word ppc = vm_guest_state_get_control_ppc(vcpu->vcpu_arch.guest_state); ppc &= ~(VMX_CONTROL_PPC_CR3_LOAD_EXITING | VMX_CONTROL_PPC_CR3_STORE_EXITING); vm_guest_state_set_control_ppc(vcpu->vcpu_arch.guest_state, ppc); /* load the cached cr3 value */ @@ -82,7 +82,7 @@ static int vm_cr_set_cr0(vm_vcpu_t *vcpu, unsigned int value) return 0; } -static int vm_cr_set_cr3(vm_vcpu_t *vcpu, unsigned int value) +static int vm_cr_set_cr3(vm_vcpu_t *vcpu, seL4_Word value) { /* if the guest hasn't turned on paging then just cache this */ vcpu->vcpu_arch.guest_state->virt.cr.cr3_guest = value; @@ -92,7 +92,7 @@ static int vm_cr_set_cr3(vm_vcpu_t *vcpu, unsigned int value) return 0; } -static int vm_cr_get_cr3(vm_vcpu_t *vcpu, unsigned int *value) +static int vm_cr_get_cr3(vm_vcpu_t *vcpu, seL4_Word *value) { if (vcpu->vcpu_arch.guest_state->virt.cr.cr0_shadow & X86_CR0_PG) { *value = vm_guest_state_get_cr3(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr); @@ -103,7 +103,7 @@ static int vm_cr_get_cr3(vm_vcpu_t *vcpu, unsigned int *value) } -static int vm_cr_set_cr4(vm_vcpu_t *vcpu, unsigned int value) +static int vm_cr_set_cr4(vm_vcpu_t *vcpu, seL4_Word value) { if (value & CR4_RESERVED_BITS) { @@ -132,7 +132,7 @@ static int vm_cr_clts(vm_vcpu_t 
*vcpu)
 
     return -1;
 }
 
-static int vm_cr_lmsw(vm_vcpu_t *vcpu, unsigned int value)
+static int vm_cr_lmsw(vm_vcpu_t *vcpu, seL4_Word value)
 {
     ZF_LOGI("Ignoring call of lmsw");
@@ -149,13 +149,23 @@ static int crExitRegs[] = {
     /*VCPU_CONTEXT_ESP*/ -1,
     VCPU_CONTEXT_EBP,
     VCPU_CONTEXT_ESI,
-    VCPU_CONTEXT_EDI
+    VCPU_CONTEXT_EDI,
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+    VCPU_CONTEXT_R8,
+    VCPU_CONTEXT_R9,
+    VCPU_CONTEXT_R10,
+    VCPU_CONTEXT_R11,
+    VCPU_CONTEXT_R12,
+    VCPU_CONTEXT_R13,
+    VCPU_CONTEXT_R14,
+    VCPU_CONTEXT_R15,
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
 };
 
 int vm_cr_access_handler(vm_vcpu_t *vcpu)
 {
-    unsigned int exit_qualification, val;
+    seL4_Word exit_qualification, val;
     int cr, reg, ret = -1;
 
     exit_qualification = vm_guest_exit_get_qualification(vcpu->vcpu_arch.guest_state);
diff --git a/libsel4vm/src/arch/x86/processor/decode.c b/libsel4vm/src/arch/x86/processor/decode.c
index 2304c7817..777206d2c 100644
--- a/libsel4vm/src/arch/x86/processor/decode.c
+++ b/libsel4vm/src/arch/x86/processor/decode.c
@@ -21,13 +21,16 @@ Author: W.A.
 #include "processor/platfeature.h"
 #include "processor/decode.h"
+#include "processor/msr.h"
 #include "guest_state.h"
 
 /* TODO are these defined elsewhere? */
 #define IA32_PDE_SIZE(pde) (pde & BIT(7))
 #define IA32_PDE_PRESENT(pde) (pde & BIT(0))
 #define IA32_PTE_ADDR(pte) (pte & 0xFFFFF000)
-#define IA32_PSE_ADDR(pde) (pde & 0xFFC00000)
+#define IA32_PDPTE_ADDR(pdpte) ((pdpte) & 0xFFFFC0000000ULL)
+#define IA32_PDE_ADDR(pde) ((pde) & 0xFFFFFFE00000ULL)
+#define IA32_PSE_ADDR(pse) ((pse) & 0xFFC00000)
 
 #define IA32_OPCODE_S(op) (op & BIT(0))
 #define IA32_OPCODE_D(op) (op & BIT(1))
@@ -36,6 +39,8 @@ Author: W.A.
 
 #define SEG_MULT (0x10)
 
+#define EXTRACT_BITS(num, x, y) ((MASK(x) & ((num) >> (y))))
+
 enum decode_instr {
     DECODE_INSTR_MOV,
     DECODE_INSTR_MOVQ,
@@ -47,6 +52,8 @@ enum decode_prefix {
     CS_SEG_OVERRIDE = 0x2e,
     SS_SEG_OVERRIDE = 0x36,
     DS_SEG_OVERRIDE = 0x3e,
+    REX_PREFIX_START = 0x40,
+    REX_PREFIX_END = 0x4f,
     FS_SEG_OVERRIDE = 0x64,
     GS_SEG_OVERRIDE = 0x65,
     OP_SIZE_OVERRIDE = 0x66,
@@ -57,6 +64,7 @@ struct x86_op {
     int reg;
     uint32_t val;
     size_t len;
+    size_t reg_mod;
 };
 
 struct decode_op {
@@ -71,14 +79,21 @@ struct decode_table {
     void (*decode_fn)(struct decode_op *);
 };
 
-static void debug_print_instruction(uint8_t *instr, int instr_len);
+static void debug_print_instruction(uint8_t *instr, int instr_len)
+{
+    printf("instruction dump: ");
+    for (int j = 0; j < instr_len; j++) {
+        printf("%02x ", instr[j]);
+    }
+    printf("\n");
+}
 
 static void decode_modrm_reg_op(struct decode_op *decode_op)
 {
     /* Mov with register */
     uint8_t modrm = decode_op->instr[decode_op->curr_byte];
     decode_op->curr_byte++;
-    decode_op->op.reg = IA32_MODRM_REG(modrm);
+    decode_op->op.reg = IA32_MODRM_REG(modrm) + decode_op->op.reg_mod;
     return;
 }
 
@@ -119,56 +134,116 @@ static const struct decode_table decode_table_2op[] = {
 };
 
 /* Get a word from a guest physical address */
-inline static uint32_t guest_get_phys_word(vm_t *vm, uintptr_t addr)
+inline static seL4_Word guest_get_phys_word(vm_t *vm, uintptr_t addr)
 {
-    uint32_t val;
+    seL4_Word val;
 
-    vm_ram_touch(vm, addr, sizeof(uint32_t),
+    vm_ram_touch(vm, addr, sizeof(seL4_Word),
                  vm_guest_ram_read_callback, &val);
 
     return val;
}
 
 /* Fetch a guest's instruction */
-int vm_fetch_instruction(vm_vcpu_t *vcpu, uint32_t eip, uintptr_t cr3,
+int vm_fetch_instruction(vm_vcpu_t *vcpu, uintptr_t eip, uintptr_t cr3,
                          int len, uint8_t *buf)
 {
     /* Walk page tables to get physical address of instruction */
     uintptr_t instr_phys = 0;
+    uintptr_t cr4 =
vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr);
 
     /* ensure that PAE is not enabled */
-    if (vm_guest_state_get_cr4(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr) & X86_CR4_PAE) {
+#ifndef CONFIG_X86_64_VTX_64BIT_GUESTS
+    if (cr4 & X86_CR4_PAE) {
         ZF_LOGE("Do not support walking PAE paging structures");
         return -1;
     }
+#endif /* not CONFIG_X86_64_VTX_64BIT_GUESTS */
+
+    int extra_instr = 0;
+    int read_instr = len;
+
+    if ((eip >> seL4_PageBits) != ((eip + len) >> seL4_PageBits)) {
+        extra_instr = (eip + len) % BIT(seL4_PageBits);
+        read_instr -= extra_instr;
+    }
+
+    if (cr4 & X86_CR4_PAE) {
+        /* assert that pcid is off */
+        assert(!(cr4 & X86_CR4_PCIDE));
+
+        uint64_t eip_47_39 = EXTRACT_BITS(eip, 9, 39);  /* Bits 47:39 of linear address */
+        uint64_t eip_38_30 = EXTRACT_BITS(eip, 9, 30);  /* Bits 38:30 of linear address */
+        uint64_t eip_29_21 = EXTRACT_BITS(eip, 9, 21);  /* Bits 29:21 of linear address */
+        uint64_t eip_20_12 = EXTRACT_BITS(eip, 9, 12);  /* Bits 20:12 of linear address */
+        uint64_t eip_20_0 = EXTRACT_BITS(eip, 21, 0);   /* Bits 20:0 of linear address */
+
+        uint64_t pml4e = guest_get_phys_word(vcpu->vm, cr3 | (eip_47_39 << 3));
+
+        assert(IA32_PDE_PRESENT(pml4e));
+
+        uint64_t pdpte = guest_get_phys_word(vcpu->vm, IA32_PTE_ADDR(pml4e) | (eip_38_30 << 3));
+
+        assert(IA32_PDE_PRESENT(pdpte));
+
+        /* If this maps a 1GB page, then we can fetch the instruction now. */
+        if (IA32_PDE_SIZE(pdpte)) {
+            instr_phys = IA32_PDPTE_ADDR(pdpte) + EXTRACT_BITS(eip, 30, 0);
+            goto fetch;
+        }
+
+        uint64_t pde = guest_get_phys_word(vcpu->vm, IA32_PTE_ADDR(pdpte) | (eip_29_21 << 3));
 
-    // TODO implement page-boundary crossing properly
-    assert((eip >> 12) == ((eip + len) >> 12));
+        assert(IA32_PDE_PRESENT(pde));
 
-    uint32_t pdi = eip >> 22;
-    uint32_t pti = (eip >> 12) & 0x3FF;
+        /* If this maps a 2MB page, then we can fetch the instruction now. */
+        if (IA32_PDE_SIZE(pde)) {
+            instr_phys = IA32_PDE_ADDR(pde) + eip_20_0;
+            goto fetch;
+        }
 
-    uint32_t pde = guest_get_phys_word(vcpu->vm, cr3 + pdi * 4);
+        uint64_t pte = guest_get_phys_word(vcpu->vm, IA32_PTE_ADDR(pde) | (eip_20_12 << 3));
 
-    assert(IA32_PDE_PRESENT(pde)); /* WTF? */
+        /* If the final-level entry is present, this maps a 4KB page and we can
+         * fetch the instruction now. */
+        if (IA32_PDE_PRESENT(pte)) {
+            instr_phys = IA32_PTE_ADDR(pte) + EXTRACT_BITS(eip, 12, 0);
+            goto fetch;
+        }
 
-    if (IA32_PDE_SIZE(pde)) {
-        /* PSE is used, 4M pages */
-        instr_phys = (uintptr_t)IA32_PSE_ADDR(pde) + (eip & 0x3FFFFF);
+        return -1;
     } else {
-        /* 4k pages */
-        uint32_t pte = guest_get_phys_word(vcpu->vm,
-                                           (uintptr_t)IA32_PTE_ADDR(pde) + pti * 4);
+        // TODO implement page-boundary crossing properly
+        assert((eip >> 12) == ((eip + len) >> 12));
 
-        assert(IA32_PDE_PRESENT(pte));
+        uint32_t pdi = eip >> 22;
+        uint32_t pti = (eip >> 12) & 0x3FF;
 
-        instr_phys = (uintptr_t)IA32_PTE_ADDR(pte) + (eip & 0xFFF);
+        uint32_t pde = guest_get_phys_word(vcpu->vm, cr3 + pdi * 4);
+
+        assert(IA32_PDE_PRESENT(pde)); /* WTF?
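+         * A non-present entry here means the guest's own page tables do not
+         * map the instruction pointer being fetched, so there is nothing
+         * sensible to decode and the hard assert is the best we can do.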
*/
+
+        if (IA32_PDE_SIZE(pde)) {
+            /* PSE is used, 4M pages */
+            instr_phys = (uintptr_t)IA32_PSE_ADDR(pde) + (eip & 0x3FFFFF);
+        } else {
+            /* 4k pages */
+            uint32_t pte = guest_get_phys_word(vcpu->vm,
+                                               (uintptr_t)IA32_PTE_ADDR(pde) + pti * 4);
+
+            assert(IA32_PDE_PRESENT(pte));
+
+            instr_phys = (uintptr_t)IA32_PTE_ADDR(pte) + (eip & 0xFFF);
+        }
     }

+fetch:
     /* Fetch instruction */
-    vm_ram_touch(vcpu->vm, instr_phys, len,
+    vm_ram_touch(vcpu->vm, instr_phys, read_instr,
                  vm_guest_ram_read_callback, buf);

+    if (extra_instr > 0) {
+        vm_fetch_instruction(vcpu, eip + read_instr, cr3, extra_instr, buf + read_instr);
+    }
+
     return 0;
 }
@@ -180,6 +255,9 @@ static int is_prefix(uint8_t byte)
     case CS_SEG_OVERRIDE:
     case SS_SEG_OVERRIDE:
     case DS_SEG_OVERRIDE:
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+    case REX_PREFIX_START ... REX_PREFIX_END:
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
     case FS_SEG_OVERRIDE:
     case GS_SEG_OVERRIDE:
     case ADDR_SIZE_OVERRIDE:
@@ -190,23 +268,27 @@
     return 0;
 }

-static void debug_print_instruction(uint8_t *instr, int instr_len)
+/* REX prefixes with REX.R set select the extended (r8-r15) modrm reg field */
+static int is_high_reg_prefix(uint8_t byte)
 {
-    printf("instruction dump: ");
-    for (int j = 0; j < instr_len; j++) {
-        printf("%2x ", instr[j]);
+    switch (byte) {
+    case 0x44 ... 0x47:
+    case 0x4c ... 0x4f:
+        return 1;
     }
-    printf("\n");
+    return 0;
 }
+
 /* Partial support to decode an instruction for a memory access
    This is very crude. It can break in many ways. */
-int vm_decode_instruction(uint8_t *instr, int instr_len, int *reg, uint32_t *imm, int *op_len)
+int vm_decode_instruction(uint8_t *instr, int instr_len, int *reg, seL4_Word *imm, int *op_len)
 {
     struct decode_op dec_op;
     dec_op.instr = instr;
     dec_op.instr_len = instr_len;
     dec_op.op.len = 1;
+    dec_op.op.reg_mod = 0;
     /* First loop through and check prefixes */
     int i;
     for (i = 0; i < instr_len; i++) {
@@ -215,6 +297,9 @@ int vm_decode_instruction(uint8_t *instr, int instr_len, int *reg, uint32_t *imm
                 /* 16 bit modifier */
                 dec_op.op.len = 2;
             }
+            if (is_high_reg_prefix(instr[i])) {
+                dec_op.op.reg_mod = 8;
+            }
         } else {
             /* We've hit the opcode */
             break;
@@ -244,7 +329,7 @@
     return 0;
 }

-void vm_decode_ept_violation(vm_vcpu_t *vcpu, int *reg, uint32_t *imm, int *size)
+void vm_decode_ept_violation(vm_vcpu_t *vcpu, int *reg, seL4_Word *imm, int *size)
 {
     /* Decode instruction */
     uint8_t ibuf[15];
@@ -266,9 +351,17 @@
  */

 /* Interpret just enough virtual 8086 instructions to run trampoline code.
-   Returns the final jump address */
+   Returns the final jump address
+
+   For 64-bit guests, this function first emulates the 8086 instructions and
+   then also emulates the 32-bit instructions before returning the final
+   jump address.
+   NOTE: this function does not emulate the "call verify_cpu" sequence; a
+   64-bit guest can only get this far by making it through its init code,
+   which has already verified the cpu.
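+
+   The caller selects the pass through m66_set: a non-zero value marks the
+   32-bit pass, where memory accesses default to 4 bytes (see the m66
+   handling below).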
+*/
 uintptr_t vm_emulate_realmode(vm_vcpu_t *vcpu, uint8_t *instr_buf,
-                              uint16_t *segment, uintptr_t eip, uint32_t len, guest_state_t *gs)
+                              uint16_t *segment, uintptr_t eip, uint32_t len, guest_state_t *gs,
+                              int m66_set)
 {
     /* We only track one segment, and assume that code and data are in the same
        segment, which is valid for most trampoline and bootloader code */
@@ -278,7 +371,11 @@
     while (instr - instr_buf < len) {
         uintptr_t mem = 0;
         uint32_t lit = 0;
-        int m66 = 0;
+        /* 64-bit guests are emulated in two passes; the second pass is
+         * already in 32-bit mode, where every memory read/write defaults to
+         * 4 bytes, so the caller passes the operating mode in via m66_set.
+         */
+        int m66 = m66_set;

         uint32_t base = 0;
         uint32_t limit = 0;
@@ -329,6 +426,40 @@
                 //ignore
                 instr++;
             }
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+        } else if (*instr == 0x22) {
+            // mov eax -> crX
+            instr++;
+            seL4_Word eax;
+            vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax);
+
+            if (*instr == 0xc0) {
+                vm_guest_state_set_cr0(gs, eax);
+                ZF_LOGD("cr0 %lx\n", (long unsigned int)eax);
+            }
+            if (*instr == 0xd8) {
+                vm_guest_state_set_cr3(gs, eax);
+                ZF_LOGD("cr3 %lx\n", (long unsigned int)eax);
+            }
+            if (*instr == 0xe0) {
+                vm_guest_state_set_cr4(gs, eax);
+                ZF_LOGD("cr4 %lx\n", (long unsigned int)eax);
+            }
+        } else if (*instr == 0x30) {
+            // wrmsr
+            instr++;
+            seL4_Word eax;
+            seL4_Word ecx;
+            seL4_Word edx;
+
+            vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &eax);
+            vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &ecx);
+            vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &edx);
+            if (MSR_EFER == ecx) {
+                vm_set_vmcs_field(vcpu, VMX_GUEST_EFER, (edx << 32) | eax);
+                ZF_LOGD("wrmsr %lx %lx\n", ecx, (edx << 32) | eax);
+            }
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
         } else {
             //ignore
             instr++;
@@ -368,15 +499,65 @@ uintptr_t vm_emulate_realmode(vm_vcpu_t *vcpu, uint8_t *instr_buf,
         case 0xa1:
             /* mov offset memory to eax */
             instr++;
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+            memcpy(&mem, instr, 4);
+            instr += 4;
+#else
             memcpy(&mem, instr, 2);
             instr += 2;
             mem += *segment * SEG_MULT;
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
             ZF_LOGD("mov %p, eax\n", (void *)mem);
             uint32_t eax;
             vm_ram_touch(vcpu->vm, mem, 4, vm_guest_ram_read_callback, &eax);
             vm_set_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, eax);
             break;
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+        case 0xb8:
+            /* mov const to eax */
+            instr++;
+            memcpy(&mem, instr, 4);
+            instr += 4;
+            ZF_LOGD("mov %lx, eax\n", mem);
+            vm_set_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, mem);
+            break;
+        case 0xb9:
+            /* mov const to ecx */
+            instr++;
+            memcpy(&mem, instr, 4);
+            instr += 4;
+            ZF_LOGD("mov %lx, ecx\n", mem);
+            vm_set_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, mem);
+            break;
+        case 0x8b:
+            /* mov offset memory to edx */
+            instr++;
+            if (*instr == 0x15) {
+                instr++;
+                memcpy(&mem, instr, 4);
+                instr += 4;
+                uint32_t edx;
+                vm_ram_touch(vcpu->vm, mem,
+                             4, vm_guest_ram_read_callback, &edx);
+                ZF_LOGD("mov %x, edx\n", edx);
+                vm_set_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, edx);
+            }
+            break;
+        case 0x81:
+            instr++;
+            if (*instr == 0xc4) {
+                /* add lit to rsp */
+                instr++;
+                memcpy(&mem, instr, 4);
+                instr += 4;
+                seL4_Word esp = vm_guest_state_get_esp(gs, vcpu->vcpu.cptr);
+                esp += mem;
+                vm_guest_state_set_esp(gs, esp);
+                ZF_LOGD("add %lx, rsp\n", mem);
+            }
+            break;
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
         case 0xc7:
             instr++;
             if (*instr == 0x06) { // modrm
@@
-400,15 +581,85 @@ uintptr_t vm_emulate_realmode(vm_vcpu_t *vcpu, uint8_t *instr_buf,
             }
             break;
         case 0xba:
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+            /* mov const to edx */
+            instr++;
+            memcpy(&mem, instr, 4);
+            instr += 4;
+            ZF_LOGD("mov %lx, edx\n", mem);
+            vm_set_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, mem);
+#else
             //?????mov literal to dx
             /* ignore */
             instr += 2;
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
             break;
+        case 0xbc:
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+            // mov lit esp
+            instr++;
+            memcpy(&mem, instr, 4);
+            instr += 4;
+            ZF_LOGD("mov %lx, esp\n", mem);
+            vm_guest_state_set_esp(gs, mem);
+#else
+            /* ignore, but still advance past the opcode */
+            instr++;
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
+            break;
         case 0x8c:
+            /* mov to/from sreg. ignore */
+            instr += 2;
+            break;
         case 0x8e:
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+            // mov eax/edx -> segment register
+            instr++;
+
+            seL4_Word val = 0;
+
+            if ((*instr == 0xc0) || (*instr == 0xd0) || (*instr == 0xd8)) {
+                vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &val);
+            } else if ((*instr == 0xc2) || (*instr == 0xd2) || (*instr == 0xda)
+                       || (*instr == 0xe2) || (*instr == 0xea)) {
+                vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &val);
+            }
+
+            /* Mask everything but lowest 16 bits */
+            val &= 0xffff;
+
+            if ((*instr == 0xd0) || (*instr == 0xd2)) {
+                vm_guest_state_set_ss_selector(gs, val);
+                ZF_LOGD("ss %lx\n", (long unsigned int)val);
+            } else if ((*instr == 0xd8) || (*instr == 0xda)) {
+                vm_guest_state_set_ds_selector(gs, val);
+                ZF_LOGD("ds %lx\n", (long unsigned int)val);
+            } else if ((*instr == 0xc0) || (*instr == 0xc2)) {
+                vm_guest_state_set_es_selector(gs, val);
+                ZF_LOGD("es %lx\n", (long unsigned int)val);
+            } else if (*instr == 0xe2) {
+                vm_guest_state_set_fs_selector(gs, val);
+                ZF_LOGD("fs %lx\n", (long unsigned int)val);
+            } else if (*instr == 0xea) {
+                vm_guest_state_set_gs_selector(gs, val);
+                ZF_LOGD("gs %lx\n", (long unsigned int)val);
+            }
+
+            instr++;
+#else
             /* mov to/from sreg. ignore */
             instr += 2;
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
+            break;
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+        case 0x75:
+            /* jne */
+        case 0x85:
+            /* test eax, eax */
+            instr += 2;
+            break;
+        case 0xe8:
+            /* call rel */
+            instr += 3;
             break;
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
         default:
             /* Assume this is a single byte instruction we can ignore */
             instr++;
diff --git a/libsel4vm/src/arch/x86/processor/decode.h b/libsel4vm/src/arch/x86/processor/decode.h
index ddaaaaea0..5521d1641 100644
--- a/libsel4vm/src/arch/x86/processor/decode.h
+++ b/libsel4vm/src/arch/x86/processor/decode.h
@@ -11,16 +11,17 @@
 #define MAX_INSTR_OPCODES 255
 #define OP_ESCAPE 0xf

-int vm_fetch_instruction(vm_vcpu_t *vcpu, uint32_t eip, uintptr_t cr3, int len, uint8_t *buf);
+int vm_fetch_instruction(vm_vcpu_t *vcpu, uintptr_t eip, uintptr_t cr3, int len, uint8_t *buf);

-int vm_decode_instruction(uint8_t *instr, int instr_len, int *reg, uint32_t *imm, int *op_len);
+int vm_decode_instruction(uint8_t *instr, int instr_len, int *reg, seL4_Word *imm, int *op_len);

-void vm_decode_ept_violation(vm_vcpu_t *vcpu, int *reg, uint32_t *imm, int *size);
+void vm_decode_ept_violation(vm_vcpu_t *vcpu, int *reg, seL4_Word *imm, int *size);

 /* Interpret just enough virtual 8086 instructions to run trampoline code.
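+   For 64-bit guests the same routine is reused a second time for the 32-bit
+   startup stub; m66_set supplies the operand-size default (see decode.c).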
Returns the final jump address */ uintptr_t vm_emulate_realmode(vm_vcpu_t *vcpu, uint8_t *instr_buf, - uint16_t *segment, uintptr_t eip, uint32_t len, guest_state_t *gs); + uint16_t *segment, uintptr_t eip, uint32_t len, guest_state_t *gs, + int m66_set); // TODO don't have these in a header, make them inline functions const static int vm_decoder_reg_mapw[] = { @@ -31,7 +32,17 @@ const static int vm_decoder_reg_mapw[] = { /*VCPU_CONTEXT_ESP*/ -1, VCPU_CONTEXT_EBP, VCPU_CONTEXT_ESI, - VCPU_CONTEXT_EDI + VCPU_CONTEXT_EDI, +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + VCPU_CONTEXT_R8, + VCPU_CONTEXT_R9, + VCPU_CONTEXT_R10, + VCPU_CONTEXT_R11, + VCPU_CONTEXT_R12, + VCPU_CONTEXT_R13, + VCPU_CONTEXT_R14, + VCPU_CONTEXT_R15, +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ }; const static int vm_decoder_reg_mapb[] = { diff --git a/libsel4vm/src/arch/x86/processor/lapic.c b/libsel4vm/src/arch/x86/processor/lapic.c index 282d40f83..c08774c92 100644 --- a/libsel4vm/src/arch/x86/processor/lapic.c +++ b/libsel4vm/src/arch/x86/processor/lapic.c @@ -796,7 +796,7 @@ static int apic_reg_write(vm_vcpu_t *vcpu, uint32_t reg, uint32_t val) } void vm_apic_mmio_write(vm_vcpu_t *vcpu, void *cookie, uint32_t offset, - int len, const uint32_t data) + int len, const seL4_Word data) { (void)cookie; @@ -813,7 +813,7 @@ void vm_apic_mmio_write(vm_vcpu_t *vcpu, void *cookie, uint32_t offset, /* too common printing */ if (offset != APIC_EOI) apic_debug(6, "lapic mmio write at %s: offset 0x%x with length 0x%x, and value is " - "0x%x\n", __func__, offset, len, data); + "0x"SEL4_PRIx_word"\n", __func__, offset, len, data); apic_reg_write(vcpu, offset & 0xff0, data); } @@ -855,14 +855,14 @@ static int apic_reg_read(vm_lapic_t *apic, uint32_t offset, int len, } void vm_apic_mmio_read(vm_vcpu_t *vcpu, void *cookie, uint32_t offset, - int len, uint32_t *data) + int len, seL4_Word *data) { vm_lapic_t *apic = vcpu->vcpu_arch.lapic; (void)cookie; apic_reg_read(apic, offset, len, data); - apic_debug(6, "lapic mmio read on vcpu %d, reg %08x = %08x\n", vcpu->vcpu_id, offset, *data); + apic_debug(6, "lapic mmio read on vcpu %d, reg %08x = "SEL4_PRIx_word"\n", vcpu->vcpu_id, offset, *data); return; } @@ -870,7 +870,7 @@ void vm_apic_mmio_read(vm_vcpu_t *vcpu, void *cookie, uint32_t offset, memory_fault_result_t apic_fault_callback(vm_t *vm, vm_vcpu_t *vcpu, uintptr_t fault_addr, size_t fault_length, void *cookie) { - uint32_t data; + seL4_Word data; if (is_vcpu_read_fault(vcpu)) { vm_apic_mmio_read(vcpu, cookie, APIC_DEFAULT_PHYS_BASE - fault_addr, fault_length, &data); set_vcpu_fault_data(vcpu, data); diff --git a/libsel4vm/src/arch/x86/processor/msr.c b/libsel4vm/src/arch/x86/processor/msr.c index f18a7c91f..c62b5994f 100644 --- a/libsel4vm/src/arch/x86/processor/msr.c +++ b/libsel4vm/src/arch/x86/processor/msr.c @@ -20,17 +20,44 @@ #include "processor/lapic.h" #include "interrupt.h" +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS +static seL4_Word vm_msr_read(seL4_CPtr vcpu, unsigned int field) +{ + + seL4_X86_VCPU_ReadMSR_t result; + + assert(vcpu); + + result = seL4_X86_VCPU_ReadMSR(vcpu, (seL4_Word)field); + assert(result.error == seL4_NoError); + + return result.value; +} + +static void vm_msr_write(seL4_CPtr vcpu, unsigned int field, seL4_Word value) +{ + + seL4_X86_VCPU_WriteMSR_t result; + + assert(vcpu); + + result = seL4_X86_VCPU_WriteMSR(vcpu, (seL4_Word)field, value); + ZF_LOGF_IF(result.error != seL4_NoError, "MSR writing failed, error number: %d", result.error); +} +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + int 
vm_rdmsr_handler(vm_vcpu_t *vcpu) { int ret = 0; - unsigned int msr_no; + seL4_Word msr_no; if (vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &msr_no)) { return VM_EXIT_HANDLE_ERROR; } uint64_t data = 0; + seL4_Word vm_data; - ZF_LOGD("rdmsr ecx 0x%x\n", msr_no); + ZF_LOGD("rdmsr ecx 0x"SEL4_PRIx_word"\n", msr_no); // src reference: Linux kernel 3.11 kvm arch/x86/kvm/x86.c switch (msr_no) { @@ -70,6 +97,21 @@ int vm_rdmsr_handler(vm_vcpu_t *vcpu) data = vm_lapic_get_base_msr(vcpu); break; +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + case MSR_EFER: + vm_get_vmcs_field(vcpu, VMX_GUEST_EFER, &vm_data); + data = (uint64_t) vm_data; + break; + + case MSR_STAR: + case MSR_LSTAR: + case MSR_CSTAR: + case MSR_SYSCALL_MASK: + vm_data = vm_msr_read(vcpu->vcpu.cptr, msr_no); + data = (uint64_t) vm_data; + break; +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + default: ZF_LOGW("rdmsr WARNING unsupported msr_no 0x%x\n", msr_no); // generate a GP fault @@ -93,11 +135,11 @@ int vm_wrmsr_handler(vm_vcpu_t *vcpu) int ret = 0; - unsigned int msr_no; + seL4_Word msr_no; if (vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_ECX, &msr_no)) { return VM_EXIT_HANDLE_ERROR; } - unsigned int val_high, val_low; + seL4_Word val_high, val_low; if (vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EDX, &val_high) || vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &val_low)) { @@ -124,6 +166,19 @@ int vm_wrmsr_handler(vm_vcpu_t *vcpu) vm_lapic_set_base_msr(vcpu, val_low); break; +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + case MSR_EFER: + vm_set_vmcs_field(vcpu, VMX_GUEST_EFER, val_low); + break; + + case MSR_STAR: + case MSR_LSTAR: + case MSR_CSTAR: + case MSR_SYSCALL_MASK: + vm_msr_write(vcpu->vcpu.cptr, msr_no, (seL4_Word)(((seL4_Word)val_high << 32ull) | val_low)); + break; +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + default: ZF_LOGW("wrmsr WARNING unsupported msr_no 0x%x\n", msr_no); // generate a GP fault diff --git a/libsel4vm/src/arch/x86/processor/msr.h b/libsel4vm/src/arch/x86/processor/msr.h index 800cbe74e..0c06290d3 100644 --- a/libsel4vm/src/arch/x86/processor/msr.h +++ b/libsel4vm/src/arch/x86/processor/msr.h @@ -284,3 +284,13 @@ #define MSR_IA32_TSC_DEADLINE 0x000006E0 +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_SHADOW_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ diff --git a/libsel4vm/src/arch/x86/vm.c b/libsel4vm/src/arch/x86/vm.c index 9a0668943..83124d7ba 100644 --- a/libsel4vm/src/arch/x86/vm.c +++ b/libsel4vm/src/arch/x86/vm.c @@ -27,6 +27,8 @@ #include "debug.h" #include "vmexit.h" +#define VMM_INITIAL_STACK 0x96000 + static vm_exit_handler_fn_t x86_exit_handlers[VM_EXIT_REASON_NUM] = { [EXIT_REASON_PENDING_INTERRUPT] = vm_pending_interrupt_handler, [EXIT_REASON_CPUID] = vm_cpuid_handler, @@ -121,6 +123,16 @@ static void vm_update_guest_state_from_fault(volatile vm_vcpu_t *vcpu, volatile context.esi = msg[SEL4_VMENTER_FAULT_ESI]; context.edi = msg[SEL4_VMENTER_FAULT_EDI]; context.ebp = msg[SEL4_VMENTER_FAULT_EBP]; +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + context.r8 = msg[SEL4_VMENTER_FAULT_R8]; + context.r9 = msg[SEL4_VMENTER_FAULT_R9]; + context.r10 
= msg[SEL4_VMENTER_FAULT_R10];
+    context.r11 = msg[SEL4_VMENTER_FAULT_R11];
+    context.r12 = msg[SEL4_VMENTER_FAULT_R12];
+    context.r13 = msg[SEL4_VMENTER_FAULT_R13];
+    context.r14 = msg[SEL4_VMENTER_FAULT_R14];
+    context.r15 = msg[SEL4_VMENTER_FAULT_R15];
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
     MACHINE_STATE_READ(vcpu->vcpu_arch.guest_state->machine.context, context);
 }
@@ -135,6 +147,15 @@ int vm_run_arch(vm_t *vm)
 {
     int ret;
     vm_vcpu_t *vcpu = vm->vcpus[BOOT_VCPU];

+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+    /* On Linux kernels below 4.7, startup_64 does not set up a stack before
+     * calling verify_cpu, which causes a triple fault. This sets an initial
+     * stack in low memory; Linux kernels 4.7 and later simply overwrite it.
+     */
+    vm_guest_state_set_esp(vcpu->vcpu_arch.guest_state, VMM_INITIAL_STACK);
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
+
     vcpu->vcpu_arch.guest_state->virt.interrupt_halt = 0;
     vcpu->vcpu_arch.guest_state->exit.in_exit = 0;
diff --git a/libsel4vm/src/arch/x86/vmcall.c b/libsel4vm/src/arch/x86/vmcall.c
index 79e708681..8a56c5985 100644
--- a/libsel4vm/src/arch/x86/vmcall.c
+++ b/libsel4vm/src/arch/x86/vmcall.c
@@ -50,13 +50,13 @@ int vm_vmcall_handler(vm_vcpu_t *vcpu)
 {
     int res;
     vmcall_handler_t *h;
-    int token;
+    seL4_Word token;
     if (vm_get_thread_context_reg(vcpu, VCPU_CONTEXT_EAX, &token)) {
         return VM_EXIT_HANDLE_ERROR;
     }
-    h = get_handle(vcpu->vm, token);
+    h = get_handle(vcpu->vm, (int) token);
     if (h == NULL) {
-        ZF_LOGE("Failed to find handler for token:%x\n", token);
+        ZF_LOGE("Failed to find handler for token:"SEL4_PRIx_word"\n", token);
         vm_guest_exit_next_instruction(vcpu->vcpu_arch.guest_state, vcpu->vcpu.cptr);
         return VM_EXIT_HANDLED;
     }
diff --git a/libsel4vm/src/arch/x86/vmcs.c b/libsel4vm/src/arch/x86/vmcs.c
index 4198d1e65..f59ee7532 100644
--- a/libsel4vm/src/arch/x86/vmcs.c
+++ b/libsel4vm/src/arch/x86/vmcs.c
@@ -15,6 +15,46 @@
 #include "guest_state.h"
 #include "vmcs.h"

+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+#define CS_ACCESS_RIGHTS (AR(G) | AR(L) | AR(P) | AR(S) | AR_T(BUSY_TSS))
+#define LDTR_ACCESS_RIGHTS (AR(P) | AR_T(LDT))
+#else
+#define CS_ACCESS_RIGHTS (AR(G) | AR(DB) | AR(P) | AR(S) | AR_T(BUSY_TSS))
+#define LDTR_ACCESS_RIGHTS (ACCESS_RIGHTS_UNUSABLE)
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
+
+/* init the vmcs structure for a 32-bit guest OS thread */
+static void vm_vmcs_init_32_bit_guest(vm_vcpu_t *vcpu)
+{
+    assert(vcpu);
+
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_FS_LIMIT, 0);
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_LIMIT, 0);
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_TR_LIMIT, 0x0);
+
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_FS_ACCESS_RIGHTS, ACCESS_RIGHTS_UNUSABLE);
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_ACCESS_RIGHTS, ACCESS_RIGHTS_UNUSABLE);
+
+    vcpu->vcpu_arch.guest_state->machine.control_ppc = VMX_CONTROL_PPC_HLT_EXITING | VMX_CONTROL_PPC_CR3_LOAD_EXITING |
+                                                       VMX_CONTROL_PPC_CR3_STORE_EXITING;
+}
+
+/* init the vmcs structure for a 64-bit guest OS thread */
+static void vm_vmcs_init_64_bit_guest(vm_vcpu_t *vcpu)
+{
+    assert(vcpu);
+
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_FS_LIMIT, ~0u);
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_LIMIT, ~0u);
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_TR_LIMIT, 0xFF);
+
+    /* Allows guest to use FS and GS registers */
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_FS_ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS);
+    vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS);
+
+    vcpu->vcpu_arch.guest_state->machine.control_ppc = VMX_CONTROL_PPC_HLT_EXITING;
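+
+    /* Note: CR3 load/store exiting is left out here, unlike the 32-bit setup
+     * above; the intent appears to be letting a 64-bit guest switch its own
+     * page tables without trapping to the VMM.
+     */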
+} + /*init the vmcs structure for a guest os thread*/ void vm_vmcs_init_guest(vm_vcpu_t *vcpu) { @@ -26,24 +66,19 @@ void vm_vmcs_init_guest(vm_vcpu_t *vcpu) vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_SELECTOR, 0); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_LDTR_SELECTOR, 0); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_TR_SELECTOR, 0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_ES_LIMIT, ~0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_CS_LIMIT, ~0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SS_LIMIT, ~0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_DS_LIMIT, ~0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_FS_LIMIT, 0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_LIMIT, 0); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_ES_LIMIT, ~0u); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_CS_LIMIT, ~0u); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SS_LIMIT, ~0u); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_DS_LIMIT, ~0u); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_LDTR_LIMIT, 0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_TR_LIMIT, 0x0); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GDTR_LIMIT, 0x0); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_IDTR_LIMIT, 0); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_ES_ACCESS_RIGHTS, 0xC093); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_CS_ACCESS_RIGHTS, 0xC09B); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SS_ACCESS_RIGHTS, 0xC093); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_DS_ACCESS_RIGHTS, 0xC093); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_FS_ACCESS_RIGHTS, BIT(16)); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_GS_ACCESS_RIGHTS, BIT(16)); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_LDTR_ACCESS_RIGHTS, BIT(16)); - vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_TR_ACCESS_RIGHTS, 0x8B); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_ES_ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_CS_ACCESS_RIGHTS, CS_ACCESS_RIGHTS); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SS_ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_DS_ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_LDTR_ACCESS_RIGHTS, LDTR_ACCESS_RIGHTS); + vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_TR_ACCESS_RIGHTS, DEFAULT_TR_ACCESS_RIGHTS); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SYSENTER_CS, 0); vm_vmcs_write(vcpu->vcpu.cptr, VMX_CONTROL_CR0_MASK, vcpu->vcpu_arch.guest_state->virt.cr.cr0_mask); vm_vmcs_write(vcpu->vcpu.cptr, VMX_CONTROL_CR4_MASK, vcpu->vcpu_arch.guest_state->virt.cr.cr4_mask); @@ -62,8 +97,13 @@ void vm_vmcs_init_guest(vm_vcpu_t *vcpu) vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_RFLAGS, BIT(1)); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SYSENTER_ESP, 0); vm_vmcs_write(vcpu->vcpu.cptr, VMX_GUEST_SYSENTER_EIP, 0); - vcpu->vcpu_arch.guest_state->machine.control_ppc = VMX_CONTROL_PPC_HLT_EXITING | VMX_CONTROL_PPC_CR3_LOAD_EXITING | - VMX_CONTROL_PPC_CR3_STORE_EXITING; + +#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS + vm_vmcs_init_64_bit_guest(vcpu); +#else + vm_vmcs_init_32_bit_guest(vcpu); +#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */ + vm_vmcs_write(vcpu->vcpu.cptr, VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS, vcpu->vcpu_arch.guest_state->machine.control_ppc); vm_vmcs_read(vcpu->vcpu.cptr, VMX_CONTROL_ENTRY_INTERRUPTION_INFO, &vcpu->vcpu_arch.guest_state->machine.control_entry); diff --git a/libsel4vm/src/arch/x86/vmcs.h b/libsel4vm/src/arch/x86/vmcs.h index 2b5ecf43d..990f563e2 100644 --- a/libsel4vm/src/arch/x86/vmcs.h +++ b/libsel4vm/src/arch/x86/vmcs.h @@ -10,6 +10,6 @@ #include -int vm_vmcs_read(seL4_CPtr vcpu, seL4_Word field, unsigned int 
*value); +int vm_vmcs_read(seL4_CPtr vcpu, seL4_Word field, seL4_Word *value); int vm_vmcs_write(seL4_CPtr vcpu, seL4_Word field, seL4_Word value); void vm_vmcs_init_guest(vm_vcpu_t *vcpu); diff --git a/libsel4vmmplatsupport/arch_include/x86/sel4vmmplatsupport/arch/acpi.h b/libsel4vmmplatsupport/arch_include/x86/sel4vmmplatsupport/arch/acpi.h index 1a1c100f9..20e3c767c 100644 --- a/libsel4vmmplatsupport/arch_include/x86/sel4vmmplatsupport/arch/acpi.h +++ b/libsel4vmmplatsupport/arch_include/x86/sel4vmmplatsupport/arch/acpi.h @@ -18,7 +18,7 @@ #define ACPI_START (LOWER_BIOS_START) // Start of ACPI tables; RSD PTR is right here #define XSDT_START (ACPI_START + 0x1000) -#define MAX_ACPI_TABLES (2) +#define MAX_ACPI_TABLES (16) #include diff --git a/libsel4vmmplatsupport/src/arch/x86/acpi.c b/libsel4vmmplatsupport/src/arch/x86/acpi.c index 12aea83e1..8b897b676 100644 --- a/libsel4vmmplatsupport/src/arch/x86/acpi.c +++ b/libsel4vmmplatsupport/src/arch/x86/acpi.c @@ -197,6 +197,50 @@ int make_guest_acpi_tables(vm_t *vm) // Could set up other tables here... + // DSDT + int dsdt_size = sizeof(acpi_dsdt_t); + acpi_dsdt_t *dsdt = calloc(1, dsdt_size); + assert(NULL != dsdt); + + acpi_fill_table_head(&dsdt->header, "DSDT", 1); + + dsdt->header.length = dsdt_size; + dsdt->header.checksum = acpi_calc_checksum((char *)dsdt, dsdt_size); + + tables[num_tables] = dsdt; + table_sizes[num_tables] = dsdt_size; + + int dsdt_index = num_tables; + + num_tables++; + + // FADT + int fadt_size = sizeof(acpi_fadt_t); + acpi_fadt_t *fadt = calloc(1, fadt_size); + assert(NULL != fadt); + + acpi_fill_table_head(&fadt->header, "FACP", 1); + + fadt->header.length = fadt_size; + + /* Hardcode some necessary data; Addresses taken from QEMU. + * Info may vary on hardware platforms, but differences don't seem to affect + * the guest + */ + fadt->pm1a_evt_blk = 0x600; + fadt->pm1_evt_len = 0x4; + fadt->pm1a_cnt_blk = 0x604; + fadt->pm1_cnt_len = 0x2; + + fadt->sci_int = 9; + + tables[num_tables] = fadt; + table_sizes[num_tables] = fadt_size; + + int fadt_index = num_tables; + + num_tables++; + // XSDT size_t xsdt_size = sizeof(acpi_xsdt_t) + sizeof(uint64_t) * (num_tables - 1); @@ -213,16 +257,17 @@ int make_guest_acpi_tables(vm_t *vm) } uintptr_t xsdt_addr = lower_bios_addr + (XSDT_START - LOWER_BIOS_START); + uintptr_t xsdt_vaddr = XSDT_START; acpi_xsdt_t *xsdt = calloc(1, xsdt_size); acpi_fill_table_head(&xsdt->header, "XSDT", 1); // Add previous tables to XSDT pointer list - uintptr_t table_paddr = xsdt_addr + xsdt_size; + uintptr_t table_vaddr = xsdt_vaddr + xsdt_size; uint64_t *entry = (uint64_t *)((char *)xsdt + sizeof(acpi_xsdt_t)); for (int i = 1; i < num_tables; i++) { - *entry++ = (uint64_t)table_paddr; - table_paddr += table_sizes[i]; + *entry++ = (uint64_t)table_vaddr; + table_vaddr += table_sizes[i]; } xsdt->header.length = xsdt_size; @@ -232,12 +277,22 @@ int make_guest_acpi_tables(vm_t *vm) table_sizes[0] = xsdt_size; // Copy all the tables to guest - table_paddr = xsdt_addr; + uintptr_t table_paddr = xsdt_addr; + table_vaddr = xsdt_vaddr; for (int i = 0; i < num_tables; i++) { + + // Need to fill in DSDT address + if (i == fadt_index) { + fadt->dsdt_address = table_vaddr - table_sizes[dsdt_index]; + fadt->x_dsdt_address = table_vaddr - table_sizes[dsdt_index]; + fadt->header.checksum = acpi_calc_checksum((char *)fadt, fadt_size); + } + ZF_LOGD("ACPI table \"%.4s\", addr = %p, size = %zu bytes\n", - (char *)tables[i], (void *)table_paddr, table_sizes[i]); + (char *)tables[i], (void *)table_vaddr, 
table_sizes[i]);
         memcpy((void *)table_paddr, (char *)tables[i], table_sizes[i]);
         table_paddr += table_sizes[i];
+        table_vaddr += table_sizes[i];
     }

     // RSDP
@@ -247,11 +302,11 @@ int make_guest_acpi_tables(vm_t *vm)
         .oem_id = "NICTA ",
         .revision = 2, /* ACPI v3 */
         .checksum = 0,
-        .rsdt_address = xsdt_addr,
+        .rsdt_address = xsdt_vaddr,
         /* rsdt_address will not be inspected as the xsdt is present.
            This is not ACPI 1 compliant */
         .length = sizeof(acpi_rsdp_t),
-        .xsdt_address = xsdt_addr,
+        .xsdt_address = xsdt_vaddr,
         .extended_checksum = 0,
         .reserved = {0}
     };
diff --git a/libsel4vmmplatsupport/src/arch/x86/drivers/vmm_pci_helper.c b/libsel4vmmplatsupport/src/arch/x86/drivers/vmm_pci_helper.c
index 0977d2da2..00c97f393 100644
--- a/libsel4vmmplatsupport/src/arch/x86/drivers/vmm_pci_helper.c
+++ b/libsel4vmmplatsupport/src/arch/x86/drivers/vmm_pci_helper.c
@@ -42,7 +42,7 @@ int vmm_pci_helper_map_bars(vm_t *vm, libpci_device_iocfg_t *cfg, vmm_pci_bar_t
     }
     /* Make sure that the address is naturally aligned to its size */
     if (addr % size) {
-        ZF_LOGE("Guest PCI bar address %p is not aligned to size %zu", addr, size);
+        ZF_LOGE("Guest PCI bar address 0x%lx is not aligned to size %zu", addr, size);
         return -1;
     }
     int err = map_ut_alloc_reservation_with_base_paddr(vm, (uintptr_t)cfg->base_addr[i], reservation);
diff --git a/libsel4vmmplatsupport/src/arch/x86/guest_image.c b/libsel4vmmplatsupport/src/arch/x86/guest_image.c
index 762666f01..fa7da6f80 100644
--- a/libsel4vmmplatsupport/src/arch/x86/guest_image.c
+++ b/libsel4vmmplatsupport/src/arch/x86/guest_image.c
@@ -19,6 +19,15 @@
 #include
 #include

+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+#define ELF_HEADER_SIZE 512
+#else
+#define ELF_HEADER_SIZE 256
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
+
+#define ISELF32(elfFile) ( ((Elf32_Ehdr *)elfFile)->e_ident[EI_CLASS] == ELFCLASS32 )
+#define ISELF64(elfFile) ( ((Elf64_Ehdr *)elfFile)->e_ident[EI_CLASS] == ELFCLASS64 )
+
 typedef struct boot_guest_cookie {
     vm_t *vm;
     FILE *file;
@@ -30,7 +39,7 @@ typedef struct boot_guest_cookie {
 static int read_elf_headers(void *buf, vm_t *vm, FILE *file, size_t buf_size, elf_t *elf)
 {
     size_t result;
-    if (buf_size < sizeof(Elf32_Ehdr)) {
+    if (buf_size < ELF_HEADER_SIZE) {
         return -1;
     }
     fseek(file, 0, SEEK_SET);
@@ -39,6 +48,17 @@ static int read_elf_headers(void *buf, vm_t *vm, FILE *file, size_t buf_size, el
         return -1;
     }

+    /* Check for the correct ELF class on the current architecture */
+#ifdef CONFIG_X86_64_VTX_64BIT_GUESTS
+    if (ISELF32(buf)) {
+        return -1;
+    }
+#else
+    if (ISELF64(buf)) {
+        return -1;
+    }
+#endif /* CONFIG_X86_64_VTX_64BIT_GUESTS */
+
     return elf_newFile_maybe_unsafe(buf, buf_size, true, false, elf);
 }
@@ -220,7 +240,7 @@ static int load_guest_elf(vm_t *vm, const char *image_name, uintptr_t load_addre
                           guest_kernel_image_t *guest_image)
 {
     elf_t kernel_elf;
-    char elf_file[256];
+    char elf_file[ELF_HEADER_SIZE];
     int ret;
     FILE *file = fopen(image_name, "r");
     if (!file) {
@@ -240,18 +260,18 @@ static int load_guest_elf(vm_t *vm, const char *image_name, uintptr_t load_addre
      * if it isn't we will just fail when we try and get the frame */
     uintptr_t load_addr = ROUND_UP(load_address, alignment);
     /* Calculate relocation offset.
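       The lowest PT_LOAD paddr/vaddr pair found below (program headers with
       a zero vaddr are skipped) gives the base the image was linked for; the
       distance from there to the rounded-up load address is the offset.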
*/ - uintptr_t guest_kernel_addr = 0xFFFFFFFF; - uintptr_t guest_kernel_vaddr = 0xFFFFFFFF; + uintptr_t guest_kernel_addr = UINTPTR_MAX; + uintptr_t guest_kernel_vaddr = UINTPTR_MAX; for (int i = 0; i < n_headers; i++) { if (elf_getProgramHeaderType(&kernel_elf, i) != PT_LOAD) { continue; } - uint32_t addr = elf_getProgramHeaderPaddr(&kernel_elf, i); + seL4_Word addr = elf_getProgramHeaderPaddr(&kernel_elf, i); if (addr < guest_kernel_addr) { guest_kernel_addr = addr; } - uint32_t vaddr = elf_getProgramHeaderVaddr(&kernel_elf, i); - if (vaddr < guest_kernel_vaddr) { + seL4_Word vaddr = elf_getProgramHeaderVaddr(&kernel_elf, i); + if (vaddr && (vaddr < guest_kernel_vaddr)) { guest_kernel_vaddr = vaddr; } }
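
A note for reviewers, separate from the patch itself: the 4-level page walk added in decode.c is driven entirely by the EXTRACT_BITS macro, and its index arithmetic is easy to sanity-check in isolation. The sketch below copies EXTRACT_BITS from this diff; MASK is defined locally here (the library gets it from utils), and main() with its sample address is illustrative only, not part of the library.

    #include <stdint.h>
    #include <stdio.h>

    /* MASK defined locally for this sketch; EXTRACT_BITS copied from decode.c:
     * take x bits of num starting at bit y */
    #define MASK(n) ((1ULL << (n)) - 1)
    #define EXTRACT_BITS(num, x, y) ((MASK(x) & ((num) >> (y))))

    int main(void)
    {
        /* Sample linear address, masked to the 48 bits a 4-level walk uses */
        uint64_t lin = 0xffffffff81000000ULL & MASK(48);

        printf("PML4 index: %llu\n", (unsigned long long)EXTRACT_BITS(lin, 9, 39));
        printf("PDPT index: %llu\n", (unsigned long long)EXTRACT_BITS(lin, 9, 30));
        printf("PD   index: %llu\n", (unsigned long long)EXTRACT_BITS(lin, 9, 21));
        printf("PT   index: %llu\n", (unsigned long long)EXTRACT_BITS(lin, 9, 12));
        printf("4K offset : 0x%llx\n", (unsigned long long)EXTRACT_BITS(lin, 12, 0));
        return 0;
    }

For the masked address 0xffff81000000 this prints PML4 index 511, PDPT index 510, PD index 8, PT index 0 and offset 0, matching the eip_47_39/eip_38_30/eip_29_21 fields used in vm_fetch_instruction.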