diff --git a/docs/manual.md b/docs/manual.md index fdbeba812..8740ffaa6 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -649,6 +649,8 @@ To find the full list of possible faults that could occur and details regarding kind of fault, please see the 'Faults' section of the [seL4 reference manual](https://sel4.systems/Info/Docs/seL4-manual-latest.pdf). +For x86 VCPU faults, please see the 'Virtualisation' section of the [seL4 reference manual](https://sel4.systems/Info/Docs/seL4-manual-latest.pdf). + ## `microkit_msginfo microkit_ppcall(microkit_channel ch, microkit_msginfo msginfo)` Performs a call to a protected procedure in a different PD. @@ -984,6 +986,18 @@ virtual CPU with ID `vcpu`. Write the registers of a given virtual CPU with ID `vcpu`. The `regs` argument is the pointer to the struct of registers `seL4_VCPUContext` that are written from. +## `void microkit_vcpu_x86_on(void)` + +Allow the PD to switch to guest execution mode with the bound vCPU every time `init()` or `notified()` return. + +The caller is responsible for initialising the VCPU's instruction pointer, Primary Processor-Based VM-Execution Controls +and VM-Entry Interruption-Information Field to the corresponding VMCS fields. For more details, +please see the 'Virtualisation' section of the [seL4 reference manual](https://sel4.systems/Info/Docs/seL4-manual-latest.pdf). + +## `void microkit_vcpu_x86_off(void)` + +Stop the PD from switching to guest execution mode when `init()` or `notified()` return. + # System Description File {#sysdesc} This section describes the format of the System Description File (SDF). @@ -1154,6 +1168,7 @@ The `end` element has the following attributes: * `id`: Channel identifier in the context of the named protection domain. Must be at least 0 and less than 63. * `pp`: (optional) Indicates that the protection domain for this end can perform a protected procedure call to the other end; defaults to false. Protected procedure calls can only be to PDs of strictly higher priority. + On x86-64, PDs with virtual machines cannot receive protected procedure calls. * `notify`: (optional) Indicates that the protection domain for this end can send a notification to the other end; defaults to true. * `setvar_id`: (optional) Specifies a symbol in the program image. This symbol will be rewritten with the channel identifier. diff --git a/libmicrokit/include/microkit.h b/libmicrokit/include/microkit.h index 90155d21e..f625b1de6 100644 --- a/libmicrokit/include/microkit.h +++ b/libmicrokit/include/microkit.h @@ -10,6 +10,9 @@ #pragma once #include +#ifdef CONFIG_VTX +#include +#endif /* CONFIG_VTX */ typedef unsigned int microkit_channel; typedef unsigned int microkit_child; @@ -45,6 +48,12 @@ extern char microkit_name[MICROKIT_PD_NAME_LENGTH]; extern seL4_Bool microkit_have_signal; extern seL4_CPtr microkit_signal_cap; extern seL4_MessageInfo_t microkit_signal_msg; +#if defined(CONFIG_VTX) +extern seL4_Bool microkit_x86_do_vcpu_resume; +extern seL4_Word microkit_x86_vcpu_rip; +extern seL4_Word microkit_x86_vcpu_prim_proc_ctl; +extern seL4_Word microkit_x86_vcpu_irq_info; +#endif /* Symbols for error checking libmicrokit API calls. Patched by the Microkit tool * to set bits corresponding to valid channels for this PD. */ @@ -188,13 +197,8 @@ static inline void microkit_vcpu_restart(microkit_child vcpu, seL4_Word entry_po { seL4_Error err; seL4_UserContext ctxt = {0}; -#if defined(CONFIG_ARCH_AARCH64) ctxt.pc = entry_point; -#elif defined(CONFIG_ARCH_X86_64) - ctxt.rip = entry_point; -#else -#error "unknown architecture for 'microkit_vcpu_restart'" -#endif + err = seL4_TCB_WriteRegisters( BASE_VM_TCB_CAP + vcpu, seL4_True, @@ -218,9 +222,7 @@ static inline void microkit_vcpu_stop(microkit_child vcpu) microkit_internal_crash(err); } } -#endif -#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT) static inline void microkit_vcpu_arm_inject_irq(microkit_child vcpu, seL4_Uint16 irq, seL4_Uint8 priority, seL4_Uint8 group, seL4_Uint8 index) { @@ -263,7 +265,7 @@ static inline void microkit_vcpu_arm_write_reg(microkit_child vcpu, seL4_Word re microkit_internal_crash(err); } } -#endif +#endif /* CONFIG_ARM_HYPERVISOR_SUPPORT */ #if defined(CONFIG_ALLOW_SMC_CALLS) static inline void microkit_arm_smc_call(seL4_ARM_SMCContext *args, seL4_ARM_SMCContext *response) @@ -275,7 +277,7 @@ static inline void microkit_arm_smc_call(seL4_ARM_SMCContext *args, seL4_ARM_SMC microkit_internal_crash(err); } } -#endif +#endif /* CONFIG_ALLOW_SMC_CALLS */ #if defined(CONFIG_ARCH_X86_64) static inline void microkit_x86_ioport_write_8(microkit_ioport ioport_id, seL4_Word port_addr, seL4_Word data) @@ -391,28 +393,74 @@ static inline seL4_Uint32 microkit_x86_ioport_read_32(microkit_ioport ioport_id, return ret.result; } -#endif -#if defined(CONFIG_ARCH_X86_64) && defined(CONFIG_VTX) +#if defined(CONFIG_VTX) +/* These value came from seL4 source, see: include/arch/x86/arch/object/vcpu.h */ +#define VMX_GUEST_RIP 0x0000681E +#define VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS 0x00004002 +#define VMX_CONTROL_ENTRY_INTERRUPTION_INFO 0x00004016 + static inline seL4_Word microkit_vcpu_x86_read_vmcs(microkit_child vcpu, seL4_Word field) { - seL4_X86_VCPU_ReadVMCS_t ret; - ret = seL4_X86_VCPU_ReadVMCS(BASE_VCPU_CAP + vcpu, field); - if (ret.error != seL4_NoError) { - microkit_dbg_puts("microkit_x86_read_vmcs: error reading data\n"); - microkit_internal_crash(ret.error); + /* Serve the guest's: + * - RIP, + * - Primary Processor Based VM Execution Controls, and + * - VM Entry Interruption-Information + * from local variables. Since they will be passed to the kernel on `seL4_VMEnter()`, + * then the kernel will write them to the VMCS for us. + * + * Note that this logic will be incorrect if we give a single PD access to multiple VCPU caps, + * but I don't think that will be a valid scenario since you can only have 1 bound VCPU + * with your TCB. */ + + seL4_Word value; + + switch (field) { + case VMX_GUEST_RIP: + value = microkit_x86_vcpu_rip; + break; + case VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS: + value = microkit_x86_vcpu_prim_proc_ctl; + break; + case VMX_CONTROL_ENTRY_INTERRUPTION_INFO: + value = microkit_x86_vcpu_irq_info; + break; + default: { + seL4_X86_VCPU_ReadVMCS_t ret; + ret = seL4_X86_VCPU_ReadVMCS(BASE_VCPU_CAP + vcpu, field); + if (ret.error != seL4_NoError) { + microkit_dbg_puts("microkit_x86_read_vmcs: error reading data\n"); + microkit_internal_crash(ret.error); + } + value = ret.value; + break; + } } - return ret.value; + return value; } static inline void microkit_vcpu_x86_write_vmcs(microkit_child vcpu, seL4_Word field, seL4_Word value) { - seL4_X86_VCPU_WriteVMCS_t ret; - ret = seL4_X86_VCPU_WriteVMCS(BASE_VCPU_CAP + vcpu, field, value); - if (ret.error != seL4_NoError) { - microkit_dbg_puts("microkit_x86_write_vmcs: error writing data\n"); - microkit_internal_crash(ret.error); + switch (field) { + case VMX_GUEST_RIP: + microkit_x86_vcpu_rip = value; + break; + case VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS: + microkit_x86_vcpu_prim_proc_ctl = value; + break; + case VMX_CONTROL_ENTRY_INTERRUPTION_INFO: + microkit_x86_vcpu_irq_info = value; + break; + default: { + seL4_X86_VCPU_WriteVMCS_t ret; + ret = seL4_X86_VCPU_WriteVMCS(BASE_VCPU_CAP + vcpu, field, value); + if (ret.error != seL4_NoError) { + microkit_dbg_puts("microkit_x86_write_vmcs: error writing data\n"); + microkit_internal_crash(ret.error); + } + break; + } } } @@ -477,7 +525,20 @@ static inline void microkit_vcpu_x86_write_regs(microkit_child vcpu, seL4_VCPUCo } } -#endif +static inline void microkit_vcpu_x86_on(void) +{ + /* On x86, a TCB can only have one bound VCPU at any given time. + * So we don't take a `microkit_child vcpu` here. */ + microkit_x86_do_vcpu_resume = seL4_True; +} + +static inline void microkit_vcpu_x86_off(void) +{ + microkit_x86_do_vcpu_resume = seL4_False; +} + +#endif /* CONFIG_VTX */ +#endif /* CONFIG_ARCH_X86_64 */ static inline void microkit_deferred_notify(microkit_channel ch) { diff --git a/libmicrokit/src/main.c b/libmicrokit/src/main.c index cff14f642..eceb9aa54 100644 --- a/libmicrokit/src/main.c +++ b/libmicrokit/src/main.c @@ -27,6 +27,16 @@ seL4_Bool microkit_have_signal = seL4_False; seL4_CPtr microkit_signal_cap; seL4_MessageInfo_t microkit_signal_msg; +#if defined(CONFIG_VTX) +seL4_Bool microkit_x86_do_vcpu_resume = seL4_False; +seL4_Word microkit_x86_vcpu_rip; +seL4_Word microkit_x86_vcpu_prim_proc_ctl; +seL4_Word microkit_x86_vcpu_irq_info; + +seL4_Bool microkit_x86_vmenter_result_valid = seL4_False; +seL4_Word microkit_x86_vmenter_result; +#endif /* CONFIG_VTX */ + seL4_Word microkit_irqs; seL4_Word microkit_notifications; seL4_Word microkit_pps; @@ -63,10 +73,98 @@ static void run_init_funcs(void) } } +static seL4_MessageInfo_t receive_event(bool have_reply, seL4_MessageInfo_t reply_tag, seL4_Word *badge) +{ + if (have_reply) { + return seL4_ReplyRecv(INPUT_CAP, reply_tag, badge, REPLY_CAP); + } else if (microkit_have_signal) { + return seL4_NBSendRecv(microkit_signal_cap, microkit_signal_msg, INPUT_CAP, badge, REPLY_CAP); + microkit_have_signal = seL4_False; + } else { + return seL4_Recv(INPUT_CAP, badge, REPLY_CAP); + } +} + +static seL4_MessageInfo_t send_recv_event(bool have_reply, seL4_MessageInfo_t reply_tag, seL4_Word *badge) +{ +#if defined(CONFIG_VTX) + microkit_x86_vmenter_result_valid = seL4_False; + if (microkit_x86_do_vcpu_resume) { + /* There isn't a syscall that atomically send signal then perform vmenter while waiting for + incoming notifications. So we have to dispatch any deferred signal first. Then switch execution + to the bound VCPU. A PD with a VCPU can't receive PPC on x86 so no need to check `have_reply`. */ + if (microkit_have_signal) { + seL4_NBSend(microkit_signal_cap, microkit_signal_msg); + microkit_have_signal = seL4_False; + } + + microkit_mr_set(SEL4_VMENTER_CALL_EIP_MR, microkit_x86_vcpu_rip); + microkit_mr_set(SEL4_VMENTER_CALL_CONTROL_PPC_MR, microkit_x86_vcpu_prim_proc_ctl); + microkit_mr_set(SEL4_VMENTER_CALL_INTERRUPT_INFO_MR, microkit_x86_vcpu_irq_info); + + microkit_x86_vmenter_result = seL4_VMEnter(badge); + + microkit_x86_vmenter_result_valid = seL4_True; + microkit_x86_vcpu_rip = microkit_mr_get(SEL4_VMENTER_CALL_EIP_MR); + microkit_x86_vcpu_prim_proc_ctl = microkit_mr_get(SEL4_VMENTER_CALL_CONTROL_PPC_MR); + microkit_x86_vcpu_irq_info = microkit_mr_get(SEL4_VMENTER_CALL_INTERRUPT_INFO_MR); + + /* Create a dummy tag so that we can call `fault()`, as VM Exits on x86 isn't modelled as an IPC like ARM. */ + if (microkit_x86_vmenter_result == SEL4_VMENTER_RESULT_FAULT) { + return seL4_MessageInfo_new(0, 0, 0, SEL4_VMENTER_NUM_FAULT_MSGS); + } else { + /* VCPU got interrupted due to a notification, no tag. */ + return seL4_MessageInfo_new(0, 0, 0, 0); + } + + } else { + return receive_event(have_reply, reply_tag, badge); + } +#else + return receive_event(have_reply, reply_tag, badge); +#endif +} + +static seL4_Bool is_endpoint(seL4_Word badge) +{ + return !!(badge >> 63); +} + +static seL4_Bool is_fault(seL4_Word badge) +{ +#if defined(CONFIG_VTX) + if (microkit_x86_vmenter_result_valid && microkit_x86_vmenter_result == SEL4_VMENTER_RESULT_FAULT) { + return seL4_True; + } else { + return !!((badge >> 62) & 1); + } +#else + return !!((badge >> 62) & 1); +#endif +} + +static seL4_Bool handle_fault(seL4_Word badge, seL4_MessageInfo_t tag, seL4_MessageInfo_t *reply_tag) +{ +#if defined(CONFIG_VTX) + seL4_Bool reply_to_fault = fault(badge & PD_MASK, tag, reply_tag); + if (microkit_x86_vmenter_result_valid == seL4_True) { + /* There won't be anything to reply to for a VCPU fault. But + * if fault() returns false then we shouldn't resume the VCPU. */ + if (!reply_to_fault) { + microkit_x86_do_vcpu_resume = seL4_False; + } + reply_to_fault = seL4_False; + } + return reply_to_fault; +#else + return fault(badge & PD_MASK, tag, reply_tag); +#endif +} + static void handler_loop(void) { bool have_reply = false; - seL4_MessageInfo_t reply_tag; + seL4_MessageInfo_t reply_tag = seL4_MessageInfo_new(0, 0, 0, 0); /** * Because of https://github.com/seL4/seL4/issues/1536 @@ -86,29 +184,14 @@ static void handler_loop(void) } for (;;) { - seL4_Word badge; - seL4_MessageInfo_t tag; - - if (have_reply) { - tag = seL4_ReplyRecv(INPUT_CAP, reply_tag, &badge, REPLY_CAP); - } else if (microkit_have_signal) { - tag = seL4_NBSendRecv(microkit_signal_cap, microkit_signal_msg, INPUT_CAP, &badge, REPLY_CAP); - microkit_have_signal = seL4_False; - } else { - tag = seL4_Recv(INPUT_CAP, &badge, REPLY_CAP); - } - - uint64_t is_endpoint = badge >> 63; - uint64_t is_fault = (badge >> 62) & 1; + seL4_Word badge = 0; + seL4_MessageInfo_t tag = send_recv_event(have_reply, reply_tag, &badge); have_reply = false; - if (is_fault) { - seL4_Bool reply_to_fault = fault(badge & PD_MASK, tag, &reply_tag); - if (reply_to_fault) { - have_reply = true; - } - } else if (is_endpoint) { + if (is_fault(badge)) { + have_reply = handle_fault(badge, tag, &reply_tag); + } else if (is_endpoint(badge)) { have_reply = true; reply_tag = protected(badge & CHANNEL_MASK, tag); } else { diff --git a/tool/microkit/src/sdf.rs b/tool/microkit/src/sdf.rs index 56901164a..56ebda6ef 100644 --- a/tool/microkit/src/sdf.rs +++ b/tool/microkit/src/sdf.rs @@ -2018,6 +2018,26 @@ pub fn parse( )); } + // Due to a kernel limitation in `src/arch/x86/c_traps.c`, + // only the VMM's bound ntfn is checked for pending ntfn. + // The endpoint object isn't passed to the kernel and checked for + // pending messages so PPC won't work while the VCPU is running. + // Technically PPC still works if the VCPU isn't resumed. But + // we shouldn't expose this footgun to users. + if config.arch == Arch::X86_64 + && ((ch.end_a.pp && pd_b.virtual_machine.is_some()) + || (ch.end_b.pp && pd_a.virtual_machine.is_some())) + { + return Err(format!( + "Error: It is not possible to PPC to PD '{}' with a bound vCPU from PD '{}' on x86_64 @ {}:{}:{}", + if ch.end_a.pp {&pd_b.name} else {&pd_a.name}, + if ch.end_a.pp {&pd_a.name} else {&pd_b.name}, + filename.display(), + pd_a.text_pos.unwrap().row, + pd_a.text_pos.unwrap().col + )); + } + ch_ids[ch.end_a.pd].push(ch.end_a.id); ch_ids[ch.end_b.pd].push(ch.end_b.id); } diff --git a/tool/microkit/tests/sdf/vm_x86_ppc_invalid_1.system b/tool/microkit/tests/sdf/vm_x86_ppc_invalid_1.system new file mode 100644 index 000000000..f1fab020a --- /dev/null +++ b/tool/microkit/tests/sdf/vm_x86_ppc_invalid_1.system @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tool/microkit/tests/sdf/vm_x86_ppc_invalid_2.system b/tool/microkit/tests/sdf/vm_x86_ppc_invalid_2.system new file mode 100644 index 000000000..db763e43e --- /dev/null +++ b/tool/microkit/tests/sdf/vm_x86_ppc_invalid_2.system @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tool/microkit/tests/sdf/vm_x86_ppc_valid.system b/tool/microkit/tests/sdf/vm_x86_ppc_valid.system new file mode 100644 index 000000000..66d0faa1f --- /dev/null +++ b/tool/microkit/tests/sdf/vm_x86_ppc_valid.system @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tool/microkit/tests/test.rs b/tool/microkit/tests/test.rs index 056a32368..1b4511943 100644 --- a/tool/microkit/tests/test.rs +++ b/tool/microkit/tests/test.rs @@ -761,6 +761,29 @@ mod virtual_machine { "Error: invalid memory region name 'mr1' on 'map' @", ) } + + #[test] + fn test_x86_ppc_invalid_1() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "vm_x86_ppc_invalid_1.system", + "Error: It is not possible to PPC to PD 'VMM' with a bound vCPU from PD 'some_service' on x86_64", + ) + } + + #[test] + fn test_x86_ppc_invalid_2() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "vm_x86_ppc_invalid_2.system", + "Error: It is not possible to PPC to PD 'VMM' with a bound vCPU from PD 'some_service' on x86_64", + ) + } + + #[test] + fn test_x86_ppc_valid() { + check_success(&DEFAULT_X86_64_KERNEL_CONFIG, "vm_x86_ppc_valid.system") + } } #[cfg(test)]