120 likes | 333 Views
Paravirtual/HVM linux examples. Argentina Software Development Center Software and Solutions Group 31 July 2008. void __init trap_init(void) -> for Xen. Xen patch at /arch/i386/kernels/traps-xen.c static trap_info_t trap_table[] = { { 0, 0, __KERNEL_CS, (unsigned long)divide_error },
E N D
Paravirtual/HVM linux examples Argentina Software Development Center Software and Solutions Group 31 July 2008
void __init trap_init(void) -> for Xen Xen patch at /arch/i386/kernels/traps-xen.c static trap_info_t trap_table[] = { { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, { 4, 3, __KERNEL_CS, (unsigned long)overflow }, { 5, 0, __KERNEL_CS, (unsigned long)bounds }, { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available }, { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault }, { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment }, { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, { SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)system_call }, { 0, 0, 0, 0 } }; void __init trap_init(void) { HYPERVISOR_set_trap_table(trap_table);
void __init trap_init(void) -> for linux /arch/i386/kernels/traps.c void __init trap_init(void) { #ifdef CONFIG_X86_LOCAL_APIC init_apic_mappings(); #endif set_trap_gate(0,&divide_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ set_system_gate(4,&overflow); … static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) { __u32 a, b; pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); write_idt_entry(idt_table, gate, a, b); } LGDT, LIDT, LLDT, LTR, SGDT, SIDT, SLDT, STR. These instructions cause VM exits if the “descriptor-table exiting” VM-execution control is 1
void __init time_init (void) -> for Xen Xen patch at /arch/i386/kernels/time-xen.c void __init time_init(void) { … HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, &xen_set_periodic_tick); get_time_values_from_xen(0);
void __init time_init (void) -> for linux /arch/i386/kernels/time.c void __init time_init(void) { struct timespec ts; ts.tv_sec = get_cmos_time(); ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); do_settimeofday(&ts); do_time_init(); } static inline int native_set_wallclock(unsigned long nowtime) /* time.h */ { int retval; if (efi_enabled) retval = efi_set_rtc_mmss(nowtime); else retval = mach_set_rtc_mmss(nowtime); return retval; } IN, INS/INSB/INSW/INSD, OUT, OUTS/OUTSB/OUTSW/OUTSD. The behavior of each of these instructions is determined by the settings of the “unconditional I/O exiting” and “use I/O bitmaps” VM-execution controls
void __cpuinit cpu_gdt_init(…) -> for Xen Xen patch at /arch/i386/kernels/cpu/common-xen.c void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr) { unsigned long frames[16]; unsigned long va; int f; for (va = gdt_descr->address, f = 0; va < gdt_descr->address + gdt_descr->size; va += PAGE_SIZE, f++) { frames[f] = virt_to_mfn(va); make_lowmem_page_readonly( (void *)va, XENFEAT_writable_descriptor_tables); } if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8)) BUG(); }
void __cpuinit cpu_gdt_init(…) -> for linux /arch/i386/kernels/cpu/common.c void __cpuinit cpu_set_gdt(int cpu) { struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); /* Reinit these anyway, even if they've already been done (on the boot CPU, this will transition from the boot gdt+pda to the real ones). */ load_gdt(cpu_gdt_descr); set_kernel_gs(); } #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) LGDT, LIDT, LLDT, LTR, SGDT, SIDT, SLDT, STR. These instructions cause VM exits if the “descriptor-table exiting” VM-execution control is 1
Managing the VMCS • Before entering VMX operation, the host VMM allocates a VMXON region • A unique VMCS region is required for each virtual machine; a VMXON region is required for the VMM itself. • A VMM determines the VMCS region size by reading the IA32_VMX_BASIC MSR • The address of the VMXON region for the VMM is provided as an operand to the VMXON instruction • Once in VMX root operation, the VMM needs to prepare data fields in the VMCS that control the execution of a VM upon a VM entry • The VMM can make a VMCS the current VMCS by using the VMPTRLD instruction. • VMCS data fields must be read and written only through the VMREAD and VMWRITE instructions, respectively • Every component of the VMCS is identified by a 32-bit encoding that is provided as an operand to VMREAD and VMWRITE • Software must maintain the VMCS structures in cache-coherent memory
Exercise 3: basics for an HVM driver • Determine whether the OS is running over Xen • Xen provides a standard string “XenVMMXenVMM” when calling CPUID with EAX = 40000000h • CPUID traps and Xen handles the VM Exit at vmexit.c:vmx_vmexit_handler • Response at traps.c:cpuid_hypervisor_leaves • Define the initial address for the hypercalls page • Call CPUID with EAX = 40000002h • You’ll receive the number of hypercall pages to be supported; use it to allocate a block to store callbacks