| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * svm_vmcall_test |
| * |
| * Copyright © 2021 Amazon.com, Inc. or its affiliates. |
| * |
| * Xen shared_info / pvclock testing |
| */ |
| |
| #include "test_util.h" |
| #include "kvm_util.h" |
| #include "processor.h" |
| |
| #include <stdint.h> |
| #include <time.h> |
| #include <sched.h> |
| #include <signal.h> |
| #include <pthread.h> |
| |
| #include <sys/eventfd.h> |
| |
| /* Defined in include/linux/kvm_types.h */ |
| #define GPA_INVALID (~(ulong)0) |
| |
| #define SHINFO_REGION_GVA 0xc0000000ULL |
| #define SHINFO_REGION_GPA 0xc0000000ULL |
| #define SHINFO_REGION_SLOT 10 |
| |
| #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) |
| #define DUMMY_REGION_SLOT 11 |
| |
| #define SHINFO_ADDR (SHINFO_REGION_GPA) |
| #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) |
| #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) |
| #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) |
| |
| #define SHINFO_VADDR (SHINFO_REGION_GVA) |
| #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) |
| #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) |
| |
| #define EVTCHN_VECTOR 0x10 |
| |
| #define EVTCHN_TEST1 15 |
| #define EVTCHN_TEST2 66 |
| #define EVTCHN_TIMER 13 |
| |
| #define XEN_HYPERCALL_MSR 0x40000000 |
| |
| #define MIN_STEAL_TIME 50000 |
| |
| #define SHINFO_RACE_TIMEOUT 2 /* seconds */ |
| |
| #define __HYPERVISOR_set_timer_op 15 |
| #define __HYPERVISOR_sched_op 29 |
| #define __HYPERVISOR_event_channel_op 32 |
| |
| #define SCHEDOP_poll 3 |
| |
| #define EVTCHNOP_send 4 |
| |
| #define EVTCHNSTAT_interdomain 2 |
| |
| struct evtchn_send { |
| u32 port; |
| }; |
| |
| struct sched_poll { |
| u32 *ports; |
| unsigned int nr_ports; |
| u64 timeout; |
| }; |
| |
| struct pvclock_vcpu_time_info { |
| u32 version; |
| u32 pad0; |
| u64 tsc_timestamp; |
| u64 system_time; |
| u32 tsc_to_system_mul; |
| s8 tsc_shift; |
| u8 flags; |
| u8 pad[2]; |
| } __attribute__((__packed__)); /* 32 bytes */ |
| |
| struct pvclock_wall_clock { |
| u32 version; |
| u32 sec; |
| u32 nsec; |
| } __attribute__((__packed__)); |
| |
| struct vcpu_runstate_info { |
| uint32_t state; |
| uint64_t state_entry_time; |
| uint64_t time[5]; /* Extra field for overrun check */ |
| }; |
| |
| struct compat_vcpu_runstate_info { |
| uint32_t state; |
| uint64_t state_entry_time; |
| uint64_t time[5]; |
| } __attribute__((__packed__));; |
| |
| struct arch_vcpu_info { |
| unsigned long cr2; |
| unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ |
| }; |
| |
| struct vcpu_info { |
| uint8_t evtchn_upcall_pending; |
| uint8_t evtchn_upcall_mask; |
| unsigned long evtchn_pending_sel; |
| struct arch_vcpu_info arch; |
| struct pvclock_vcpu_time_info time; |
| }; /* 64 bytes (x86) */ |
| |
| struct shared_info { |
| struct vcpu_info vcpu_info[32]; |
| unsigned long evtchn_pending[64]; |
| unsigned long evtchn_mask[64]; |
| struct pvclock_wall_clock wc; |
| uint32_t wc_sec_hi; |
| /* arch_shared_info here */ |
| }; |
| |
| #define RUNSTATE_running 0 |
| #define RUNSTATE_runnable 1 |
| #define RUNSTATE_blocked 2 |
| #define RUNSTATE_offline 3 |
| |
| static const char *runstate_names[] = { |
| "running", |
| "runnable", |
| "blocked", |
| "offline" |
| }; |
| |
| struct { |
| struct kvm_irq_routing info; |
| struct kvm_irq_routing_entry entries[2]; |
| } irq_routes; |
| |
| static volatile bool guest_saw_irq; |
| |
| static void evtchn_handler(struct ex_regs *regs) |
| { |
| struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; |
| vi->evtchn_upcall_pending = 0; |
| vi->evtchn_pending_sel = 0; |
| guest_saw_irq = true; |
| |
| GUEST_SYNC(0x20); |
| } |
| |
| static void guest_wait_for_irq(void) |
| { |
| while (!guest_saw_irq) |
| __asm__ __volatile__ ("rep nop" : : : "memory"); |
| guest_saw_irq = false; |
| } |
| |
| static void guest_code(void) |
| { |
| struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; |
| int i; |
| |
| __asm__ __volatile__( |
| "sti\n" |
| "nop\n" |
| ); |
| |
| /* Trigger an interrupt injection */ |
| GUEST_SYNC(0); |
| |
| guest_wait_for_irq(); |
| |
| /* Test having the host set runstates manually */ |
| GUEST_SYNC(RUNSTATE_runnable); |
| GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); |
| GUEST_ASSERT(rs->state == 0); |
| |
| GUEST_SYNC(RUNSTATE_blocked); |
| GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); |
| GUEST_ASSERT(rs->state == 0); |
| |
| GUEST_SYNC(RUNSTATE_offline); |
| GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); |
| GUEST_ASSERT(rs->state == 0); |
| |
| /* Test runstate time adjust */ |
| GUEST_SYNC(4); |
| GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); |
| GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); |
| |
| /* Test runstate time set */ |
| GUEST_SYNC(5); |
| GUEST_ASSERT(rs->state_entry_time >= 0x8000); |
| GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); |
| GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); |
| GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); |
| |
| /* sched_yield() should result in some 'runnable' time */ |
| GUEST_SYNC(6); |
| GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); |
| |
| /* Attempt to deliver a *masked* interrupt */ |
| GUEST_SYNC(7); |
| |
| /* Wait until we see the bit set */ |
| struct shared_info *si = (void *)SHINFO_VADDR; |
| while (!si->evtchn_pending[0]) |
| __asm__ __volatile__ ("rep nop" : : : "memory"); |
| |
| /* Now deliver an *unmasked* interrupt */ |
| GUEST_SYNC(8); |
| |
| guest_wait_for_irq(); |
| |
| /* Change memslots and deliver an interrupt */ |
| GUEST_SYNC(9); |
| |
| guest_wait_for_irq(); |
| |
| /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ |
| GUEST_SYNC(10); |
| |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(11); |
| |
| /* Our turn. Deliver event channel (to ourselves) with |
| * EVTCHNOP_send hypercall. */ |
| unsigned long rax; |
| struct evtchn_send s = { .port = 127 }; |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_event_channel_op), |
| "D" (EVTCHNOP_send), |
| "S" (&s)); |
| |
| GUEST_ASSERT(rax == 0); |
| |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(12); |
| |
| /* Deliver "outbound" event channel to an eventfd which |
| * happens to be one of our own irqfds. */ |
| s.port = 197; |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_event_channel_op), |
| "D" (EVTCHNOP_send), |
| "S" (&s)); |
| |
| GUEST_ASSERT(rax == 0); |
| |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(13); |
| |
| /* Set a timer 100ms in the future. */ |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_set_timer_op), |
| "D" (rs->state_entry_time + 100000000)); |
| GUEST_ASSERT(rax == 0); |
| |
| GUEST_SYNC(14); |
| |
| /* Now wait for the timer */ |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(15); |
| |
| /* The host has 'restored' the timer. Just wait for it. */ |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(16); |
| |
| /* Poll for an event channel port which is already set */ |
| u32 ports[1] = { EVTCHN_TIMER }; |
| struct sched_poll p = { |
| .ports = ports, |
| .nr_ports = 1, |
| .timeout = 0, |
| }; |
| |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_sched_op), |
| "D" (SCHEDOP_poll), |
| "S" (&p)); |
| |
| GUEST_ASSERT(rax == 0); |
| |
| GUEST_SYNC(17); |
| |
| /* Poll for an unset port and wait for the timeout. */ |
| p.timeout = 100000000; |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_sched_op), |
| "D" (SCHEDOP_poll), |
| "S" (&p)); |
| |
| GUEST_ASSERT(rax == 0); |
| |
| GUEST_SYNC(18); |
| |
| /* A timer will wake the masked port we're waiting on, while we poll */ |
| p.timeout = 0; |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_sched_op), |
| "D" (SCHEDOP_poll), |
| "S" (&p)); |
| |
| GUEST_ASSERT(rax == 0); |
| |
| GUEST_SYNC(19); |
| |
| /* A timer wake an *unmasked* port which should wake us with an |
| * actual interrupt, while we're polling on a different port. */ |
| ports[0]++; |
| p.timeout = 0; |
| __asm__ __volatile__ ("vmcall" : |
| "=a" (rax) : |
| "a" (__HYPERVISOR_sched_op), |
| "D" (SCHEDOP_poll), |
| "S" (&p)); |
| |
| GUEST_ASSERT(rax == 0); |
| |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(20); |
| |
| /* Timer should have fired already */ |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(21); |
| /* Racing host ioctls */ |
| |
| guest_wait_for_irq(); |
| |
| GUEST_SYNC(22); |
| /* Racing vmcall against host ioctl */ |
| |
| ports[0] = 0; |
| |
| p = (struct sched_poll) { |
| .ports = ports, |
| .nr_ports = 1, |
| .timeout = 0 |
| }; |
| |
| wait_for_timer: |
| /* |
| * Poll for a timer wake event while the worker thread is mucking with |
| * the shared info. KVM XEN drops timer IRQs if the shared info is |
| * invalid when the timer expires. Arbitrarily poll 100 times before |
| * giving up and asking the VMM to re-arm the timer. 100 polls should |
| * consume enough time to beat on KVM without taking too long if the |
| * timer IRQ is dropped due to an invalid event channel. |
| */ |
| for (i = 0; i < 100 && !guest_saw_irq; i++) |
| asm volatile("vmcall" |
| : "=a" (rax) |
| : "a" (__HYPERVISOR_sched_op), |
| "D" (SCHEDOP_poll), |
| "S" (&p) |
| : "memory"); |
| |
| /* |
| * Re-send the timer IRQ if it was (likely) dropped due to the timer |
| * expiring while the event channel was invalid. |
| */ |
| if (!guest_saw_irq) { |
| GUEST_SYNC(23); |
| goto wait_for_timer; |
| } |
| guest_saw_irq = false; |
| |
| GUEST_SYNC(24); |
| } |
| |
| static int cmp_timespec(struct timespec *a, struct timespec *b) |
| { |
| if (a->tv_sec > b->tv_sec) |
| return 1; |
| else if (a->tv_sec < b->tv_sec) |
| return -1; |
| else if (a->tv_nsec > b->tv_nsec) |
| return 1; |
| else if (a->tv_nsec < b->tv_nsec) |
| return -1; |
| else |
| return 0; |
| } |
| |
| static struct vcpu_info *vinfo; |
| static struct kvm_vcpu *vcpu; |
| |
| static void handle_alrm(int sig) |
| { |
| if (vinfo) |
| printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); |
| vcpu_dump(stdout, vcpu, 0); |
| TEST_FAIL("IRQ delivery timed out"); |
| } |
| |
| static void *juggle_shinfo_state(void *arg) |
| { |
| struct kvm_vm *vm = (struct kvm_vm *)arg; |
| |
| struct kvm_xen_hvm_attr cache_init = { |
| .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, |
| .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE |
| }; |
| |
| struct kvm_xen_hvm_attr cache_destroy = { |
| .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, |
| .u.shared_info.gfn = GPA_INVALID |
| }; |
| |
| for (;;) { |
| __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init); |
| __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy); |
| pthread_testcancel(); |
| }; |
| |
| return NULL; |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| struct timespec min_ts, max_ts, vm_ts; |
| struct kvm_vm *vm; |
| pthread_t thread; |
| bool verbose; |
| int ret; |
| |
| verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || |
| !strncmp(argv[1], "--verbose", 10)); |
| |
| int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); |
| TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); |
| |
| bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); |
| bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); |
| bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); |
| bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); |
| |
| clock_gettime(CLOCK_REALTIME, &min_ts); |
| |
| vm = vm_create_with_one_vcpu(&vcpu, guest_code); |
| |
| /* Map a region for the shared_info page */ |
| vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, |
| SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); |
| virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); |
| |
| struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); |
| |
| int zero_fd = open("/dev/zero", O_RDONLY); |
| TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); |
| |
| struct kvm_xen_hvm_config hvmc = { |
| .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, |
| .msr = XEN_HYPERCALL_MSR, |
| }; |
| |
| /* Let the kernel know that we *will* use it for sending all |
| * event channels, which lets it intercept SCHEDOP_poll */ |
| if (do_evtchn_tests) |
| hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; |
| |
| vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); |
| |
| struct kvm_xen_hvm_attr lm = { |
| .type = KVM_XEN_ATTR_TYPE_LONG_MODE, |
| .u.long_mode = 1, |
| }; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); |
| |
| if (do_runstate_flag) { |
| struct kvm_xen_hvm_attr ruf = { |
| .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, |
| .u.runstate_update_flag = 1, |
| }; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); |
| |
| ruf.u.runstate_update_flag = 0; |
| vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); |
| TEST_ASSERT(ruf.u.runstate_update_flag == 1, |
| "Failed to read back RUNSTATE_UPDATE_FLAG attr"); |
| } |
| |
| struct kvm_xen_hvm_attr ha = { |
| .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, |
| .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, |
| }; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); |
| |
| /* |
| * Test what happens when the HVA of the shinfo page is remapped after |
| * the kernel has a reference to it. But make sure we copy the clock |
| * info over since that's only set at setup time, and we test it later. |
| */ |
| struct pvclock_wall_clock wc_copy = shinfo->wc; |
| void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); |
| TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); |
| shinfo->wc = wc_copy; |
| |
| struct kvm_xen_vcpu_attr vi = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, |
| .u.gpa = VCPU_INFO_ADDR, |
| }; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); |
| |
| struct kvm_xen_vcpu_attr pvclock = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, |
| .u.gpa = PVTIME_ADDR, |
| }; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); |
| |
| struct kvm_xen_hvm_attr vec = { |
| .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, |
| .u.vector = EVTCHN_VECTOR, |
| }; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); |
| |
| vm_init_descriptor_tables(vm); |
| vcpu_init_descriptor_tables(vcpu); |
| vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); |
| |
| if (do_runstate_tests) { |
| struct kvm_xen_vcpu_attr st = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, |
| .u.gpa = RUNSTATE_ADDR, |
| }; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); |
| } |
| |
| int irq_fd[2] = { -1, -1 }; |
| |
| if (do_eventfd_tests) { |
| irq_fd[0] = eventfd(0, 0); |
| irq_fd[1] = eventfd(0, 0); |
| |
| /* Unexpected, but not a KVM failure */ |
| if (irq_fd[0] == -1 || irq_fd[1] == -1) |
| do_evtchn_tests = do_eventfd_tests = false; |
| } |
| |
| if (do_eventfd_tests) { |
| irq_routes.info.nr = 2; |
| |
| irq_routes.entries[0].gsi = 32; |
| irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; |
| irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; |
| irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; |
| irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; |
| |
| irq_routes.entries[1].gsi = 33; |
| irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; |
| irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; |
| irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; |
| irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; |
| |
| vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); |
| |
| struct kvm_irqfd ifd = { }; |
| |
| ifd.fd = irq_fd[0]; |
| ifd.gsi = 32; |
| vm_ioctl(vm, KVM_IRQFD, &ifd); |
| |
| ifd.fd = irq_fd[1]; |
| ifd.gsi = 33; |
| vm_ioctl(vm, KVM_IRQFD, &ifd); |
| |
| struct sigaction sa = { }; |
| sa.sa_handler = handle_alrm; |
| sigaction(SIGALRM, &sa, NULL); |
| } |
| |
| struct kvm_xen_vcpu_attr tmr = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, |
| .u.timer.port = EVTCHN_TIMER, |
| .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, |
| .u.timer.expires_ns = 0 |
| }; |
| |
| if (do_evtchn_tests) { |
| struct kvm_xen_hvm_attr inj = { |
| .type = KVM_XEN_ATTR_TYPE_EVTCHN, |
| .u.evtchn.send_port = 127, |
| .u.evtchn.type = EVTCHNSTAT_interdomain, |
| .u.evtchn.flags = 0, |
| .u.evtchn.deliver.port.port = EVTCHN_TEST1, |
| .u.evtchn.deliver.port.vcpu = vcpu->id + 1, |
| .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, |
| }; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); |
| |
| /* Test migration to a different vCPU */ |
| inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; |
| inj.u.evtchn.deliver.port.vcpu = vcpu->id; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); |
| |
| inj.u.evtchn.send_port = 197; |
| inj.u.evtchn.deliver.eventfd.port = 0; |
| inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; |
| inj.u.evtchn.flags = 0; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); |
| |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| } |
| vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); |
| vinfo->evtchn_upcall_pending = 0; |
| |
| struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); |
| rs->state = 0x5a; |
| |
| bool evtchn_irq_expected = false; |
| |
| for (;;) { |
| volatile struct kvm_run *run = vcpu->run; |
| struct ucall uc; |
| |
| vcpu_run(vcpu); |
| |
| TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, |
| "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", |
| run->exit_reason, |
| exit_reason_str(run->exit_reason)); |
| |
| switch (get_ucall(vcpu, &uc)) { |
| case UCALL_ABORT: |
| REPORT_GUEST_ASSERT(uc); |
| /* NOT REACHED */ |
| case UCALL_SYNC: { |
| struct kvm_xen_vcpu_attr rst; |
| long rundelay; |
| |
| if (do_runstate_tests) |
| TEST_ASSERT(rs->state_entry_time == rs->time[0] + |
| rs->time[1] + rs->time[2] + rs->time[3], |
| "runstate times don't add up"); |
| |
| switch (uc.args[1]) { |
| case 0: |
| if (verbose) |
| printf("Delivering evtchn upcall\n"); |
| evtchn_irq_expected = true; |
| vinfo->evtchn_upcall_pending = 1; |
| break; |
| |
| case RUNSTATE_runnable...RUNSTATE_offline: |
| TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); |
| if (!do_runstate_tests) |
| goto done; |
| if (verbose) |
| printf("Testing runstate %s\n", runstate_names[uc.args[1]]); |
| rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; |
| rst.u.runstate.state = uc.args[1]; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); |
| break; |
| |
| case 4: |
| if (verbose) |
| printf("Testing RUNSTATE_ADJUST\n"); |
| rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; |
| memset(&rst.u, 0, sizeof(rst.u)); |
| rst.u.runstate.state = (uint64_t)-1; |
| rst.u.runstate.time_blocked = |
| 0x5a - rs->time[RUNSTATE_blocked]; |
| rst.u.runstate.time_offline = |
| 0x6b6b - rs->time[RUNSTATE_offline]; |
| rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - |
| rst.u.runstate.time_offline; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); |
| break; |
| |
| case 5: |
| if (verbose) |
| printf("Testing RUNSTATE_DATA\n"); |
| rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; |
| memset(&rst.u, 0, sizeof(rst.u)); |
| rst.u.runstate.state = RUNSTATE_running; |
| rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; |
| rst.u.runstate.time_blocked = 0x6b6b; |
| rst.u.runstate.time_offline = 0x5a; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); |
| break; |
| |
| case 6: |
| if (verbose) |
| printf("Testing steal time\n"); |
| /* Yield until scheduler delay exceeds target */ |
| rundelay = get_run_delay() + MIN_STEAL_TIME; |
| do { |
| sched_yield(); |
| } while (get_run_delay() < rundelay); |
| break; |
| |
| case 7: |
| if (!do_eventfd_tests) |
| goto done; |
| if (verbose) |
| printf("Testing masked event channel\n"); |
| shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; |
| eventfd_write(irq_fd[0], 1UL); |
| alarm(1); |
| break; |
| |
| case 8: |
| if (verbose) |
| printf("Testing unmasked event channel\n"); |
| /* Unmask that, but deliver the other one */ |
| shinfo->evtchn_pending[0] = 0; |
| shinfo->evtchn_mask[0] = 0; |
| eventfd_write(irq_fd[1], 1UL); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 9: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| shinfo->evtchn_pending[1] = 0; |
| if (verbose) |
| printf("Testing event channel after memslot change\n"); |
| vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, |
| DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); |
| eventfd_write(irq_fd[0], 1UL); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 10: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| if (!do_evtchn_tests) |
| goto done; |
| |
| shinfo->evtchn_pending[0] = 0; |
| if (verbose) |
| printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); |
| |
| struct kvm_irq_routing_xen_evtchn e; |
| e.port = EVTCHN_TEST2; |
| e.vcpu = vcpu->id; |
| e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; |
| |
| vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 11: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| shinfo->evtchn_pending[1] = 0; |
| |
| if (verbose) |
| printf("Testing guest EVTCHNOP_send direct to evtchn\n"); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 12: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| shinfo->evtchn_pending[0] = 0; |
| |
| if (verbose) |
| printf("Testing guest EVTCHNOP_send to eventfd\n"); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 13: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| shinfo->evtchn_pending[1] = 0; |
| |
| if (verbose) |
| printf("Testing guest oneshot timer\n"); |
| break; |
| |
| case 14: |
| memset(&tmr, 0, sizeof(tmr)); |
| tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); |
| TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, |
| "Timer port not returned"); |
| TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, |
| "Timer priority not returned"); |
| TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, |
| "Timer expiry not returned"); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 15: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| shinfo->evtchn_pending[0] = 0; |
| |
| if (verbose) |
| printf("Testing restored oneshot timer\n"); |
| |
| tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| evtchn_irq_expected = true; |
| alarm(1); |
| break; |
| |
| case 16: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| |
| if (verbose) |
| printf("Testing SCHEDOP_poll with already pending event\n"); |
| shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; |
| alarm(1); |
| break; |
| |
| case 17: |
| if (verbose) |
| printf("Testing SCHEDOP_poll timeout\n"); |
| shinfo->evtchn_pending[0] = 0; |
| alarm(1); |
| break; |
| |
| case 18: |
| if (verbose) |
| printf("Testing SCHEDOP_poll wake on masked event\n"); |
| |
| tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| alarm(1); |
| break; |
| |
| case 19: |
| shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; |
| if (verbose) |
| printf("Testing SCHEDOP_poll wake on unmasked event\n"); |
| |
| evtchn_irq_expected = true; |
| tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| |
| /* Read it back and check the pending time is reported correctly */ |
| tmr.u.timer.expires_ns = 0; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); |
| TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, |
| "Timer not reported pending"); |
| alarm(1); |
| break; |
| |
| case 20: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| /* Read timer and check it is no longer pending */ |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); |
| TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); |
| |
| shinfo->evtchn_pending[0] = 0; |
| if (verbose) |
| printf("Testing timer in the past\n"); |
| |
| evtchn_irq_expected = true; |
| tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| alarm(1); |
| break; |
| |
| case 21: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| alarm(0); |
| |
| if (verbose) |
| printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); |
| |
| ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); |
| TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); |
| |
| struct kvm_irq_routing_xen_evtchn uxe = { |
| .port = 1, |
| .vcpu = vcpu->id, |
| .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL |
| }; |
| |
| evtchn_irq_expected = true; |
| for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) |
| __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); |
| break; |
| |
| case 22: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| |
| if (verbose) |
| printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); |
| |
| shinfo->evtchn_pending[0] = 1; |
| |
| evtchn_irq_expected = true; |
| tmr.u.timer.expires_ns = rs->state_entry_time + |
| SHINFO_RACE_TIMEOUT * 1000000000ULL; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| break; |
| |
| case 23: |
| /* |
| * Optional and possibly repeated sync point. |
| * Injecting the timer IRQ may fail if the |
| * shinfo is invalid when the timer expires. |
| * If the timer has expired but the IRQ hasn't |
| * been delivered, rearm the timer and retry. |
| */ |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); |
| |
| /* Resume the guest if the timer is still pending. */ |
| if (tmr.u.timer.expires_ns) |
| break; |
| |
| /* All done if the IRQ was delivered. */ |
| if (!evtchn_irq_expected) |
| break; |
| |
| tmr.u.timer.expires_ns = rs->state_entry_time + |
| SHINFO_RACE_TIMEOUT * 1000000000ULL; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); |
| break; |
| case 24: |
| TEST_ASSERT(!evtchn_irq_expected, |
| "Expected event channel IRQ but it didn't happen"); |
| |
| ret = pthread_cancel(thread); |
| TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); |
| |
| ret = pthread_join(thread, 0); |
| TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); |
| goto done; |
| |
| case 0x20: |
| TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); |
| evtchn_irq_expected = false; |
| break; |
| } |
| break; |
| } |
| case UCALL_DONE: |
| goto done; |
| default: |
| TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); |
| } |
| } |
| |
| done: |
| alarm(0); |
| clock_gettime(CLOCK_REALTIME, &max_ts); |
| |
| /* |
| * Just a *really* basic check that things are being put in the |
| * right place. The actual calculations are much the same for |
| * Xen as they are for the KVM variants, so no need to check. |
| */ |
| struct pvclock_wall_clock *wc; |
| struct pvclock_vcpu_time_info *ti, *ti2; |
| |
| wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); |
| ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); |
| ti2 = addr_gpa2hva(vm, PVTIME_ADDR); |
| |
| if (verbose) { |
| printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); |
| printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", |
| ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, |
| ti->tsc_shift, ti->flags); |
| printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", |
| ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, |
| ti2->tsc_shift, ti2->flags); |
| } |
| |
| vm_ts.tv_sec = wc->sec; |
| vm_ts.tv_nsec = wc->nsec; |
| TEST_ASSERT(wc->version && !(wc->version & 1), |
| "Bad wallclock version %x", wc->version); |
| TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); |
| TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); |
| |
| TEST_ASSERT(ti->version && !(ti->version & 1), |
| "Bad time_info version %x", ti->version); |
| TEST_ASSERT(ti2->version && !(ti2->version & 1), |
| "Bad time_info version %x", ti->version); |
| |
| if (do_runstate_tests) { |
| /* |
| * Fetch runstate and check sanity. Strictly speaking in the |
| * general case we might not expect the numbers to be identical |
| * but in this case we know we aren't running the vCPU any more. |
| */ |
| struct kvm_xen_vcpu_attr rst = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, |
| }; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); |
| |
| if (verbose) { |
| printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", |
| rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", |
| rs->state, rs->state_entry_time); |
| for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { |
| printf("State %s: %" PRIu64 " ns\n", |
| runstate_names[i], rs->time[i]); |
| } |
| } |
| |
| /* |
| * Exercise runstate info at all points across the page boundary, in |
| * 32-bit and 64-bit mode. In particular, test the case where it is |
| * configured in 32-bit mode and then switched to 64-bit mode while |
| * active, which takes it onto the second page. |
| */ |
| unsigned long runstate_addr; |
| struct compat_vcpu_runstate_info *crs; |
| for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; |
| runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { |
| |
| rs = addr_gpa2hva(vm, runstate_addr); |
| crs = (void *)rs; |
| |
| memset(rs, 0xa5, sizeof(*rs)); |
| |
| /* Set to compatibility mode */ |
| lm.u.long_mode = 0; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); |
| |
| /* Set runstate to new address (kernel will write it) */ |
| struct kvm_xen_vcpu_attr st = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, |
| .u.gpa = runstate_addr, |
| }; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); |
| |
| if (verbose) |
| printf("Compatibility runstate at %08lx\n", runstate_addr); |
| |
| TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); |
| TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, |
| "State entry time mismatch"); |
| TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, |
| "Running time mismatch"); |
| TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, |
| "Runnable time mismatch"); |
| TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, |
| "Blocked time mismatch"); |
| TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, |
| "Offline time mismatch"); |
| TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, |
| "Structure overrun"); |
| TEST_ASSERT(crs->state_entry_time == crs->time[0] + |
| crs->time[1] + crs->time[2] + crs->time[3], |
| "runstate times don't add up"); |
| |
| |
| /* Now switch to 64-bit mode */ |
| lm.u.long_mode = 1; |
| vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); |
| |
| memset(rs, 0xa5, sizeof(*rs)); |
| |
| /* Don't change the address, just trigger a write */ |
| struct kvm_xen_vcpu_attr adj = { |
| .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, |
| .u.runstate.state = (uint64_t)-1 |
| }; |
| vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); |
| |
| if (verbose) |
| printf("64-bit runstate at %08lx\n", runstate_addr); |
| |
| TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); |
| TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, |
| "State entry time mismatch"); |
| TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, |
| "Running time mismatch"); |
| TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, |
| "Runnable time mismatch"); |
| TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, |
| "Blocked time mismatch"); |
| TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, |
| "Offline time mismatch"); |
| TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, |
| "Structure overrun"); |
| |
| TEST_ASSERT(rs->state_entry_time == rs->time[0] + |
| rs->time[1] + rs->time[2] + rs->time[3], |
| "runstate times don't add up"); |
| } |
| } |
| |
| kvm_vm_free(vm); |
| return 0; |
| } |