Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "ARM fixes:

   - Another state update on exit to userspace fix

   - Prevent the creation of mixed 32/64 VMs

   - Fix regression with irqbypass not restarting the guest on failed
     connect

   - Fix regression with debug register decoding resulting in
     overlapping access

   - Commit exception state on exit to userspace

   - Fix the MMU notifier return values

   - Add missing 'static' qualifiers in the new host stage-2 code

  x86 fixes:

   - fix guest missed wakeup with assigned devices

   - fix WARN reported by syzkaller

   - do not use BIT() in UAPI headers

   - make the kvm_amd.avic parameter bool

  PPC fixes:

   - make halt polling heuristics consistent with other architectures

  selftests:

   - various fixes

   - new performance selftest memslot_perf_test

   - test UFFD minor faults in demand_paging_test"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (44 commits)
  selftests: kvm: fix overlapping addresses in memslot_perf_test
  KVM: X86: Kill off ctxt->ud
  KVM: X86: Fix warning caused by stale emulation context
  KVM: X86: Use kvm_get_linear_rip() in single-step and #DB/#BP interception
  KVM: x86/mmu: Fix comment mentioning skip_4k
  KVM: VMX: update vcpu posted-interrupt descriptor when assigning device
  KVM: rename KVM_REQ_PENDING_TIMER to KVM_REQ_UNBLOCK
  KVM: x86: add start_assignment hook to kvm_x86_ops
  KVM: LAPIC: Narrow the timer latency between wait_lapic_expire and world switch
  selftests: kvm: do only 1 memslot_perf_test run by default
  KVM: X86: Use _BITUL() macro in UAPI headers
  KVM: selftests: add shared hugetlbfs backing source type
  KVM: selftests: allow using UFFD minor faults for demand paging
  KVM: selftests: create alias mappings when using shared memory
  KVM: selftests: add shmem backing source type
  KVM: selftests: refactor vm_mem_backing_src_type flags
  KVM: selftests: allow different backing source types
  KVM: selftests: compute correct demand paging size
  KVM: selftests: simplify setup_demand_paging error handling
  KVM: selftests: Print a message if /dev/kvm is missing
  ...
Linus Torvalds 2021-05-29 06:02:25 -10:00
commit 224478289c
52 changed files with 1695 additions and 288 deletions

View file

@@ -118,10 +118,12 @@ KVM_REQ_MMU_RELOAD
   necessary to inform each VCPU to completely refresh the tables.  This
   request is used for that.
 
-KVM_REQ_PENDING_TIMER
-
-  This request may be made from a timer handler run on the host on behalf
-  of a VCPU.  It informs the VCPU thread to inject a timer interrupt.
+KVM_REQ_UNBLOCK
+
+  This request informs the vCPU to exit kvm_vcpu_block.  It is used for
+  example from timer handlers that run on the host on behalf of a vCPU,
+  or in order to update the interrupt routing and ensure that assigned
+  devices will wake up the vCPU.
 
 KVM_REQ_UNHALT

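The documentation hunk above describes KVM_REQ_UNBLOCK only in prose. As a rough, self-contained C model of the request pattern it refers to (not kernel code: the struct and helper names below are invented for illustration, while kvm_make_request()/kvm_check_request() are the real kernel helpers this imitates), consider:

/*
 * Standalone model of a vCPU request: a bit is set on the vCPU, the vCPU
 * is woken out of its block loop, and the bit is consumed on the next
 * pass through the run loop.
 */
#include <stdbool.h>
#include <stdio.h>

#define REQ_UNBLOCK 2U   /* illustrative value, mirrors KVM_REQ_UNBLOCK */

struct vcpu_model {
        unsigned long requests;
        bool blocked;
};

static void make_request(struct vcpu_model *v, unsigned int req)
{
        v->requests |= 1UL << req;
        v->blocked = false;             /* wake the vCPU out of its block loop */
}

static bool check_and_clear_request(struct vcpu_model *v, unsigned int req)
{
        if (!(v->requests & (1UL << req)))
                return false;
        v->requests &= ~(1UL << req);   /* request is consumed exactly once */
        return true;
}

int main(void)
{
        struct vcpu_model v = { .blocked = true };

        make_request(&v, REQ_UNBLOCK);  /* e.g. from a host timer handler */
        if (check_and_clear_request(&v, REQ_UNBLOCK))
                printf("vCPU unblocked, request consumed\n");
        return 0;
}

The real implementation additionally uses atomic bitops on vcpu->requests and kicks the vCPU thread; the model only captures the set-wake-consume ordering.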
View file

@@ -63,6 +63,7 @@
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector             18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize              19
 #define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp                   20
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc                   21
 
 #ifndef __ASSEMBLY__
 
@@ -201,6 +202,8 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
+
 extern u64 __vgic_v3_get_gic_config(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);

View file

@@ -463,4 +463,9 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
         vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
 }
 
+static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+{
+        return test_bit(feature, vcpu->arch.features);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */

View file

@@ -720,11 +720,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                         return ret;
         }
 
-        if (run->immediate_exit)
-                return -EINTR;
-
         vcpu_load(vcpu);
 
+        if (run->immediate_exit) {
+                ret = -EINTR;
+                goto out;
+        }
+
         kvm_sigset_activate(vcpu);
 
         ret = 1;
@@ -897,6 +899,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
         kvm_sigset_deactivate(vcpu);
 
+out:
+        /*
+         * In the unlikely event that we are returning to userspace
+         * with pending exceptions or PC adjustment, commit these
+         * adjustments in order to give userspace a consistent view of
+         * the vcpu state. Note that this relies on __kvm_adjust_pc()
+         * being preempt-safe on VHE.
+         */
+        if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
+                                         KVM_ARM64_INCREMENT_PC)))
+                kvm_call_hyp(__kvm_adjust_pc, vcpu);
+
         vcpu_put(vcpu);
 
         return ret;
 }

View file

@@ -296,7 +296,7 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
         *vcpu_pc(vcpu) = vect_offset;
 }
 
-void kvm_inject_exception(struct kvm_vcpu *vcpu)
+static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
         if (vcpu_el1_is_32bit(vcpu)) {
                 switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
@@ -329,3 +329,19 @@ void kvm_inject_exception(struct kvm_vcpu *vcpu)
                 }
         }
 }
+
+/*
+ * Adjust the guest PC (and potentially exception state) depending on
+ * flags provided by the emulation code.
+ */
+void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
+{
+        if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+                kvm_inject_exception(vcpu);
+                vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+                                      KVM_ARM64_EXCEPT_MASK);
+        } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+                kvm_skip_instr(vcpu);
+                vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+        }
+}

View file

@@ -13,8 +13,6 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
 
-void kvm_inject_exception(struct kvm_vcpu *vcpu);
-
 static inline void kvm_skip_instr(struct kvm_vcpu *vcpu)
 {
         if (vcpu_mode_is_32bit(vcpu)) {
@@ -43,22 +41,6 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
         write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
 }
 
-/*
- * Adjust the guest PC on entry, depending on flags provided by EL1
- * for the purpose of emulation (MMIO, sysreg) or exception injection.
- */
-static inline void __adjust_pc(struct kvm_vcpu *vcpu)
-{
-        if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
-                kvm_inject_exception(vcpu);
-                vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
-                                      KVM_ARM64_EXCEPT_MASK);
-        } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
-                kvm_skip_instr(vcpu);
-                vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
-        }
-}
-
 /*
  * Skip an instruction while host sysregs are live.
  * Assumes host is always 64-bit.

View file

@@ -28,6 +28,13 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
         cpu_reg(host_ctxt, 1) =  __kvm_vcpu_run(kern_hyp_va(vcpu));
 }
 
+static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
+{
+        DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+        __kvm_adjust_pc(kern_hyp_va(vcpu));
+}
+
 static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
 {
         __kvm_flush_vm_context();
@@ -170,6 +177,7 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 static const hcall_t host_hcall[] = {
         HANDLE_FUNC(__kvm_vcpu_run),
+        HANDLE_FUNC(__kvm_adjust_pc),
         HANDLE_FUNC(__kvm_flush_vm_context),
         HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
         HANDLE_FUNC(__kvm_tlb_flush_vmid),

View file

@@ -23,8 +23,8 @@
 extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
-struct hyp_pool host_s2_mem;
-struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_mem;
+static struct hyp_pool host_s2_dev;
 
 /*
  * Copies of the host's CPU features registers holding sanitized values.

View file

@@ -17,7 +17,6 @@
 #include <nvhe/trap_handler.h>
 
 struct hyp_pool hpool;
-struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 unsigned long hyp_nr_cpus;
 
 #define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +26,7 @@ static void *vmemmap_base;
 static void *hyp_pgt_base;
 static void *host_s2_mem_pgt_base;
 static void *host_s2_dev_pgt_base;
+static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 
 static int divide_memory_pool(void *virt, unsigned long size)
 {

View file

@@ -4,7 +4,6 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
-#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 #include <hyp/sysreg-sr.h>
 
@@ -201,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
          */
         __debug_save_host_buffers_nvhe(vcpu);
 
-        __adjust_pc(vcpu);
+        __kvm_adjust_pc(vcpu);
 
         /*
          * We must restore the 32-bit state before the sysregs, thanks

View file

@@ -4,7 +4,6 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
-#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 
 #include <linux/arm-smccc.h>
@@ -132,7 +131,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
         __load_guest_stage2(vcpu->arch.hw_mmu);
         __activate_traps(vcpu);
 
-        __adjust_pc(vcpu);
+        __kvm_adjust_pc(vcpu);
 
         sysreg_restore_guest_state_vhe(guest_ctxt);
         __debug_switch_to_guest(vcpu);

View file

@@ -1156,13 +1156,13 @@ out_unlock:
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
         if (!kvm->arch.mmu.pgt)
-                return 0;
+                return false;
 
         __unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
                              (range->end - range->start) << PAGE_SHIFT,
                              range->may_block);
 
-        return 0;
+        return false;
 }
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1170,7 +1170,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
         kvm_pfn_t pfn = pte_pfn(range->pte);
 
         if (!kvm->arch.mmu.pgt)
-                return 0;
+                return false;
 
         WARN_ON(range->end - range->start != 1);
@@ -1190,7 +1190,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
                                PAGE_SIZE, __pfn_to_phys(pfn),
                                KVM_PGTABLE_PROT_R, NULL);
 
-        return 0;
+        return false;
 }
 
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1200,7 +1200,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
         pte_t pte;
 
         if (!kvm->arch.mmu.pgt)
-                return 0;
+                return false;
 
         WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
@@ -1213,7 +1213,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
         if (!kvm->arch.mmu.pgt)
-                return 0;
+                return false;
 
         return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
                                            range->start << PAGE_SHIFT);

View file

@@ -166,6 +166,25 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
         return 0;
 }
 
+static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
+{
+        struct kvm_vcpu *tmp;
+        bool is32bit;
+        int i;
+
+        is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+        if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
+                return false;
+
+        /* Check that the vcpus are either all 32bit or all 64bit */
+        kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+                if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
+                        return false;
+        }
+
+        return true;
+}
+
 /**
  * kvm_reset_vcpu - sets core registers and sys_regs to reset value
  * @vcpu: The VCPU pointer
@@ -217,13 +236,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
                 }
         }
 
+        if (!vcpu_allowed_register_width(vcpu)) {
+                ret = -EINVAL;
+                goto out;
+        }
+
         switch (vcpu->arch.target) {
         default:
                 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
-                        if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) {
-                                ret = -EINVAL;
-                                goto out;
-                        }
                         pstate = VCPU_RESET_PSTATE_SVC;
                 } else {
                         pstate = VCPU_RESET_PSTATE_EL1;

View file

@ -399,14 +399,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p, struct sys_reg_params *p,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
if (p->is_write) if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg); reg_to_dbg(vcpu, p, rd, dbg_reg);
else else
dbg_to_reg(vcpu, p, rd, dbg_reg); dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
return true; return true;
} }
@ -414,7 +414,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -424,7 +424,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -434,21 +434,21 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_bvr(struct kvm_vcpu *vcpu, static void reset_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
} }
static bool trap_bcr(struct kvm_vcpu *vcpu, static bool trap_bcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p, struct sys_reg_params *p,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
if (p->is_write) if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg); reg_to_dbg(vcpu, p, rd, dbg_reg);
else else
dbg_to_reg(vcpu, p, rd, dbg_reg); dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
return true; return true;
} }
@ -456,7 +456,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -467,7 +467,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -477,22 +477,22 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_bcr(struct kvm_vcpu *vcpu, static void reset_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
} }
static bool trap_wvr(struct kvm_vcpu *vcpu, static bool trap_wvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p, struct sys_reg_params *p,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
if (p->is_write) if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg); reg_to_dbg(vcpu, p, rd, dbg_reg);
else else
dbg_to_reg(vcpu, p, rd, dbg_reg); dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, trace_trap_reg(__func__, rd->CRm, p->is_write,
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
return true; return true;
} }
@ -500,7 +500,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -510,7 +510,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -520,21 +520,21 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_wvr(struct kvm_vcpu *vcpu, static void reset_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
} }
static bool trap_wcr(struct kvm_vcpu *vcpu, static bool trap_wcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p, struct sys_reg_params *p,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
if (p->is_write) if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg); reg_to_dbg(vcpu, p, rd, dbg_reg);
else else
dbg_to_reg(vcpu, p, rd, dbg_reg); dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
return true; return true;
} }
@ -542,7 +542,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -552,7 +552,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr) const struct kvm_one_reg *reg, void __user *uaddr)
{ {
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT; return -EFAULT;
@ -562,7 +562,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_wcr(struct kvm_vcpu *vcpu, static void reset_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd) const struct sys_reg_desc *rd)
{ {
vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
} }
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)

View file

@@ -51,6 +51,7 @@
 /* PPC-specific vcpu->requests bit members */
 #define KVM_REQ_WATCHDOG        KVM_ARCH_REQ(0)
 #define KVM_REQ_EPR_EXIT        KVM_ARCH_REQ(1)
+#define KVM_REQ_PENDING_TIMER   KVM_ARCH_REQ(2)
 
 #include <linux/mmu_notifier.h>

View file

@@ -3936,7 +3936,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
                                 break;
                         }
                         cur = ktime_get();
-                } while (single_task_running() && ktime_before(cur, stop));
+                } while (kvm_vcpu_can_poll(cur, stop));
 
                 spin_lock(&vc->lock);
                 vc->vcore_state = VCORE_INACTIVE;

View file

@@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block)
 KVM_X86_OP_NULL(vcpu_blocking)
 KVM_X86_OP_NULL(vcpu_unblocking)
 KVM_X86_OP_NULL(update_pi_irte)
+KVM_X86_OP_NULL(start_assignment)
 KVM_X86_OP_NULL(apicv_post_state_restore)
 KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt)
 KVM_X86_OP_NULL(set_hv_timer)

View file

@@ -1352,6 +1352,7 @@ struct kvm_x86_ops {
 
         int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
                               uint32_t guest_irq, bool set);
+        void (*start_assignment)(struct kvm *kvm);
         void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
         bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);

View file

@ -5111,7 +5111,7 @@ done:
return rc; return rc;
} }
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{ {
int rc = X86EMUL_CONTINUE; int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode; int mode = ctxt->mode;
@ -5322,7 +5322,8 @@ done_prefixes:
ctxt->execute = opcode.u.execute; ctxt->execute = opcode.u.execute;
if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD))) if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
likely(!(ctxt->d & EmulateOnUD)))
return EMULATION_FAILED; return EMULATION_FAILED;
if (unlikely(ctxt->d & if (unlikely(ctxt->d &

View file

@ -1172,6 +1172,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
{ {
struct kvm_hv *hv = to_kvm_hv(kvm); struct kvm_hv *hv = to_kvm_hv(kvm);
u64 gfn; u64 gfn;
int idx;
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
@ -1190,9 +1191,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
hv->tsc_ref.tsc_sequence = 0; hv->tsc_ref.tsc_sequence = 0;
/*
* Take the srcu lock as memslots will be accessed to check the gfn
* cache generation against the memslots generation.
*/
idx = srcu_read_lock(&kvm->srcu);
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
srcu_read_unlock(&kvm->srcu, idx);
out_unlock: out_unlock:
mutex_unlock(&hv->hv_lock); mutex_unlock(&hv->hv_lock);

View file

@ -314,7 +314,6 @@ struct x86_emulate_ctxt {
int interruptibility; int interruptibility;
bool perm_ok; /* do not check permissions if true */ bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
bool tf; /* TF value before instruction (after for syscall/sysret) */ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception; bool have_exception;
@ -491,7 +490,7 @@ enum x86_intercept {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif #endif
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type);
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_FAILED -1 #define EMULATION_FAILED -1
#define EMULATION_OK 0 #define EMULATION_OK 0

View file

@ -1598,11 +1598,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
if (lapic_timer_advance_dynamic) {
adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
/*
* If the timer fired early, reread the TSC to account for the
* overhead of the above adjustment to avoid waiting longer
* than is necessary.
*/
if (guest_tsc < tsc_deadline)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
}
if (guest_tsc < tsc_deadline) if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
if (lapic_timer_advance_dynamic)
adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
} }
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
@ -1661,7 +1669,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
} }
atomic_inc(&apic->lapic_timer.pending); atomic_inc(&apic->lapic_timer.pending);
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
if (from_timer_fn) if (from_timer_fn)
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
} }

View file

@@ -1192,9 +1192,9 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 }
 
 /*
- * Remove write access from all the SPTEs mapping GFNs [start, end). If
- * skip_4k is set, SPTEs that map 4k pages, will not be write-protected.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ * Remove write access from all SPTEs at or above min_level that map GFNs
+ * [start, end). Returns true if an SPTE has been changed and the TLBs need to
+ * be flushed.
  */
 static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                              gfn_t start, gfn_t end, int min_level)

View file

@@ -28,10 +28,8 @@
 #include "svm.h"
 
 /* enable / disable AVIC */
-int avic;
-#ifdef CONFIG_X86_LOCAL_APIC
-module_param(avic, int, S_IRUGO);
-#endif
+bool avic;
+module_param(avic, bool, S_IRUGO);
 
 #define SVM_AVIC_DOORBELL       0xc001011b

View file

@@ -1010,9 +1010,7 @@ static __init int svm_hardware_setup(void)
         }
 
         if (avic) {
-                if (!npt_enabled ||
-                    !boot_cpu_has(X86_FEATURE_AVIC) ||
-                    !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
+                if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) {
                         avic = false;
                 } else {
                         pr_info("AVIC enabled\n");

View file

@@ -480,7 +480,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
 
 #define VMCB_AVIC_APIC_BAR_MASK         0xFFFFFFFFFF000ULL
 
-extern int avic;
+extern bool avic;
 
 static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
 {

View file

@@ -90,8 +90,7 @@ static inline bool cpu_has_vmx_preemption_timer(void)
 
 static inline bool cpu_has_vmx_posted_intr(void)
 {
-        return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
-                vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+        return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
 }
 
 static inline bool cpu_has_load_ia32_efer(void)

View file

@ -237,6 +237,20 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
} }
/*
* Bail out of the block loop if the VM has an assigned
* device, but the blocking vCPU didn't reconfigure the
* PI.NV to the wakeup vector, i.e. the assigned device
* came along after the initial check in pi_pre_block().
*/
void vmx_pi_start_assignment(struct kvm *kvm)
{
if (!irq_remapping_cap(IRQ_POSTING_CAP))
return;
kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
}
/* /*
* pi_update_irte - set IRTE for Posted-Interrupts * pi_update_irte - set IRTE for Posted-Interrupts
* *

View file

@@ -95,5 +95,6 @@ void __init pi_init_cpu(int cpu);
 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
 int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
                    bool set);
+void vmx_pi_start_assignment(struct kvm *kvm);
 
 #endif /* __KVM_X86_VMX_POSTED_INTR_H */

View file

@ -4843,7 +4843,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run; struct kvm_run *kvm_run = vcpu->run;
u32 intr_info, ex_no, error_code; u32 intr_info, ex_no, error_code;
unsigned long cr2, rip, dr6; unsigned long cr2, dr6;
u32 vect_info; u32 vect_info;
vect_info = vmx->idt_vectoring_info; vect_info = vmx->idt_vectoring_info;
@ -4933,8 +4933,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.event_exit_inst_len = vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN); vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->exit_reason = KVM_EXIT_DEBUG;
rip = kvm_rip_read(vcpu); kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no; kvm_run->debug.arch.exception = ex_no;
break; break;
case AC_VECTOR: case AC_VECTOR:
@ -7721,6 +7720,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.nested_ops = &vmx_nested_ops, .nested_ops = &vmx_nested_ops,
.update_pi_irte = pi_update_irte, .update_pi_irte = pi_update_irte,
.start_assignment = vmx_pi_start_assignment,
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer, .set_hv_timer = vmx_set_hv_timer,

View file

@ -3105,6 +3105,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
st->preempted & KVM_VCPU_FLUSH_TLB); st->preempted & KVM_VCPU_FLUSH_TLB);
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu); kvm_vcpu_flush_tlb_guest(vcpu);
} else {
st->preempted = 0;
} }
vcpu->arch.st.preempted = 0; vcpu->arch.st.preempted = 0;
@ -7226,6 +7228,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
ctxt->interruptibility = 0;
ctxt->have_exception = false;
ctxt->exception.vector = -1;
ctxt->perm_ok = false;
init_decode_cache(ctxt); init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false; vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
} }
@ -7561,14 +7568,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
kvm_vcpu_check_breakpoint(vcpu, &r)) kvm_vcpu_check_breakpoint(vcpu, &r))
return r; return r;
ctxt->interruptibility = 0; r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
ctxt->have_exception = false;
ctxt->exception.vector = -1;
ctxt->perm_ok = false;
ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
r = x86_decode_insn(ctxt, insn, insn_len);
trace_kvm_emulate_insn_start(vcpu); trace_kvm_emulate_insn_start(vcpu);
++vcpu->stat.insn_emulation; ++vcpu->stat.insn_emulation;
@ -8360,6 +8360,9 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
vcpu->stat.directed_yield_attempted++; vcpu->stat.directed_yield_attempted++;
if (single_task_running())
goto no_yield;
rcu_read_lock(); rcu_read_lock();
map = rcu_dereference(vcpu->kvm->arch.apic_map); map = rcu_dereference(vcpu->kvm->arch.apic_map);
@ -9496,7 +9499,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (r <= 0) if (r <= 0)
break; break;
kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
if (kvm_cpu_has_pending_timer(vcpu)) if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu); kvm_inject_pending_timer_irqs(vcpu);
@ -10115,8 +10118,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
kvm_update_dr7(vcpu); kvm_update_dr7(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
get_segment_base(vcpu, VCPU_SREG_CS);
/* /*
* Trigger an rflags update that will inject or remove the trace * Trigger an rflags update that will inject or remove the trace
@ -11499,7 +11501,8 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
void kvm_arch_start_assignment(struct kvm *kvm) void kvm_arch_start_assignment(struct kvm *kvm)
{ {
atomic_inc(&kvm->arch.assigned_device_count); if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
static_call_cond(kvm_x86_start_assignment)(kvm);
} }
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);

View file

@@ -10,6 +10,7 @@
 #include <linux/spinlock.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/sched/stat.h>
 #include <linux/bug.h>
 #include <linux/minmax.h>
 #include <linux/mm.h>
@@ -146,7 +147,7 @@ static inline bool is_error_page(struct page *page)
  */
 #define KVM_REQ_TLB_FLUSH         (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_PENDING_TIMER     2
+#define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
 #define KVM_REQUEST_ARCH_BASE     8
@@ -265,6 +266,11 @@ static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
         return !!map->hva;
 }
 
+static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
+{
+        return single_task_running() && !need_resched() && ktime_before(cur, stop);
+}
+
 /*
  * Sometimes a large or cross-page mmio needs to be broken up into separate
  * exits for userspace servicing.

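The kvm_vcpu_can_poll() helper added above is the same cut-off that the PowerPC hunk earlier in this series switches to, so halt polling stops as soon as the CPU has competing work or the deadline passes. A minimal userspace sketch of that heuristic follows; it is purely illustrative, and the other_work_pending flag stands in for single_task_running()/need_resched(), which exist only inside the kernel:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Keep polling only while the CPU is otherwise idle and the deadline holds. */
static bool can_poll(bool other_work_pending, uint64_t stop_ns)
{
        return !other_work_pending && now_ns() < stop_ns;
}

int main(void)
{
        uint64_t stop = now_ns() + 50000;       /* 50us polling window */
        unsigned long spins = 0;

        while (can_poll(false, stop))
                spins++;                        /* stand-in for "check for a wakeup event" */

        printf("polled for %lu iterations before giving up and blocking\n", spins);
        return 0;
}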
View file

@@ -8,6 +8,7 @@
  * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/ioctl.h>
@@ -1879,8 +1880,8 @@ struct kvm_hyperv_eventfd {
  * conversion after harvesting an entry.  Also, it must not skip any
  * dirty bits, so that dirty bits are always harvested in sequence.
  */
-#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
-#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_DIRTY           _BITUL(0)
+#define KVM_DIRTY_GFN_F_RESET           _BITUL(1)
 #define KVM_DIRTY_GFN_F_MASK            0x3
 
 /*

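This header (and its tools/ copy in the next hunk) switches from BIT() to _BITUL() because BIT() is defined by the kernel-only <linux/bits.h>, while _BITUL() comes from the exported <linux/const.h> that the hunk now includes. A tiny userspace consumer, sketched here under the assumption that the installed UAPI headers already carry the dirty-ring flags, shows why the macro has to expand cleanly outside the kernel:

#include <linux/kvm.h>
#include <stdio.h>

int main(void)
{
        unsigned int flags = KVM_DIRTY_GFN_F_DIRTY;

        /* Userspace can only test these bits if the UAPI macro expands here. */
        if ((flags & KVM_DIRTY_GFN_F_DIRTY) && !(flags & KVM_DIRTY_GFN_F_RESET))
                printf("dirty gfn not yet reset (flags=%#x)\n", flags);
        return 0;
}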
View file

@@ -8,6 +8,7 @@
  * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/ioctl.h>
@@ -1879,8 +1880,8 @@ struct kvm_hyperv_eventfd {
  * conversion after harvesting an entry.  Also, it must not skip any
  * dirty bits, so that dirty bits are always harvested in sequence.
  */
-#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
-#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_DIRTY           _BITUL(0)
+#define KVM_DIRTY_GFN_F_RESET           _BITUL(1)
 #define KVM_DIRTY_GFN_F_MASK            0x3
 
 /*

View file

@@ -41,5 +41,6 @@
 /kvm_create_max_vcpus
 /kvm_page_table_test
 /memslot_modification_stress_test
+/memslot_perf_test
 /set_memory_region_test
 /steal_time

View file

@ -33,7 +33,7 @@ ifeq ($(ARCH),s390)
UNAME_M := s390x UNAME_M := s390x
endif endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
@ -74,6 +74,7 @@ TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += memslot_perf_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += steal_time

View file

@ -9,6 +9,7 @@
#define _GNU_SOURCE /* for pipe2 */ #define _GNU_SOURCE /* for pipe2 */
#include <inttypes.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
@ -38,6 +39,7 @@
static int nr_vcpus = 1; static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static size_t demand_paging_size;
static char *guest_data_prototype; static char *guest_data_prototype;
static void *vcpu_worker(void *data) static void *vcpu_worker(void *data)
@ -71,36 +73,51 @@ static void *vcpu_worker(void *data)
return NULL; return NULL;
} }
static int handle_uffd_page_request(int uffd, uint64_t addr) static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
{ {
pid_t tid; pid_t tid = syscall(__NR_gettid);
struct timespec start; struct timespec start;
struct timespec ts_diff; struct timespec ts_diff;
struct uffdio_copy copy;
int r; int r;
tid = syscall(__NR_gettid);
copy.src = (uint64_t)guest_data_prototype;
copy.dst = addr;
copy.len = perf_test_args.host_page_size;
copy.mode = 0;
clock_gettime(CLOCK_MONOTONIC, &start); clock_gettime(CLOCK_MONOTONIC, &start);
r = ioctl(uffd, UFFDIO_COPY, &copy); if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
if (r == -1) { struct uffdio_copy copy;
pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
addr, tid, errno); copy.src = (uint64_t)guest_data_prototype;
return r; copy.dst = addr;
copy.len = demand_paging_size;
copy.mode = 0;
r = ioctl(uffd, UFFDIO_COPY, &copy);
if (r == -1) {
pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
addr, tid, errno);
return r;
}
} else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
struct uffdio_continue cont = {0};
cont.range.start = addr;
cont.range.len = demand_paging_size;
r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
if (r == -1) {
pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
addr, tid, errno);
return r;
}
} else {
TEST_FAIL("Invalid uffd mode %d", uffd_mode);
} }
ts_diff = timespec_elapsed(start); ts_diff = timespec_elapsed(start);
PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
timespec_to_ns(ts_diff)); timespec_to_ns(ts_diff));
PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
perf_test_args.host_page_size, addr, tid); demand_paging_size, addr, tid);
return 0; return 0;
} }
@ -108,6 +125,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
bool quit_uffd_thread; bool quit_uffd_thread;
struct uffd_handler_args { struct uffd_handler_args {
int uffd_mode;
int uffd; int uffd;
int pipefd; int pipefd;
useconds_t delay; useconds_t delay;
@ -169,7 +187,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (r == -1) { if (r == -1) {
if (errno == EAGAIN) if (errno == EAGAIN)
continue; continue;
pr_info("Read of uffd gor errno %d", errno); pr_info("Read of uffd got errno %d\n", errno);
return NULL; return NULL;
} }
@ -184,7 +202,7 @@ static void *uffd_handler_thread_fn(void *arg)
if (delay) if (delay)
usleep(delay); usleep(delay);
addr = msg.arg.pagefault.address; addr = msg.arg.pagefault.address;
r = handle_uffd_page_request(uffd, addr); r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
if (r < 0) if (r < 0)
return NULL; return NULL;
pages++; pages++;
@ -198,43 +216,53 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL; return NULL;
} }
static int setup_demand_paging(struct kvm_vm *vm, static void setup_demand_paging(struct kvm_vm *vm,
pthread_t *uffd_handler_thread, int pipefd, pthread_t *uffd_handler_thread, int pipefd,
useconds_t uffd_delay, int uffd_mode, useconds_t uffd_delay,
struct uffd_handler_args *uffd_args, struct uffd_handler_args *uffd_args,
void *hva, uint64_t len) void *hva, void *alias, uint64_t len)
{ {
bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
int uffd; int uffd;
struct uffdio_api uffdio_api; struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register; struct uffdio_register uffdio_register;
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
/* In order to get minor faults, prefault via the alias. */
if (is_minor) {
size_t p;
expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
TEST_ASSERT(alias != NULL, "Alias required for minor faults");
for (p = 0; p < (len / demand_paging_size); ++p) {
memcpy(alias + (p * demand_paging_size),
guest_data_prototype, demand_paging_size);
}
}
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1) { TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
pr_info("uffd creation failed\n");
return -1;
}
uffdio_api.api = UFFD_API; uffdio_api.api = UFFD_API;
uffdio_api.features = 0; uffdio_api.features = 0;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
pr_info("ioctl uffdio_api failed\n"); "ioctl UFFDIO_API failed: %" PRIu64,
return -1; (uint64_t)uffdio_api.api);
}
uffdio_register.range.start = (uint64_t)hva; uffdio_register.range.start = (uint64_t)hva;
uffdio_register.range.len = len; uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; uffdio_register.mode = uffd_mode;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
pr_info("ioctl uffdio_register failed\n"); "ioctl UFFDIO_REGISTER failed");
return -1; TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
} expected_ioctls, "missing userfaultfd ioctls");
if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
UFFD_API_RANGE_IOCTLS) {
pr_info("unexpected userfaultfd ioctl set\n");
return -1;
}
uffd_args->uffd_mode = uffd_mode;
uffd_args->uffd = uffd; uffd_args->uffd = uffd;
uffd_args->pipefd = pipefd; uffd_args->pipefd = pipefd;
uffd_args->delay = uffd_delay; uffd_args->delay = uffd_delay;
@ -243,13 +271,12 @@ static int setup_demand_paging(struct kvm_vm *vm,
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
hva, hva + len); hva, hva + len);
return 0;
} }
struct test_params { struct test_params {
bool use_uffd; int uffd_mode;
useconds_t uffd_delay; useconds_t uffd_delay;
enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access; bool partition_vcpu_memory_access;
}; };
@ -267,14 +294,16 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int r; int r;
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
VM_MEM_SRC_ANONYMOUS); p->src_type);
perf_test_args.wr_fract = 1; perf_test_args.wr_fract = 1;
guest_data_prototype = malloc(perf_test_args.host_page_size); demand_paging_size = get_backing_src_pagesz(p->src_type);
guest_data_prototype = malloc(demand_paging_size);
TEST_ASSERT(guest_data_prototype, TEST_ASSERT(guest_data_prototype,
"Failed to allocate buffer for guest data pattern"); "Failed to allocate buffer for guest data pattern");
memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); memset(guest_data_prototype, 0xAB, demand_paging_size);
vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed"); TEST_ASSERT(vcpu_threads, "Memory allocation failed");
@ -282,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
p->partition_vcpu_memory_access); p->partition_vcpu_memory_access);
if (p->use_uffd) { if (p->uffd_mode) {
uffd_handler_threads = uffd_handler_threads =
malloc(nr_vcpus * sizeof(*uffd_handler_threads)); malloc(nr_vcpus * sizeof(*uffd_handler_threads));
TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
@ -296,6 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vm_paddr_t vcpu_gpa; vm_paddr_t vcpu_gpa;
void *vcpu_hva; void *vcpu_hva;
void *vcpu_alias;
uint64_t vcpu_mem_size; uint64_t vcpu_mem_size;
@ -310,8 +340,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
/* Cache the HVA pointer of the region */ /* Cache the host addresses of the region */
vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
/* /*
* Set up user fault fd to handle demand paging * Set up user fault fd to handle demand paging
@ -321,13 +352,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
O_CLOEXEC | O_NONBLOCK); O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!r, "Failed to set up pipefd"); TEST_ASSERT(!r, "Failed to set up pipefd");
r = setup_demand_paging(vm, setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
&uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], p->uffd_mode,
pipefds[vcpu_id * 2], p->uffd_delay, &uffd_args[vcpu_id],
p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, vcpu_alias,
vcpu_hva, vcpu_mem_size); vcpu_mem_size);
if (r < 0)
exit(-r);
} }
} }
@ -355,7 +384,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("All vCPU threads joined\n"); pr_info("All vCPU threads joined\n");
if (p->use_uffd) { if (p->uffd_mode) {
char c; char c;
/* Tell the user fault fd handler threads to quit */ /* Tell the user fault fd handler threads to quit */
@ -377,7 +406,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
free(guest_data_prototype); free(guest_data_prototype);
free(vcpu_threads); free(vcpu_threads);
if (p->use_uffd) { if (p->uffd_mode) {
free(uffd_handler_threads); free(uffd_handler_threads);
free(uffd_args); free(uffd_args);
free(pipefds); free(pipefds);
@ -387,17 +416,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name) static void help(char *name)
{ {
puts(""); puts("");
printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
" [-b memory] [-v vcpus] [-o]\n", name); " [-b memory] [-t type] [-v vcpus] [-o]\n", name);
guest_modes_help(); guest_modes_help();
printf(" -u: use User Fault FD to handle vCPU page\n" printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" faults.\n"); " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
printf(" -d: add a delay in usec to the User Fault\n" printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n" " FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n"); " overheads. Ignored without -u.\n");
printf(" -b: specify the size of the memory region which should be\n" printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n" " demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n"); " Default: 1G\n");
printf(" -t: The type of backing memory to use. Default: anonymous\n");
backing_src_help();
printf(" -v: specify the number of vCPUs to run.\n"); printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n" printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n"); " them into a separate region of memory for each vCPU.\n");
@ -409,19 +440,24 @@ int main(int argc, char *argv[])
{ {
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = { struct test_params p = {
.src_type = VM_MEM_SRC_ANONYMOUS,
.partition_vcpu_memory_access = true, .partition_vcpu_memory_access = true,
}; };
int opt; int opt;
guest_modes_append_default(); guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) { while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
switch (opt) { switch (opt) {
case 'm': case 'm':
guest_modes_cmdline(optarg); guest_modes_cmdline(optarg);
break; break;
case 'u': case 'u':
p.use_uffd = true; if (!strcmp("MISSING", optarg))
p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
else if (!strcmp("MINOR", optarg))
p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break; break;
case 'd': case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0); p.uffd_delay = strtoul(optarg, NULL, 0);
@ -430,6 +466,9 @@ int main(int argc, char *argv[])
case 'b': case 'b':
guest_percpu_mem_size = parse_size(optarg); guest_percpu_mem_size = parse_size(optarg);
break; break;
case 't':
p.src_type = parse_backing_src_type(optarg);
break;
case 'v': case 'v':
nr_vcpus = atoi(optarg); nr_vcpus = atoi(optarg);
TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
@ -445,6 +484,11 @@ int main(int argc, char *argv[])
} }
} }
if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
!backing_src_is_shared(p.src_type)) {
TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
}
for_each_guest_mode(run_test, &p); for_each_guest_mode(run_test, &p);
return 0; return 0;
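For reference, the options wired up above combine along these lines (an illustrative invocation, not taken from the patch; the binary name and values are assumptions):

./demand_paging_test -u MINOR -t shmem -d 10 -b 512M -v 4

-u picks the userfaultfd registration mode parsed in the 'u' case, -t selects a shared backing source so the MINOR-mode check in main() passes, -d adds a 10 usec handler delay, and -b/-v size the per-vCPU region and vCPU count.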


@@ -132,6 +132,36 @@ static void run_test(uint32_t run)
TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
}
void wait_for_child_setup(pid_t pid)
{
/*
* Wait for the child to post to the semaphore, but wake up periodically
* to check if the child exited prematurely.
*/
for (;;) {
const struct timespec wait_period = { .tv_sec = 1 };
int status;
if (!sem_timedwait(sem, &wait_period))
return;
/* Child is still running, keep waiting. */
if (pid != waitpid(pid, &status, WNOHANG))
continue;
/*
* Child is no longer running, which is not expected.
*
* If it exited with a non-zero status, we explicitly forward
* the child's status in case it exited with KSFT_SKIP.
*/
if (WIFEXITED(status))
exit(WEXITSTATUS(status));
else
TEST_ASSERT(false, "Child exited unexpectedly");
}
}
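The child half of this handshake is outside the hunk; as a rough sketch (assumed control flow, not code from this patch), the child signals the parent once its setup succeeds and otherwise exits with a status the loop above forwards:

/* Child side (sketch): tell the parent setup is complete. */
sem_post(sem);
/* If setup cannot proceed, exit(KSFT_SKIP) here is forwarded by wait_for_child_setup(). */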
int main(int argc, char **argv)
{
uint32_t i;
@@ -148,7 +178,7 @@ int main(int argc, char **argv)
run_test(i); /* This function always exits */
pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
-sem_wait(sem);
wait_for_child_setup(pid);
r = (rand() % DELAY_US_MAX) + 1;
pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
usleep(r);


@@ -77,6 +77,7 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_kvm_dev_path_or_exit(void);
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
@@ -146,6 +147,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
/*
 * Address Guest Virtual to Guest Physical
@@ -302,7 +304,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
uint64_t vm_get_max_gfn(struct kvm_vm *vm);
int vm_get_fd(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);


@@ -17,6 +17,7 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "kselftest.h"
static inline int _no_printf(const char *format, ...) { return 0; }
@@ -84,6 +85,8 @@ enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
VM_MEM_SRC_SHMEM,
VM_MEM_SRC_SHARED_HUGETLB,
NUM_SRC_TYPES,
};
@@ -100,4 +103,13 @@ size_t get_backing_src_pagesz(uint32_t i);
void backing_src_help(void);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
/*
 * Whether or not the given source type is shared memory (as opposed to
 * anonymous).
 */
static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
{
return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
}
#endif /* SELFTEST_KVM_TEST_UTIL_H */
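As a small illustration of the helper above, a caller can branch on it to decide how to touch guest memory from the host (hypothetical call site; vm, gpa, src_type and page_size are stand-ins, not from this patch):

/* Prefer the shared alias so host-side writes do not fault pages into the guest mapping. */
void *host = backing_src_is_shared(src_type) ? addr_gpa2alias(vm, gpa)
					     : addr_gpa2hva(vm, gpa);
memset(host, 0xaa, page_size);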


@@ -31,6 +31,34 @@ static void *align(void *x, size_t size)
return (void *) (((size_t) x + mask) & ~mask);
}
/*
* Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
* Input Args:
* flags - The flags to pass when opening KVM_DEV_PATH.
*
* Return:
* The opened file descriptor of /dev/kvm.
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
int fd;
fd = open(KVM_DEV_PATH, flags);
if (fd < 0) {
print_skip("%s not available, is KVM loaded? (errno: %d)",
KVM_DEV_PATH, errno);
exit(KSFT_SKIP);
}
return fd;
}
int open_kvm_dev_path_or_exit(void)
{
return _open_kvm_dev_path_or_exit(O_RDONLY);
}
/*
 * Capability
 *
@@ -52,10 +80,7 @@ int kvm_check_cap(long cap)
int ret;
int kvm_fd;
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);
@@ -128,9 +153,7 @@ void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
static void vm_open(struct kvm_vm *vm, int perm)
{
-vm->kvm_fd = open(KVM_DEV_PATH, perm);
-if (vm->kvm_fd < 0)
-exit(KSFT_SKIP);
vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
print_skip("immediate_exit not available");
@@ -203,7 +226,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
-INIT_LIST_HEAD(&vm->userspace_mem_regions);
vm->regions.gpa_tree = RB_ROOT;
vm->regions.hva_tree = RB_ROOT;
hash_init(vm->regions.slot_hash);
vm->mode = mode;
vm->type = 0;
@@ -295,7 +320,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 */
uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
-uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
uint64_t pages = DEFAULT_GUEST_PHY_PAGES + extra_mem_pages + vcpu_pages + extra_pg_pages;
struct kvm_vm *vm;
int i;
@@ -355,13 +380,14 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 */
void kvm_vm_restart(struct kvm_vm *vmp, int perm)
{
int ctr;
struct userspace_mem_region *region;
vm_open(vmp, perm);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
-list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
@@ -424,14 +450,21 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
-struct userspace_mem_region *region;
-list_for_each_entry(region, &vm->userspace_mem_regions, list) {
struct rb_node *node;
for (node = vm->regions.gpa_tree.rb_node; node; ) {
struct userspace_mem_region *region =
container_of(node, struct userspace_mem_region, gpa_node);
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
if (start < existing_start)
node = node->rb_left;
else
node = node->rb_right;
}
return NULL;
@@ -546,11 +579,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
-struct userspace_mem_region *region)
struct userspace_mem_region *region,
bool unlink)
{
int ret;
-list_del(&region->list);
if (unlink) {
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
}
region->region.memory_size = 0;
ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
@@ -569,14 +607,16 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
-struct userspace_mem_region *region, *tmp;
int ctr;
struct hlist_node *node;
struct userspace_mem_region *region;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
-list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
-__vm_mem_region_delete(vmp, region);
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region, false);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -658,13 +698,64 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
return 0;
}
static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
struct userspace_mem_region *region)
{
struct rb_node **cur, *parent;
for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
struct userspace_mem_region *cregion;
cregion = container_of(*cur, typeof(*cregion), gpa_node);
parent = *cur;
if (region->region.guest_phys_addr <
cregion->region.guest_phys_addr)
cur = &(*cur)->rb_left;
else {
TEST_ASSERT(region->region.guest_phys_addr !=
cregion->region.guest_phys_addr,
"Duplicate GPA in region tree");
cur = &(*cur)->rb_right;
}
}
rb_link_node(&region->gpa_node, parent, cur);
rb_insert_color(&region->gpa_node, gpa_tree);
}
static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
struct userspace_mem_region *region)
{
struct rb_node **cur, *parent;
for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
struct userspace_mem_region *cregion;
cregion = container_of(*cur, typeof(*cregion), hva_node);
parent = *cur;
if (region->host_mem < cregion->host_mem)
cur = &(*cur)->rb_left;
else {
TEST_ASSERT(region->host_mem !=
cregion->host_mem,
"Duplicate HVA in region tree");
cur = &(*cur)->rb_right;
}
}
rb_link_node(&region->hva_node, parent, cur);
rb_insert_color(&region->hva_node, hva_tree);
}
/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
-*   backing_src - Storage source for this region.
-*                 NULL to use anonymous memory.
 *   src_type - Storage source for this region.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
@@ -722,7 +813,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
-list_for_each_entry(region, &vm->userspace_mem_regions, list) {
hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
slot) {
if (region->region.slot != slot)
continue;
@@ -755,11 +847,30 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
if (alignment > 1)
region->mmap_size += alignment;
region->fd = -1;
if (backing_src_is_shared(src_type)) {
int memfd_flags = MFD_CLOEXEC;
if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
memfd_flags |= MFD_HUGETLB;
region->fd = memfd_create("kvm_selftest", memfd_flags);
TEST_ASSERT(region->fd != -1,
"memfd_create failed, errno: %i", errno);
ret = ftruncate(region->fd, region->mmap_size);
TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
ret = fallocate(region->fd,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
region->mmap_size);
TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
}
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
-MAP_PRIVATE | MAP_ANONYMOUS
-| vm_mem_backing_src_alias(src_type)->flag,
--1, 0);
vm_mem_backing_src_alias(src_type)->flag,
region->fd, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
"test_malloc failed, mmap_start: %p errno: %i",
region->mmap_start, errno);
@@ -793,8 +904,23 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
ret, errno, slot, flags,
guest_paddr, (uint64_t) region->region.memory_size);
-/* Add to linked-list of memory regions. */
-list_add(&region->list, &vm->userspace_mem_regions);
/* Add to quick lookup data structures */
vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
hash_add(vm->regions.slot_hash, &region->slot_node, slot);
/* If shared memory, create an alias. */
if (region->fd >= 0) {
region->mmap_alias = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
vm_mem_backing_src_alias(src_type)->flag,
region->fd, 0);
TEST_ASSERT(region->mmap_alias != MAP_FAILED,
"mmap of alias failed, errno: %i", errno);
/* Align host alias address */
region->host_alias = align(region->mmap_alias, alignment);
}
}
/*
@@ -817,10 +943,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
-list_for_each_entry(region, &vm->userspace_mem_regions, list) {
hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
memslot)
if (region->region.slot == memslot)
return region;
-}
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
@@ -905,7 +1031,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
-__vm_mem_region_delete(vm, memslot2region(vm, slot));
__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
/*
@@ -925,9 +1051,7 @@ static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
-dev_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (dev_fd < 0)
-exit(KSFT_SKIP);
dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
@@ -1099,6 +1223,9 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
virt_pgd_alloc(vm, pgd_memslot);
vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
KVM_UTIL_MIN_PFN * vm->page_size,
data_memslot);
/*
 * Find an unused range of virtual page addresses of at least
@@ -1108,11 +1235,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
/* Map the virtual pages. */
for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
-pages--, vaddr += vm->page_size) {
-vm_paddr_t paddr;
-paddr = vm_phy_page_alloc(vm,
-KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
pages--, vaddr += vm->page_size, paddr += vm->page_size) {
virt_pg_map(vm, vaddr, paddr, pgd_memslot);
@@ -1177,16 +1300,14 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
-list_for_each_entry(region, &vm->userspace_mem_regions, list) {
-if ((gpa >= region->region.guest_phys_addr)
-&& (gpa <= (region->region.guest_phys_addr
-+ region->region.memory_size - 1)))
-return (void *) ((uintptr_t) region->host_mem
-+ (gpa - region->region.guest_phys_addr));
-}
-TEST_FAIL("No vm physical memory at 0x%lx", gpa);
-return NULL;
region = userspace_mem_region_find(vm, gpa, gpa);
if (!region) {
TEST_FAIL("No vm physical memory at 0x%lx", gpa);
return NULL;
}
return (void *)((uintptr_t)region->host_mem
+ (gpa - region->region.guest_phys_addr));
}
/*
@@ -1208,21 +1329,64 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
-struct userspace_mem_region *region;
-list_for_each_entry(region, &vm->userspace_mem_regions, list) {
-if ((hva >= region->host_mem)
-&& (hva <= (region->host_mem
-+ region->region.memory_size - 1)))
-return (vm_paddr_t) ((uintptr_t)
-region->region.guest_phys_addr
-+ (hva - (uintptr_t) region->host_mem));
struct rb_node *node;
for (node = vm->regions.hva_tree.rb_node; node; ) {
struct userspace_mem_region *region =
container_of(node, struct userspace_mem_region, hva_node);
if (hva >= region->host_mem) {
if (hva <= (region->host_mem
+ region->region.memory_size - 1))
return (vm_paddr_t)((uintptr_t)
region->region.guest_phys_addr
+ (hva - (uintptr_t)region->host_mem));
node = node->rb_right;
} else
node = node->rb_left;
}
TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
return -1;
}
/*
* Address VM physical to Host Virtual *alias*.
*
* Input Args:
* vm - Virtual Machine
* gpa - VM physical address
*
* Output Args: None
*
* Return:
* Equivalent address within the host virtual *alias* area, or NULL
* (without failing the test) if the guest memory is not shared (so
* no alias exists).
*
* When vm_create() and related functions are called with a shared memory
* src_type, we also create a writable, shared alias mapping of the
* underlying guest memory. This allows the host to manipulate guest memory
* without mapping that memory in the guest's address space. And, for
* userfaultfd-based demand paging, we can do so without triggering userfaults.
*/
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
uintptr_t offset;
region = userspace_mem_region_find(vm, gpa, gpa);
if (!region)
return NULL;
if (!region->host_alias)
return NULL;
offset = gpa - region->region.guest_phys_addr;
return (void *) ((uintptr_t) region->host_alias + offset);
}
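To make the comment above concrete, a userfaultfd MINOR handler can populate a faulting page through the alias and then resolve the fault; this is only a sketch (fault_gpa, len and the UFFDIO_CONTINUE plumbing are illustrative, not copied from this series):

/* Fill guest memory via the shared alias; the guest-facing mapping stays untouched. */
void *alias = addr_gpa2alias(vm, fault_gpa);
TEST_ASSERT(alias, "addr_gpa2alias() requires a shared backing source");
memcpy(alias, guest_data_prototype, len);
/* A MINOR-mode handler would then issue UFFDIO_CONTINUE on the faulting range. */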
/*
 * VM Create IRQ Chip
 *
@@ -1822,6 +1986,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int ctr;
struct userspace_mem_region *region;
struct vcpu *vcpu;
@@ -1829,7 +1994,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
-list_for_each_entry(region, &vm->userspace_mem_regions, list) {
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -2015,10 +2180,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
if (vm == NULL) {
/* Ensure that the KVM vendor-specific module is loaded. */
-f = fopen(KVM_DEV_PATH, "r");
-TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
-errno);
-fclose(f);
close(open_kvm_dev_path_or_exit());
}
f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
@@ -2041,7 +2203,7 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
return vm->page_shift;
}
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
return vm->max_gfn;
}


@@ -8,6 +8,9 @@
#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
#define SELFTEST_KVM_UTIL_INTERNAL_H
#include "linux/hashtable.h"
#include "linux/rbtree.h"
#include "sparsebit.h"
struct userspace_mem_region {
@@ -16,9 +19,13 @@ struct userspace_mem_region {
int fd;
off_t offset;
void *host_mem;
void *host_alias;
void *mmap_start;
void *mmap_alias;
size_t mmap_size;
-struct list_head list;
struct rb_node gpa_node;
struct rb_node hva_node;
struct hlist_node slot_node;
};
struct vcpu {
@@ -31,6 +38,12 @@ struct vcpu {
uint32_t dirty_gfns_count;
};
struct userspace_mem_regions {
struct rb_root gpa_tree;
struct rb_root hva_tree;
DECLARE_HASHTABLE(slot_hash, 9);
};
struct kvm_vm {
int mode;
unsigned long type;
@@ -43,7 +56,7 @@ struct kvm_vm {
unsigned int va_bits;
uint64_t max_gfn;
struct list_head vcpus;
-struct list_head userspace_mem_regions;
struct userspace_mem_regions regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;


@@ -2,6 +2,7 @@
/*
 * Copyright (C) 2020, Google LLC.
 */
#include <inttypes.h>
#include "kvm_util.h"
#include "perf_test_util.h"
@@ -80,7 +81,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 */
TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
"Requested more guest memory than address space allows.\n"
-" guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
" guest pages: %" PRIx64 " max gfn: %" PRIx64
" vcpus: %d wss: %" PRIx64 "]\n",
guest_num_pages, vm_get_max_gfn(vm), vcpus,
vcpu_memory_bytes);


@@ -0,0 +1 @@
#include "../../../../lib/rbtree.c"


@@ -168,70 +168,87 @@ size_t get_def_hugetlb_pagesz(void)
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
{
static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS;
static const int anon_huge_flags = anon_flags | MAP_HUGETLB;
static const struct vm_mem_backing_src_alias aliases[] = {
[VM_MEM_SRC_ANONYMOUS] = {
.name = "anonymous",
-.flag = 0,
.flag = anon_flags,
},
[VM_MEM_SRC_ANONYMOUS_THP] = {
.name = "anonymous_thp",
-.flag = 0,
.flag = anon_flags,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
.name = "anonymous_hugetlb",
-.flag = MAP_HUGETLB,
.flag = anon_huge_flags,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
.name = "anonymous_hugetlb_16kb",
-.flag = MAP_HUGETLB | MAP_HUGE_16KB,
.flag = anon_huge_flags | MAP_HUGE_16KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
.name = "anonymous_hugetlb_64kb",
-.flag = MAP_HUGETLB | MAP_HUGE_64KB,
.flag = anon_huge_flags | MAP_HUGE_64KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
.name = "anonymous_hugetlb_512kb",
-.flag = MAP_HUGETLB | MAP_HUGE_512KB,
.flag = anon_huge_flags | MAP_HUGE_512KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
.name = "anonymous_hugetlb_1mb",
-.flag = MAP_HUGETLB | MAP_HUGE_1MB,
.flag = anon_huge_flags | MAP_HUGE_1MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
.name = "anonymous_hugetlb_2mb",
-.flag = MAP_HUGETLB | MAP_HUGE_2MB,
.flag = anon_huge_flags | MAP_HUGE_2MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
.name = "anonymous_hugetlb_8mb",
-.flag = MAP_HUGETLB | MAP_HUGE_8MB,
.flag = anon_huge_flags | MAP_HUGE_8MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
.name = "anonymous_hugetlb_16mb",
-.flag = MAP_HUGETLB | MAP_HUGE_16MB,
.flag = anon_huge_flags | MAP_HUGE_16MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
.name = "anonymous_hugetlb_32mb",
-.flag = MAP_HUGETLB | MAP_HUGE_32MB,
.flag = anon_huge_flags | MAP_HUGE_32MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
.name = "anonymous_hugetlb_256mb",
-.flag = MAP_HUGETLB | MAP_HUGE_256MB,
.flag = anon_huge_flags | MAP_HUGE_256MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
.name = "anonymous_hugetlb_512mb",
-.flag = MAP_HUGETLB | MAP_HUGE_512MB,
.flag = anon_huge_flags | MAP_HUGE_512MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
.name = "anonymous_hugetlb_1gb",
-.flag = MAP_HUGETLB | MAP_HUGE_1GB,
.flag = anon_huge_flags | MAP_HUGE_1GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
.name = "anonymous_hugetlb_2gb",
-.flag = MAP_HUGETLB | MAP_HUGE_2GB,
.flag = anon_huge_flags | MAP_HUGE_2GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
.name = "anonymous_hugetlb_16gb",
-.flag = MAP_HUGETLB | MAP_HUGE_16GB,
.flag = anon_huge_flags | MAP_HUGE_16GB,
},
[VM_MEM_SRC_SHMEM] = {
.name = "shmem",
.flag = MAP_SHARED,
},
[VM_MEM_SRC_SHARED_HUGETLB] = {
.name = "shared_hugetlb",
/*
 * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
 * we're using "file backed" memory, we need to specify
 * this when the FD is created, not when the area is
 * mapped.
 */
.flag = MAP_SHARED,
},
};
_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
@@ -250,10 +267,12 @@ size_t get_backing_src_pagesz(uint32_t i)
switch (i) {
case VM_MEM_SRC_ANONYMOUS:
case VM_MEM_SRC_SHMEM:
return getpagesize();
case VM_MEM_SRC_ANONYMOUS_THP:
return get_trans_hugepagesz();
case VM_MEM_SRC_ANONYMOUS_HUGETLB:
case VM_MEM_SRC_SHARED_HUGETLB:
return get_def_hugetlb_pagesz();
default:
return MAP_HUGE_PAGE_SIZE(flag);
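The comment on VM_MEM_SRC_SHARED_HUGETLB above is the key point: hugetlb must be requested when the memfd is created, not at mmap() time. A condensed sketch of the path vm_userspace_mem_region_add() takes for this source type (error handling trimmed; mmap_size is a stand-in):

int fd = memfd_create("kvm_selftest", MFD_CLOEXEC | MFD_HUGETLB);
ftruncate(fd, mmap_size);
void *mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
		 MAP_SHARED, fd, 0);	/* MAP_SHARED, not MAP_HUGETLB */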


@@ -657,9 +657,7 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
return cpuid;
cpuid = allocate_kvm_cpuid2();
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -691,9 +689,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
@@ -986,9 +982,7 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
struct kvm_msr_list *list;
int nmsrs, r, kvm_fd;
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
nmsrs = kvm_get_num_msrs_fd(kvm_fd);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
@@ -1312,9 +1306,7 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
return cpuid;
cpuid = allocate_kvm_cpuid2();
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",


@@ -71,14 +71,22 @@ struct memslot_antagonist_args {
};
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
-uint64_t nr_modifications, uint64_t gpa)
uint64_t nr_modifications)
{
const uint64_t pages = 1;
uint64_t gpa;
int i;
/*
 * Add the dummy memslot just below the perf_test_util memslot, which is
 * at the top of the guest physical address space.
 */
gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
for (i = 0; i < nr_modifications; i++) {
usleep(delay);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
-DUMMY_MEMSLOT_INDEX, 1, 0);
DUMMY_MEMSLOT_INDEX, pages, 0);
vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
}
@@ -120,11 +128,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("Started all vCPUs\n");
add_remove_memslot(vm, p->memslot_modification_delay,
-p->nr_memslot_modifications,
-guest_test_phys_mem +
-(guest_percpu_mem_size * nr_vcpus) +
-perf_test_args.host_page_size +
-perf_test_args.guest_page_size);
p->nr_memslot_modifications);
run_vcpus = false;

File diff suppressed because it is too large.


@@ -19,7 +19,12 @@ struct {
u32 function;
u32 index;
} mangled_cpuids[] = {
/*
 * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
 * which are not controlled for by this test.
 */
{.function = 0xd, .index = 0},
{.function = 0xd, .index = 1},
};
static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)


@@ -37,9 +37,7 @@ static void test_get_msr_index(void)
int old_res, res, kvm_fd, r;
struct kvm_msr_list *list;
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
old_res = kvm_num_index_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
@@ -101,9 +99,7 @@ static void test_get_msr_feature(void)
int res, old_res, i, kvm_fd;
struct kvm_msr_list *feature_list;
-kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-if (kvm_fd < 0)
-exit(KSFT_SKIP);
kvm_fd = open_kvm_dev_path_or_exit();
old_res = kvm_num_feature_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");


@@ -307,6 +307,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
return kvm_make_all_cpus_request_except(kvm, req, NULL);
}
EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
@@ -2929,6 +2930,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
goto out;
if (signal_pending(current))
goto out;
if (kvm_check_request(KVM_REQ_UNBLOCK, vcpu))
goto out;
ret = 0;
out:
@@ -2973,8 +2976,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
goto out;
}
poll_end = cur = ktime_get();
-} while (single_task_running() && !need_resched() &&
-ktime_before(cur, stop));
} while (kvm_vcpu_can_poll(cur, stop));
}
prepare_to_rcuwait(&vcpu->wait);
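kvm_vcpu_can_poll() itself is not part of this hunk; judging from the condition it replaces, it presumably wraps the same checks, roughly:

/* Sketch inferred from the removed condition; the real helper lives elsewhere in kvm_main.c. */
static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
{
	return single_task_running() && !need_resched() && ktime_before(cur, stop);
}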


@@ -40,21 +40,17 @@ static int __connect(struct irq_bypass_producer *prod,
if (prod->add_consumer)
ret = prod->add_consumer(prod, cons);
-if (ret)
-goto err_add_consumer;
-ret = cons->add_producer(cons, prod);
-if (ret)
-goto err_add_producer;
if (!ret) {
ret = cons->add_producer(cons, prod);
if (ret && prod->del_consumer)
prod->del_consumer(prod, cons);
}
if (cons->start)
cons->start(cons);
if (prod->start)
prod->start(prod);
-err_add_producer:
-if (prod->del_consumer)
-prod->del_consumer(prod, cons);
-err_add_consumer:
return ret;
}