mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-07-01 19:41:19 +00:00
x86/xen/time: setup vcpu 0 time info page
In order to support the pvclock vdso on Xen, we need to set up the time info page for vcpu 0 and register the page with Xen using the VCPUOP_register_vcpu_time_memory_area hypercall. This hypercall also forcefully updates the pvti, which sets some of the flags needed by the vdso. Afterwards we check whether the pvti has the PVCLOCK_TSC_STABLE_BIT flag set, which is mandatory for vdso/vsyscall support. If it does, we set the cpu 0 pvti that will later be used when mapping the vdso image. The xen headers are also updated to include the new hypercall for registering the secondary vcpu_time_info struct. Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Reviewed-by: Juergen Gross <jgross@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
This commit is contained in:
parent
b888808093
commit
2229f70b5b
4 changed files with 137 additions and 1 deletions
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
void xen_arch_pre_suspend(void)
|
void xen_arch_pre_suspend(void)
|
||||||
{
|
{
|
||||||
|
xen_save_time_memory_area();
|
||||||
|
|
||||||
if (xen_pv_domain())
|
if (xen_pv_domain())
|
||||||
xen_pv_pre_suspend();
|
xen_pv_pre_suspend();
|
||||||
}
|
}
|
||||||
|
@ -26,6 +28,8 @@ void xen_arch_post_suspend(int cancelled)
|
||||||
xen_pv_post_suspend(cancelled);
|
xen_pv_post_suspend(cancelled);
|
||||||
else
|
else
|
||||||
xen_hvm_post_suspend(cancelled);
|
xen_hvm_post_suspend(cancelled);
|
||||||
|
|
||||||
|
xen_restore_time_memory_area();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void xen_vcpu_notify_restore(void *data)
|
static void xen_vcpu_notify_restore(void *data)
|
||||||
|
|
|
@ -370,6 +370,92 @@ static const struct pv_time_ops xen_time_ops __initconst = {
|
||||||
.steal_clock = xen_steal_clock,
|
.steal_clock = xen_steal_clock,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
|
||||||
|
|
||||||
|
void xen_save_time_memory_area(void)
|
||||||
|
{
|
||||||
|
struct vcpu_register_time_memory_area t;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!xen_clock)
|
||||||
|
return;
|
||||||
|
|
||||||
|
t.addr.v = NULL;
|
||||||
|
|
||||||
|
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
|
||||||
|
if (ret != 0)
|
||||||
|
pr_notice("Cannot save secondary vcpu_time_info (err %d)",
|
||||||
|
ret);
|
||||||
|
else
|
||||||
|
clear_page(xen_clock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void xen_restore_time_memory_area(void)
|
||||||
|
{
|
||||||
|
struct vcpu_register_time_memory_area t;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!xen_clock)
|
||||||
|
return;
|
||||||
|
|
||||||
|
t.addr.v = &xen_clock->pvti;
|
||||||
|
|
||||||
|
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't disable VCLOCK_PVCLOCK entirely if it fails to register the
|
||||||
|
* secondary time info with Xen or if we migrated to a host without the
|
||||||
|
* necessary flags. On both of these cases what happens is either
|
||||||
|
* process seeing a zeroed out pvti or seeing no PVCLOCK_TSC_STABLE_BIT
|
||||||
|
* bit set. Userspace checks the latter and if 0, it discards the data
|
||||||
|
* in pvti and fallbacks to a system call for a reliable timestamp.
|
||||||
|
*/
|
||||||
|
if (ret != 0)
|
||||||
|
pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
|
||||||
|
ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void xen_setup_vsyscall_time_info(void)
|
||||||
|
{
|
||||||
|
struct vcpu_register_time_memory_area t;
|
||||||
|
struct pvclock_vsyscall_time_info *ti;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
|
||||||
|
if (!ti)
|
||||||
|
return;
|
||||||
|
|
||||||
|
t.addr.v = &ti->pvti;
|
||||||
|
|
||||||
|
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
|
||||||
|
if (ret) {
|
||||||
|
pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret);
|
||||||
|
free_page((unsigned long)ti);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If primary time info had this bit set, secondary should too since
|
||||||
|
* it's the same data on both just different memory regions. But we
|
||||||
|
* still check it in case hypervisor is buggy.
|
||||||
|
*/
|
||||||
|
if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
|
||||||
|
t.addr.v = NULL;
|
||||||
|
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
|
||||||
|
0, &t);
|
||||||
|
if (!ret)
|
||||||
|
free_page((unsigned long)ti);
|
||||||
|
|
||||||
|
pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
xen_clock = ti;
|
||||||
|
pvclock_set_pvti_cpu0_va(xen_clock);
|
||||||
|
|
||||||
|
xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
static void __init xen_time_init(void)
|
static void __init xen_time_init(void)
|
||||||
{
|
{
|
||||||
struct pvclock_vcpu_time_info *pvti;
|
struct pvclock_vcpu_time_info *pvti;
|
||||||
|
@ -401,8 +487,10 @@ static void __init xen_time_init(void)
|
||||||
* bit is supported hence speeding up Xen clocksource.
|
* bit is supported hence speeding up Xen clocksource.
|
||||||
*/
|
*/
|
||||||
pvti = &__this_cpu_read(xen_vcpu)->time;
|
pvti = &__this_cpu_read(xen_vcpu)->time;
|
||||||
if (pvti->flags & PVCLOCK_TSC_STABLE_BIT)
|
if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
|
||||||
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
|
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
|
||||||
|
xen_setup_vsyscall_time_info();
|
||||||
|
}
|
||||||
|
|
||||||
xen_setup_runstate_info(cpu);
|
xen_setup_runstate_info(cpu);
|
||||||
xen_setup_timer(cpu);
|
xen_setup_timer(cpu);
|
||||||
|
|
|
@ -69,6 +69,8 @@ void xen_setup_runstate_info(int cpu);
|
||||||
void xen_teardown_timer(int cpu);
|
void xen_teardown_timer(int cpu);
|
||||||
u64 xen_clocksource_read(void);
|
u64 xen_clocksource_read(void);
|
||||||
void xen_setup_cpu_clockevents(void);
|
void xen_setup_cpu_clockevents(void);
|
||||||
|
void xen_save_time_memory_area(void);
|
||||||
|
void xen_restore_time_memory_area(void);
|
||||||
void __init xen_init_time_ops(void);
|
void __init xen_init_time_ops(void);
|
||||||
void __init xen_hvm_init_time_ops(void);
|
void __init xen_hvm_init_time_ops(void);
|
||||||
|
|
||||||
|
|
|
@ -178,4 +178,46 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
|
||||||
|
|
||||||
/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
|
/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
|
||||||
#define VCPUOP_send_nmi 11
|
#define VCPUOP_send_nmi 11
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the physical ID information for a pinned vcpu's underlying physical
|
||||||
|
* processor. The physical ID information is architecture-specific.
|
||||||
|
* On x86: id[31:0]=apic_id, id[63:32]=acpi_id.
|
||||||
|
* This command returns -EINVAL if it is not a valid operation for this VCPU.
|
||||||
|
*/
|
||||||
|
#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */
|
||||||
|
/* Result buffer for VCPUOP_get_physid. */
struct vcpu_get_physid {
	/* On x86: bits 31:0 hold the APIC id, bits 63:32 the ACPI id. */
	uint64_t phys_id;
};
|
||||||
|
DEFINE_GUEST_HANDLE_STRUCT(vcpu_get_physid);
|
||||||
|
/* Low 32 bits of a physical id: the x86 APIC id. */
#define xen_vcpu_physid_to_x86_apicid(id) ((uint32_t)(id))
/* High 32 bits of a physical id: the x86 ACPI id. */
#define xen_vcpu_physid_to_x86_acpiid(id) ((uint32_t)((id) >> 32))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Register a memory location to get a secondary copy of the vcpu time
|
||||||
|
* parameters. The master copy still exists as part of the vcpu shared
|
||||||
|
* memory area, and this secondary copy is updated whenever the master copy
|
||||||
|
* is updated (and using the same versioning scheme for synchronisation).
|
||||||
|
*
|
||||||
|
* The intent is that this copy may be mapped (RO) into userspace so
|
||||||
|
* that usermode can compute system time using the time info and the
|
||||||
|
* tsc. Usermode will see an array of vcpu_time_info structures, one
|
||||||
|
* for each vcpu, and choose the right one by an existing mechanism
|
||||||
|
* which allows it to get the current vcpu number (such as via a
|
||||||
|
* segment limit). It can then apply the normal algorithm to compute
|
||||||
|
* system time from the tsc.
|
||||||
|
*
|
||||||
|
* @extra_arg == pointer to vcpu_register_time_memory_area structure.
|
||||||
|
*/
|
||||||
|
#define VCPUOP_register_vcpu_time_memory_area 13
|
||||||
|
DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info);
|
||||||
|
struct vcpu_register_time_memory_area {
|
||||||
|
union {
|
||||||
|
GUEST_HANDLE(vcpu_time_info) h;
|
||||||
|
struct pvclock_vcpu_time_info *v;
|
||||||
|
uint64_t p;
|
||||||
|
} addr;
|
||||||
|
};
|
||||||
|
DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area);
|
||||||
|
|
||||||
#endif /* __XEN_PUBLIC_VCPU_H__ */
|
#endif /* __XEN_PUBLIC_VCPU_H__ */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue