From 579d7ebe90a332cc5b6c02db9250fd0816a64f63 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 3 Nov 2022 15:05:06 +0000 Subject: [PATCH 1/3] KVM: arm64: Fix kvm init failure when mode!=vhe and VA_BITS=52. For nvhe and protected modes, the hyp stage 1 page-tables were previously configured to have the same number of VA bits as the kernel's idmap. However, for kernel configs with VA_BITS=52 and where the kernel is loaded in physical memory below 48 bits, the idmap VA bits is actually smaller than the kernel's normal stage 1 VA bits. This can lead to kernel addresses that can't be mapped into the hypervisor, leading to kvm initialization failure during boot: kvm [1]: IPA Size Limit: 48 bits kvm [1]: Cannot map world-switch code kvm [1]: error initializing Hyp mode: -34 Fix this by ensuring that the hyp stage 1 VA size is the maximum of what's used for the idmap and the regular kernel stage 1. At the same time, refactor the code so that the hyp VA bits is only calculated in one place. Prior to 7ba8f2b2d652, the idmap was always 52 bits for a 52 VA bits kernel and therefore the hyp stage1 was also always 52 bits. Fixes: 7ba8f2b2d652 ("arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds") Signed-off-by: Ryan Roberts [maz: commit message fixes] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103150507.32948-2-ryan.roberts@arm.com --- arch/arm64/kvm/arm.c | 20 +++----------------- arch/arm64/kvm/mmu.c | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..803055da3ee3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1518,7 +1518,7 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_prepare_hyp_mode(int cpu) +static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; @@ -1534,23 +1534,9 @@ static void cpu_prepare_hyp_mode(int cpu) params->mair_el2 = read_sysreg(mair_el1); - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; tcr &= ~TCR_T0SZ_MASK; - tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + tcr |= TCR_T0SZ(hyp_va_bits); params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); @@ -2054,7 +2040,7 @@ static int init_hyp_mode(void) } /* Prepare the CPU initialization parameters */ - cpu_prepare_hyp_mode(cpu); + cpu_prepare_hyp_mode(cpu, hyp_va_bits); } if (is_protected_kvm_enabled()) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8..4efb983cff43 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1618,6 +1618,8 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { int kvm_mmu_init(u32 *hyp_va_bits) { int err; + u32 idmap_bits; + u32 kernel_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -1631,7 +1633,31 @@ int kvm_mmu_init(u32 *hyp_va_bits) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - *hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS_MIN, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, in some cases the ID map may be configured for fewer than + * the number of VA bits used by the regular kernel stage 1. This + * happens when VA_BITS=52 and the kernel image is placed in PA space + * below 48 bits. + * + * At EL2, there is only one TTBR register, and we can't switch between + * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom + * line: we need to use the extended range with *both* our translation + * tables. + * + * So use the maximum of the idmap VA bits and the regular kernel stage + * 1 VA bits to assure that the hypervisor can both ID map its code page + * and map any kernel memory. + */ + idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + kernel_bits = vabits_actual; + *hyp_va_bits = max(idmap_bits, kernel_bits); + kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", From a0d37784bfd7f699986ba3a64cfeb68a03cb7fd0 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 3 Nov 2022 15:05:07 +0000 Subject: [PATCH 2/3] KVM: arm64: Fix PAR_TO_HPFAR() to work independently of PA_BITS. Kernel configs with PAGE_SIZE=64KB and PA_BITS=48 still advertise 52 bit IPA space on HW that implements LPA. This is by design (admitedly this is a very unlikely configuration in the real world). However on such a config, attempting to create a vm with the guest kernel placed above 48 bits in IPA space results in misbehaviour due to the hypervisor incorrectly interpretting a faulting IPA. Fix up PAR_TO_HPFAR() to always take 52 bits out of the PAR rather than masking to CONFIG_ARM64_PA_BITS. If the system has a smaller implemented PARange this should be safe because the bits are res0. A more robust approach would be to discover the IPA size in use by the page-table and mask based on that, to avoid relying on res0 reading back as zero. But this information is difficult to access safely from the code's location, so take the easy way out. Fixes: bc1d7de8c550 ("kvm: arm64: Add 52bit support for PAR to HPFAR conversoin") Signed-off-by: Ryan Roberts [maz: commit message fixes] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221103150507.32948-3-ryan.roberts@arm.com --- arch/arm64/include/asm/kvm_arm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..a82f2493a72b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -340,9 +340,13 @@ * We have * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + * + * Always assume 52 bit PA since at this point, we don't know how many PA bits + * the page table has been set up for. This should be safe since unused address + * bits in PAR are res0. */ #define PAR_TO_HPFAR(par) \ - (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) + (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) #define ECN(x) { ESR_ELx_EC_##x, #x } From 219072c09abde0f1d0a6ce091be375e8eb7d08f0 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 5 Dec 2022 11:40:31 +0000 Subject: [PATCH 3/3] KVM: arm64: Fix benign bug with incorrect use of VA_BITS get_user_mapping_size() uses kvm's pgtable library to walk a user space page table created by the kernel, and in doing so, passes metadata that the library needs, including ia_bits, which defines the size of the input address. For the case where the kernel is compiled for 52 VA bits but runs on HW that does not support LVA, it will fall back to 48 VA bits at runtime. Therefore we must use vabits_actual rather than VA_BITS to get the true address size. This is benign in the current code base because the pgtable library only uses it for error checking. Fixes: 6011cf68c885 ("KVM: arm64: Walk userspace page tables to compute the THP mapping size") Signed-off-by: Ryan Roberts Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221205114031.3972780-1-ryan.roberts@arm.com --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 4efb983cff43..1ef0704420d9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -641,7 +641,7 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) { struct kvm_pgtable pgt = { .pgd = (kvm_pte_t *)kvm->mm->pgd, - .ia_bits = VA_BITS, + .ia_bits = vabits_actual, .start_level = (KVM_PGTABLE_MAX_LEVELS - CONFIG_PGTABLE_LEVELS), .mm_ops = &kvm_user_mm_ops,