diff --git a/include/sbi/sbi_fifo.h b/include/sbi/sbi_fifo.h
index 3f36c59..bc8f8f6 100644
--- a/include/sbi/sbi_fifo.h
+++ b/include/sbi/sbi_fifo.h
@@ -26,7 +26,6 @@ struct sbi_fifo {
 
 enum sbi_fifo_inplace_update_types {
 	SBI_FIFO_SKIP,
 	SBI_FIFO_UPDATED,
-	SBI_FIFO_RESET,
 	SBI_FIFO_UNCHANGED,
 };
diff --git a/include/sbi/sbi_tlb.h b/include/sbi/sbi_tlb.h
index f3d93d4..af77f89 100644
--- a/include/sbi/sbi_tlb.h
+++ b/include/sbi/sbi_tlb.h
@@ -20,12 +20,13 @@
 
 /* clang-format on */
 
-#define SBI_TLB_FIFO_NUM_ENTRIES 4
+#define SBI_TLB_FIFO_NUM_ENTRIES 8
 
 enum sbi_tlb_info_types {
 	SBI_TLB_FLUSH_VMA,
 	SBI_TLB_FLUSH_VMA_ASID,
-	SBI_TLB_FLUSH_VMA_VMID
+	SBI_TLB_FLUSH_VMA_VMID,
+	SBI_ITLB_FLUSH
 };
 
 struct sbi_scratch;
@@ -35,14 +36,17 @@ struct sbi_tlb_info {
 	unsigned long size;
 	unsigned long asid;
 	unsigned long type;
+	unsigned long shart_mask;
 };
 
 #define SBI_TLB_INFO_SIZE sizeof(struct sbi_tlb_info)
 
-int sbi_tlb_fifo_update(struct sbi_scratch *scratch, u32 event, void *data);
+int sbi_tlb_fifo_update(struct sbi_scratch *scratch, u32 hartid, void *data);
 
-void sbi_tlb_fifo_process(struct sbi_scratch *scratch, u32 event);
+void sbi_tlb_fifo_process(struct sbi_scratch *scratch);
 
 int sbi_tlb_fifo_init(struct sbi_scratch *scratch, bool cold_boot);
 
+void sbi_tlb_fifo_sync(struct sbi_scratch *scratch);
+
 #endif
diff --git a/lib/sbi/sbi_ecall.c b/lib/sbi/sbi_ecall.c
index 50c05d6..c6ec76e 100644
--- a/lib/sbi/sbi_ecall.c
+++ b/lib/sbi/sbi_ecall.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 
 #define SBI_ECALL_VERSION_MAJOR 0
 #define SBI_ECALL_VERSION_MINOR 1
@@ -36,6 +37,7 @@ int sbi_ecall_handler(u32 hartid, ulong mcause, struct sbi_trap_regs *regs,
 	int ret = SBI_ENOTSUPP;
 	struct unpriv_trap uptrap;
 	struct sbi_tlb_info tlb_info;
+	u32 source_hart = sbi_current_hartid();
 
 	switch (regs->a7) {
 	case SBI_ECALL_SET_TIMER:
@@ -64,13 +66,18 @@ int sbi_ecall_handler(u32 hartid, ulong mcause, struct sbi_trap_regs *regs,
 					SBI_IPI_EVENT_SOFT, NULL);
 		break;
 	case SBI_ECALL_REMOTE_FENCE_I:
+		tlb_info.start = 0;
+		tlb_info.size = 0;
+		tlb_info.type = SBI_ITLB_FLUSH;
+		tlb_info.shart_mask = 1UL << source_hart;
 		ret = sbi_ipi_send_many(scratch, &uptrap, (ulong *)regs->a0,
-					SBI_IPI_EVENT_FENCE_I, NULL);
+					SBI_IPI_EVENT_FENCE_I, &tlb_info);
 		break;
 	case SBI_ECALL_REMOTE_SFENCE_VMA:
 		tlb_info.start = (unsigned long)regs->a1;
 		tlb_info.size  = (unsigned long)regs->a2;
 		tlb_info.type  = SBI_TLB_FLUSH_VMA;
+		tlb_info.shart_mask = 1UL << source_hart;
 
 		ret = sbi_ipi_send_many(scratch, &uptrap, (ulong *)regs->a0,
 					SBI_IPI_EVENT_SFENCE_VMA, &tlb_info);
@@ -80,6 +87,7 @@ int sbi_ecall_handler(u32 hartid, ulong mcause, struct sbi_trap_regs *regs,
 		tlb_info.size  = (unsigned long)regs->a2;
 		tlb_info.asid  = (unsigned long)regs->a3;
 		tlb_info.type  = SBI_TLB_FLUSH_VMA_ASID;
+		tlb_info.shart_mask = 1UL << source_hart;
 
 		ret = sbi_ipi_send_many(scratch, &uptrap, (ulong *)regs->a0,
 					SBI_IPI_EVENT_SFENCE_VMA_ASID,
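Review note: each ecall case above now stamps tlb_info.shart_mask with the calling
hart's bit; when requests later merge inside a remote fifo, the masks are ORed so a
single flush can acknowledge every waiting source hart. A minimal host-side sketch of
that mask round-trip (hart IDs and ack_harts() are illustrative, not OpenSBI APIs):

#include <stdio.h>

/* Walk a source-hart mask the way sbi_tlb_entry_process() does and
 * "acknowledge" each hart whose bit is set. */
static void ack_harts(unsigned long shart_mask)
{
	for (unsigned int i = 0; shart_mask; i++, shart_mask >>= 1)
		if (shart_mask & 1UL)
			printf("ack source hart %u\n", i);
}

int main(void)
{
	unsigned long mask = 0;

	mask |= 1UL << 1;	/* request stamped by hart 1 */
	mask |= 1UL << 3;	/* merged request from hart 3 */
	ack_harts(mask);	/* prints harts 1 and 3 */
	return 0;
}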
diff --git a/lib/sbi/sbi_fifo.c b/lib/sbi/sbi_fifo.c
index 18ff0d6..8d1dbf0 100644
--- a/lib/sbi/sbi_fifo.c
+++ b/lib/sbi/sbi_fifo.c
@@ -54,6 +54,21 @@ bool sbi_fifo_is_full(struct sbi_fifo *fifo)
 	return ret;
 }
 
+/* Note: must be called with fifo->qlock held */
+static inline void __sbi_fifo_enqueue(struct sbi_fifo *fifo, void *data)
+{
+	u32 head;
+
+	head = (u32)fifo->tail + fifo->avail;
+	if (head >= fifo->num_entries)
+		head = head - fifo->num_entries;
+
+	sbi_memcpy(fifo->queue + head * fifo->entry_size, data, fifo->entry_size);
+
+	fifo->avail++;
+}
+
 /* Note: must be called with fifo->qlock held */
 static inline bool __sbi_fifo_is_empty(struct sbi_fifo *fifo)
 {
@@ -109,7 +124,9 @@ int sbi_fifo_inplace_update(struct sbi_fifo *fifo, void *in,
 	if (!fifo || !in)
 		return ret;
+
 	spin_lock(&fifo->qlock);
+
 	if (__sbi_fifo_is_empty(fifo)) {
 		spin_unlock(&fifo->qlock);
 		return ret;
 
@@ -120,12 +137,10 @@ int sbi_fifo_inplace_update(struct sbi_fifo *fifo, void *in,
 		if (index >= fifo->num_entries)
 			index = index - fifo->num_entries;
 		entry = (void *)fifo->queue + (u32)index * fifo->entry_size;
-		ret = fptr(in, entry);
+		ret = fptr(in, entry);
+
 		if (ret == SBI_FIFO_SKIP || ret == SBI_FIFO_UPDATED) {
 			break;
-		} else if (ret == SBI_FIFO_RESET) {
-			__sbi_fifo_reset(fifo);
-			break;
 		}
 	}
 	spin_unlock(&fifo->qlock);
@@ -135,8 +150,6 @@ int sbi_fifo_inplace_update(struct sbi_fifo *fifo, void *in,
 
 int sbi_fifo_enqueue(struct sbi_fifo *fifo, void *data)
 {
-	u32 head;
-
 	if (!fifo || !data)
 		return SBI_EINVAL;
 
@@ -146,14 +159,7 @@ int sbi_fifo_enqueue(struct sbi_fifo *fifo, void *data)
 		spin_unlock(&fifo->qlock);
 		return SBI_ENOSPC;
 	}
-
-	head = (u32)fifo->tail + fifo->avail;
-	if (head >= fifo->num_entries)
-		head = head - fifo->num_entries;
-
-	sbi_memcpy(fifo->queue + head * fifo->entry_size, data, fifo->entry_size);
-
-	fifo->avail++;
+	__sbi_fifo_enqueue(fifo, data);
 
 	spin_unlock(&fifo->qlock);
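With SBI_FIFO_RESET gone, an in-place update callback may only return SKIP, UPDATED
or UNCHANGED. A self-contained model of that contract, using the range-merge rules
that sbi_tlb_fifo_update_cb() applies later in this patch (struct range and
update_cb() are illustrative stand-ins, not the real entry type):

#include <stdio.h>

enum { FIFO_SKIP, FIFO_UPDATED, FIFO_UNCHANGED };

struct range { unsigned long start, size; };

/* Called once per queued entry; 'next' is the incoming request. */
static int update_cb(struct range *next, struct range *curr)
{
	unsigned long next_end = next->start + next->size;
	unsigned long curr_end = curr->start + curr->size;

	if (next->start <= curr->start && next_end > curr_end) {
		*curr = *next;		/* widen the queued entry */
		return FIFO_UPDATED;
	}
	if (next->start >= curr->start && next_end <= curr_end)
		return FIFO_SKIP;	/* already covered, drop request */
	return FIFO_UNCHANGED;		/* caller falls back to enqueue */
}

int main(void)
{
	struct range curr = { 0x1000, 0x1000 };	/* queued: [0x1000,0x2000) */
	struct range next = { 0x1800, 0x200 };	/* new:    [0x1800,0x1a00) */

	/* 'next' is a subset of 'curr', so the model returns FIFO_SKIP. */
	printf("cb returned %d (FIFO_SKIP == %d)\n",
	       update_cb(&next, &curr), FIFO_SKIP);
	return 0;
}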
diff --git a/lib/sbi/sbi_ipi.c b/lib/sbi/sbi_ipi.c
index b1077b5..1c84f9e 100644
--- a/lib/sbi/sbi_ipi.c
+++ b/lib/sbi/sbi_ipi.c
@@ -9,15 +9,13 @@
  */
 
 #include
-#include
 #include
+#include
 #include
 #include
-#include
 #include
 #include
 #include
-#include
 #include
 
 static unsigned long ipi_data_off;
@@ -40,20 +38,22 @@ static int sbi_ipi_send(struct sbi_scratch *scratch, u32 hartid, u32 event,
 	remote_scratch = sbi_hart_id_to_scratch(scratch, hartid);
 	ipi_data = sbi_scratch_offset_ptr(remote_scratch, ipi_data_off);
 
 	if (event == SBI_IPI_EVENT_SFENCE_VMA ||
-	    event == SBI_IPI_EVENT_SFENCE_VMA_ASID) {
-		ret = sbi_tlb_fifo_update(remote_scratch, event, data);
-		if (ret > 0)
-			goto done;
-		else if (ret < 0)
+	    event == SBI_IPI_EVENT_SFENCE_VMA_ASID ||
+	    event == SBI_IPI_EVENT_FENCE_I) {
+		ret = sbi_tlb_fifo_update(remote_scratch, hartid, data);
+		if (ret < 0)
 			return ret;
 	}
 
 	atomic_raw_set_bit(event, &ipi_data->ipi_type);
-	mb();
+	smp_wmb();
 	sbi_platform_ipi_send(plat, hartid);
+	if (event == SBI_IPI_EVENT_SFENCE_VMA ||
+	    event == SBI_IPI_EVENT_SFENCE_VMA_ASID ||
+	    event == SBI_IPI_EVENT_FENCE_I) {
+		sbi_tlb_fifo_sync(scratch);
+	}
+
 	return 0;
 }
@@ -70,12 +70,13 @@ int sbi_ipi_send_many(struct sbi_scratch *scratch, struct unpriv_trap *uptrap,
 			return SBI_ETRAP;
 	}
 
-	/* send IPIs to every other hart on the set */
+	/* Send IPIs to every other hart on the set */
 	for (i = 0, m = mask; m; i++, m >>= 1)
 		if ((m & 1UL) && (i != hartid))
 			sbi_ipi_send(scratch, i, event, data);
 
-	/* If the current hart is on the set, send an IPI
+	/*
+	 * If the current hart is on the set, send an IPI
 	 * to it as well
 	 */
 	if (mask & (1UL << hartid))
@@ -91,7 +92,7 @@ void sbi_ipi_clear_smode(struct sbi_scratch *scratch)
 
 void sbi_ipi_process(struct sbi_scratch *scratch)
 {
-	volatile unsigned long ipi_type;
+	unsigned long ipi_type;
 	unsigned int ipi_event;
 	const struct sbi_platform *plat = sbi_platform_ptr(scratch);
 	struct sbi_ipi_data *ipi_data =
@@ -100,27 +101,32 @@ void sbi_ipi_process(struct sbi_scratch *scratch)
 	u32 hartid = sbi_current_hartid();
 	sbi_platform_ipi_clear(plat, hartid);
 
-	do {
-		ipi_type = ipi_data->ipi_type;
-		rmb();
-		ipi_event = __ffs(ipi_type);
+	ipi_type = atomic_raw_xchg_ulong(&ipi_data->ipi_type, 0);
+	ipi_event = 0;
+	while (ipi_type) {
+		if (!(ipi_type & 1UL))
+			goto skip;
+
 		switch (ipi_event) {
 		case SBI_IPI_EVENT_SOFT:
 			csr_set(CSR_MIP, MIP_SSIP);
 			break;
 		case SBI_IPI_EVENT_FENCE_I:
-			__asm__ __volatile("fence.i");
-			break;
 		case SBI_IPI_EVENT_SFENCE_VMA:
 		case SBI_IPI_EVENT_SFENCE_VMA_ASID:
-			sbi_tlb_fifo_process(scratch, ipi_event);
+			sbi_tlb_fifo_process(scratch);
 			break;
 		case SBI_IPI_EVENT_HALT:
 			sbi_hart_hang();
 			break;
+		default:
+			break;
 		};
-		ipi_type = atomic_raw_clear_bit(ipi_event, &ipi_data->ipi_type);
-	} while (ipi_type > 0);
+
+skip:
+		ipi_type = ipi_type >> 1;
+		ipi_event++;
+	}
 }
 
 int sbi_ipi_init(struct sbi_scratch *scratch, bool cold_boot)
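The reworked sbi_ipi_process() loop claims every pending event with a single atomic
exchange instead of repeated read/clear-bit cycles, then scans the bits from the LSB
upward. A compile-anywhere model of the same scan (the GCC/Clang __atomic builtin
stands in for atomic_raw_xchg_ulong(); the event numbers are made up):

#include <stdio.h>

int main(void)
{
	unsigned long pending = (1UL << 0) | (1UL << 2);  /* two events set */
	unsigned long ipi_type;
	unsigned int ipi_event = 0;

	/* Claim all pending events at once; later arrivals accumulate in
	 * 'pending' and are handled by the next invocation. */
	ipi_type = __atomic_exchange_n(&pending, 0, __ATOMIC_SEQ_CST);
	while (ipi_type) {
		if (ipi_type & 1UL)
			printf("handle event %u\n", ipi_event);
		ipi_type >>= 1;
		ipi_event++;
	}
	return 0;
}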
diff --git a/lib/sbi/sbi_tlb.c b/lib/sbi/sbi_tlb.c
index 104c066..e4f6fe2 100644
--- a/lib/sbi/sbi_tlb.c
+++ b/lib/sbi/sbi_tlb.c
@@ -9,122 +9,20 @@
  */
 
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
+#include
+#include
 
-static unsigned long ipi_tlb_fifo_off;
-static unsigned long ipi_tlb_fifo_mem_off;
-
-static inline int __sbi_tlb_fifo_range_check(struct sbi_tlb_info *curr,
-					     struct sbi_tlb_info *next)
-{
-	unsigned long curr_end;
-	unsigned long next_end;
-	int ret = SBI_FIFO_UNCHANGED;
-
-	if (!curr || !next)
-		return ret;
-
-	next_end = next->start + next->size;
-	curr_end = curr->start + curr->size;
-	if (next->start <= curr->start && next_end > curr_end) {
-		curr->start = next->start;
-		curr->size = next->size;
-		ret = SBI_FIFO_UPDATED;
-	} else if (next->start >= curr->start && next_end <= curr_end) {
-		ret = SBI_FIFO_SKIP;
-	}
-
-	return ret;
-}
-
-/**
- * Call back to decide if an inplace fifo update is required or next entry can
- * can be skipped. Here are the different cases that are being handled.
- *
- * Case1:
- *	if next flush request range lies within one of the existing entry, skip
- *	the next entry.
- * Case2:
- *	if flush request range in current fifo entry lies within next flush
- *	request, update the current entry.
- * Case3:
-	if a complete vma flush is requested, then all entries can be deleted
-	and new request can be enqueued. This will not be done for ASID case
-	as that means we have to iterate again in the fifo to figure out which
-	entries belong to that ASID.
- */
-static int sbi_tlb_fifo_update_cb(void *in, void *data)
-{
-	struct sbi_tlb_info *curr;
-	struct sbi_tlb_info *next;
-	int ret = SBI_FIFO_UNCHANGED;
-
-	if (!in || !data)
-		return ret;
-
-	curr = (struct sbi_tlb_info *)data;
-	next = (struct sbi_tlb_info *)in;
-	if (next->type == SBI_TLB_FLUSH_VMA_ASID &&
-	    curr->type == SBI_TLB_FLUSH_VMA_ASID) {
-		if (next->asid == curr->asid)
-			ret = __sbi_tlb_fifo_range_check(curr, next);
-	} else if (next->type == SBI_TLB_FLUSH_VMA &&
-		   curr->type == SBI_TLB_FLUSH_VMA) {
-		if (next->size == SBI_TLB_FLUSH_ALL)
-			ret = SBI_FIFO_RESET;
-		else
-			ret = __sbi_tlb_fifo_range_check(curr, next);
-	}
-
-	return ret;
-}
-
-int sbi_tlb_fifo_update(struct sbi_scratch *scratch, u32 event, void *data)
-{
-	int ret;
-	struct sbi_fifo *ipi_tlb_fifo;
-	struct sbi_tlb_info *tinfo = data;
-
-	ipi_tlb_fifo = sbi_scratch_offset_ptr(scratch,
-					      ipi_tlb_fifo_off);
-	/*
-	 * If address range to flush is too big then simply
-	 * upgrade it to flush all because we can only flush
-	 * 4KB at a time.
-	 */
-	if (tinfo->size >= SBI_TLB_FLUSH_MAX_SIZE) {
-		tinfo->start = 0;
-		tinfo->size = SBI_TLB_FLUSH_ALL;
-	}
-
-	ret = sbi_fifo_inplace_update(ipi_tlb_fifo, data,
-				      sbi_tlb_fifo_update_cb);
-	if (ret == SBI_FIFO_SKIP || ret == SBI_FIFO_UPDATED) {
-		return 1;
-	}
-
-	while (sbi_fifo_enqueue(ipi_tlb_fifo, data) < 0) {
-		/**
-		 * For now, Busy loop until there is space in the fifo.
-		 * There may be case where target hart is also
-		 * enqueue in source hart's fifo. Both hart may busy
-		 * loop leading to a deadlock.
-		 * TODO: Introduce a wait/wakeup event mechansim to handle
-		 * this properly.
-		 */
-		__asm__ __volatile("nop");
-		__asm__ __volatile("nop");
-	}
-
-	return 0;
-}
+static unsigned long tlb_sync_off;
+static unsigned long tlb_fifo_off;
+static unsigned long tlb_fifo_mem_off;
 
 static void sbi_tlb_flush_all(void)
 {
@@ -179,48 +77,239 @@ static void sbi_tlb_fifo_sfence_vma_asid(struct sbi_tlb_info *tinfo)
 	}
 }
 
-void sbi_tlb_fifo_process(struct sbi_scratch *scratch, u32 event)
+static void sbi_tlb_local_flush(struct sbi_tlb_info *tinfo)
+{
+	if (tinfo->type == SBI_TLB_FLUSH_VMA) {
+		sbi_tlb_fifo_sfence_vma(tinfo);
+	} else if (tinfo->type == SBI_TLB_FLUSH_VMA_ASID) {
+		sbi_tlb_fifo_sfence_vma_asid(tinfo);
+	} else if (tinfo->type == SBI_ITLB_FLUSH) {
+		__asm__ __volatile("fence.i");
+	} else {
+		sbi_printf("Invalid tlb flush request type [%lu]\n",
+			   tinfo->type);
+	}
+}
+
+static void sbi_tlb_entry_process(struct sbi_scratch *scratch,
+				  struct sbi_tlb_info *tinfo)
+{
+	u32 i;
+	u64 m;
+	struct sbi_scratch *rscratch = NULL;
+	unsigned long *rtlb_sync = NULL;
+
+	sbi_tlb_local_flush(tinfo);
+	for (i = 0, m = tinfo->shart_mask; m; i++, m >>= 1) {
+		if (!(m & 1UL))
+			continue;
+
+		rscratch = sbi_hart_id_to_scratch(scratch, i);
+		rtlb_sync = sbi_scratch_offset_ptr(rscratch, tlb_sync_off);
+		while (atomic_raw_xchg_ulong(rtlb_sync, 1)) ;
+	}
+}
+
+static void sbi_tlb_fifo_process_count(struct sbi_scratch *scratch, int count)
 {
 	struct sbi_tlb_info tinfo;
-	struct sbi_fifo *ipi_tlb_fifo =
-		sbi_scratch_offset_ptr(scratch, ipi_tlb_fifo_off);
+	u32 deq_count = 0;
+	struct sbi_fifo *tlb_fifo =
+		sbi_scratch_offset_ptr(scratch, tlb_fifo_off);
+
+	while (!sbi_fifo_dequeue(tlb_fifo, &tinfo)) {
+		sbi_tlb_entry_process(scratch, &tinfo);
+		deq_count++;
+		if (deq_count > count)
+			break;
 
-	while (!sbi_fifo_dequeue(ipi_tlb_fifo, &tinfo)) {
-		if (tinfo.type == SBI_TLB_FLUSH_VMA)
-			sbi_tlb_fifo_sfence_vma(&tinfo);
-		else if (tinfo.type == SBI_TLB_FLUSH_VMA_ASID)
-			sbi_tlb_fifo_sfence_vma_asid(&tinfo);
-		sbi_memset(&tinfo, 0, SBI_TLB_INFO_SIZE);
 	}
 }
 
+void sbi_tlb_fifo_process(struct sbi_scratch *scratch)
+{
+	struct sbi_tlb_info tinfo;
+	struct sbi_fifo *tlb_fifo =
+		sbi_scratch_offset_ptr(scratch, tlb_fifo_off);
+
+	while (!sbi_fifo_dequeue(tlb_fifo, &tinfo))
+		sbi_tlb_entry_process(scratch, &tinfo);
+}
+
+void sbi_tlb_fifo_sync(struct sbi_scratch *scratch)
+{
+	unsigned long *tlb_sync =
+		sbi_scratch_offset_ptr(scratch, tlb_sync_off);
+
+	while (!atomic_raw_xchg_ulong(tlb_sync, 0)) {
+		/*
+		 * While we are waiting for the remote hart to set the sync,
+		 * consume fifo requests to avoid deadlock.
+		 */
+		sbi_tlb_fifo_process_count(scratch, 1);
+	}
+
+	return;
+}
+
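sbi_tlb_fifo_sync() and sbi_tlb_entry_process() form a one-word handshake per source
hart: the remote hart raises the source's tlb_sync word after flushing, and the source
consumes it, draining one of its own fifo entries per spin so two harts waiting on each
other still make progress. A single-threaded model of that handshake (the __atomic
builtin and both helper names are illustrative stand-ins, not the firmware code):

#include <stdio.h>

static unsigned long source_sync;	/* models a hart's tlb_sync word */

static void remote_ack(void)		/* sbi_tlb_entry_process() side */
{
	/* Spin until the previous ack was consumed, then raise the flag. */
	while (__atomic_exchange_n(&source_sync, 1, __ATOMIC_SEQ_CST))
		;
}

static void drain_one_request(void)	/* sbi_tlb_fifo_process_count(.., 1) */
{
	printf("drained one local fifo entry while waiting\n");
}

int main(void)
{
	remote_ack();			/* remote hart acknowledges first */

	/* sbi_tlb_fifo_sync() side: consume the ack, draining in between. */
	while (!__atomic_exchange_n(&source_sync, 0, __ATOMIC_SEQ_CST))
		drain_one_request();

	printf("sync complete\n");
	return 0;
}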
+static inline int __sbi_tlb_fifo_range_check(struct sbi_tlb_info *curr,
+					     struct sbi_tlb_info *next)
+{
+	unsigned long curr_end;
+	unsigned long next_end;
+	int ret = SBI_FIFO_UNCHANGED;
+
+	if (!curr || !next)
+		return ret;
+
+	next_end = next->start + next->size;
+	curr_end = curr->start + curr->size;
+	if (next->start <= curr->start && next_end > curr_end) {
+		curr->start = next->start;
+		curr->size = next->size;
+		curr->shart_mask = curr->shart_mask | next->shart_mask;
+		ret = SBI_FIFO_UPDATED;
+	} else if (next->start >= curr->start && next_end <= curr_end) {
+		curr->shart_mask = curr->shart_mask | next->shart_mask;
+		ret = SBI_FIFO_SKIP;
+	}
+
+	return ret;
+}
+
+/**
+ * Callback to decide if an inplace fifo update is required or the next
+ * entry can be skipped. Here are the different cases that are being handled.
+ *
+ * Case1:
+ *	if the next flush request range lies within one of the existing
+ *	entries, skip the next entry.
+ * Case2:
+ *	if the flush request range in the current fifo entry lies within the
+ *	next flush request, update the current entry.
+ *
+ * Note:
+ *	We can not issue a fifo reset anymore if a complete vma flush is
+ *	requested, because FENCE.I requests are queued as well now. To ease
+ *	the pressure on the enqueue/fifo-sync path, try to dequeue one element
+ *	before continuing the while loop. This method is preferred over
+ *	wfi/ipi because of the MMIO cost involved in the latter.
+ */
+static int sbi_tlb_fifo_update_cb(void *in, void *data)
+{
+	struct sbi_tlb_info *curr;
+	struct sbi_tlb_info *next;
+	int ret = SBI_FIFO_UNCHANGED;
+
+	if (!in || !data)
+		return ret;
+
+	curr = (struct sbi_tlb_info *)data;
+	next = (struct sbi_tlb_info *)in;
+
+	if (next->type == SBI_TLB_FLUSH_VMA_ASID &&
+	    curr->type == SBI_TLB_FLUSH_VMA_ASID) {
+		if (next->asid == curr->asid)
+			ret = __sbi_tlb_fifo_range_check(curr, next);
+	} else if (next->type == SBI_TLB_FLUSH_VMA &&
+		   curr->type == SBI_TLB_FLUSH_VMA) {
+		ret = __sbi_tlb_fifo_range_check(curr, next);
+	}
+
+	return ret;
+}
+
+int sbi_tlb_fifo_update(struct sbi_scratch *rscratch, u32 hartid, void *data)
+{
+	int ret;
+	struct sbi_fifo *tlb_fifo_r;
+	struct sbi_scratch *lscratch;
+	struct sbi_tlb_info *tinfo = data;
+	u32 curr_hartid = sbi_current_hartid();
+
+	/*
+	 * If address range to flush is too big then simply
+	 * upgrade it to flush all because we can only flush
+	 * 4KB at a time.
+	 */
+	if (tinfo->size >= SBI_TLB_FLUSH_MAX_SIZE) {
+		tinfo->start = 0;
+		tinfo->size = SBI_TLB_FLUSH_ALL;
+	}
+
+	/*
+	 * If the request is to queue a tlb flush entry for itself
+	 * then just do a local flush and return.
+	 */
+	if (hartid == curr_hartid) {
+		sbi_tlb_local_flush(tinfo);
+		return -1;
+	}
+
+	lscratch = sbi_hart_id_to_scratch(rscratch, curr_hartid);
+	tlb_fifo_r = sbi_scratch_offset_ptr(rscratch, tlb_fifo_off);
+
+	ret = sbi_fifo_inplace_update(tlb_fifo_r, data, sbi_tlb_fifo_update_cb);
+	if (ret != SBI_FIFO_UNCHANGED) {
+		return 1;
+	}
+
+	while (sbi_fifo_enqueue(tlb_fifo_r, data) < 0) {
+		/*
+		 * For now, busy loop until there is space in the fifo.
+		 * There may be a case where the target hart is also
+		 * enqueueing into the source hart's fifo. Both harts
+		 * may busy loop, leading to a deadlock.
+		 * TODO: Introduce a wait/wakeup event mechanism to handle
+		 * this properly.
+		 */
+		sbi_tlb_fifo_process_count(lscratch, 1);
+		sbi_dprintf(rscratch, "hart%d: hart%d tlb fifo full\n",
+			    curr_hartid, hartid);
+	}
+
+	return 0;
+}
+
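sbi_tlb_fifo_update() still upgrades oversized ranges to a full flush before queueing,
since only 4KB can be flushed per sfence iteration. A standalone sketch of that upgrade
rule (the two constants are illustrative stand-ins for SBI_TLB_FLUSH_MAX_SIZE and
SBI_TLB_FLUSH_ALL; their real values live in sbi_tlb.h):

#include <stdio.h>

#define FLUSH_MAX_SIZE	(1UL << 30)	/* assumed threshold */
#define FLUSH_ALL	(~0UL)

struct tlb_req { unsigned long start, size; };

/* Widen a too-large range request into a flush-all request. */
static void upgrade_if_too_big(struct tlb_req *r)
{
	if (r->size >= FLUSH_MAX_SIZE) {
		r->start = 0;
		r->size = FLUSH_ALL;
	}
}

int main(void)
{
	struct tlb_req r = { 0x80000000UL, 1UL << 31 };	/* 2GB range */

	upgrade_if_too_big(&r);
	printf("start=%#lx size=%#lx\n", r.start, r.size);
	return 0;
}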
 int sbi_tlb_fifo_init(struct sbi_scratch *scratch, bool cold_boot)
 {
-	void *ipi_tlb_mem;
-	struct sbi_fifo *ipi_tlb_q;
+	void *tlb_mem;
+	unsigned long *tlb_sync;
+	struct sbi_fifo *tlb_q;
 
 	if (cold_boot) {
-		ipi_tlb_fifo_off = sbi_scratch_alloc_offset(sizeof(*ipi_tlb_q),
-							    "IPI_TLB_FIFO");
-		if (!ipi_tlb_fifo_off)
+		tlb_sync_off = sbi_scratch_alloc_offset(sizeof(*tlb_sync),
+							"IPI_TLB_SYNC");
+		if (!tlb_sync_off)
 			return SBI_ENOMEM;
-		ipi_tlb_fifo_mem_off = sbi_scratch_alloc_offset(
+		tlb_fifo_off = sbi_scratch_alloc_offset(sizeof(*tlb_q),
+							"IPI_TLB_FIFO");
+		if (!tlb_fifo_off) {
+			sbi_scratch_free_offset(tlb_sync_off);
+			return SBI_ENOMEM;
+		}
+		tlb_fifo_mem_off = sbi_scratch_alloc_offset(
 				SBI_TLB_FIFO_NUM_ENTRIES * SBI_TLB_INFO_SIZE,
 				"IPI_TLB_FIFO_MEM");
-		if (!ipi_tlb_fifo_mem_off) {
-			sbi_scratch_free_offset(ipi_tlb_fifo_off);
+		if (!tlb_fifo_mem_off) {
+			sbi_scratch_free_offset(tlb_fifo_off);
+			sbi_scratch_free_offset(tlb_sync_off);
 			return SBI_ENOMEM;
 		}
 	} else {
-		if (!ipi_tlb_fifo_off ||
-		    !ipi_tlb_fifo_mem_off)
+		if (!tlb_sync_off ||
+		    !tlb_fifo_off ||
+		    !tlb_fifo_mem_off)
 			return SBI_ENOMEM;
 	}
 
-	ipi_tlb_q = sbi_scratch_offset_ptr(scratch, ipi_tlb_fifo_off);
-	ipi_tlb_mem = sbi_scratch_offset_ptr(scratch, ipi_tlb_fifo_mem_off);
+	tlb_sync = sbi_scratch_offset_ptr(scratch, tlb_sync_off);
+	tlb_q = sbi_scratch_offset_ptr(scratch, tlb_fifo_off);
+	tlb_mem = sbi_scratch_offset_ptr(scratch, tlb_fifo_mem_off);
 
-	sbi_fifo_init(ipi_tlb_q, ipi_tlb_mem,
+	*tlb_sync = 0;
+
+	sbi_fifo_init(tlb_q, tlb_mem,
 		      SBI_TLB_FIFO_NUM_ENTRIES, SBI_TLB_INFO_SIZE);
 
 	return 0;
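The init path now allocates three scratch offsets (sync word, fifo header, fifo
backing store) and unwinds the earlier ones whenever a later allocation fails. A
host-side model of that allocate-or-roll-back pattern (alloc_off() and free_off()
are illustrative stand-ins for sbi_scratch_alloc_offset()/sbi_scratch_free_offset()):

#include <stdio.h>

static unsigned long next_off = 8;	/* models the scratch allocator cursor */

static unsigned long alloc_off(unsigned long size, int fail)
{
	unsigned long off = fail ? 0 : next_off;	/* 0 means failure */

	if (!fail)
		next_off += size;
	return off;
}

static void free_off(unsigned long off)
{
	printf("rolled back offset %lu\n", off);
}

int main(void)
{
	unsigned long sync_off, fifo_off, mem_off;

	sync_off = alloc_off(8, 0);
	fifo_off = alloc_off(32, 0);
	mem_off = alloc_off(8 * 32, 1);		/* simulate a failure */
	if (!mem_off) {
		free_off(fifo_off);		/* unwind in reverse order */
		free_off(sync_off);
		return 1;
	}
	return 0;
}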