firmware: Use lla to access all global symbols

When OpenSBI is compiled with -fPIE, the assembler expands "la" into a
GOT-indirect access. This costs an additional load instruction every time
a symbol address is obtained. However, if the symbol lies within +/-2GB
of the referencing instruction, "lla" can be used instead of "la" to
avoid the unneeded GOT reference. This patch assumes that the OpenSBI
image, excluding the payload, does not exceed 2GB. Based on this
assumption, all "la" instructions are replaced by "lla" to avoid
performance degradation when compiling with -fPIE.

Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Reviewed-by: Anup Patel <anup.patel@wdc.com>
This commit is contained in:
Vincent Chen 2021-03-17 09:16:37 +08:00 committed by Anup Patel
parent ff5bd949d5
commit 22d8ee9758
5 changed files with 64 additions and 64 deletions

View file

@ -57,39 +57,39 @@ _start:
bne a0, a6, _wait_relocate_copy_done
_try_lottery:
/* Jump to relocation wait loop if we don't get relocation lottery */
la a6, _relocate_lottery
lla a6, _relocate_lottery
li a7, 1
amoadd.w a6, a7, (a6)
bnez a6, _wait_relocate_copy_done
/* Save load address */
la t0, _load_start
la t1, _start
lla t0, _load_start
lla t1, _start
REG_S t1, 0(t0)
/* Relocate if load address != link address */
_relocate:
la t0, _link_start
lla t0, _link_start
REG_L t0, 0(t0)
la t1, _link_end
lla t1, _link_end
REG_L t1, 0(t1)
la t2, _load_start
lla t2, _load_start
REG_L t2, 0(t2)
sub t3, t1, t0
add t3, t3, t2
beq t0, t2, _relocate_done
la t4, _relocate_done
lla t4, _relocate_done
sub t4, t4, t2
add t4, t4, t0
blt t2, t0, _relocate_copy_to_upper
_relocate_copy_to_lower:
ble t1, t2, _relocate_copy_to_lower_loop
la t3, _relocate_lottery
lla t3, _relocate_lottery
BRANGE t2, t1, t3, _start_hang
la t3, _boot_status
lla t3, _boot_status
BRANGE t2, t1, t3, _start_hang
la t3, _relocate
la t5, _relocate_done
lla t3, _relocate
lla t5, _relocate_done
BRANGE t2, t1, t3, _start_hang
BRANGE t2, t1, t5, _start_hang
BRANGE t3, t5, t2, _start_hang
@ -102,12 +102,12 @@ _relocate_copy_to_lower_loop:
jr t4
_relocate_copy_to_upper:
ble t3, t0, _relocate_copy_to_upper_loop
la t2, _relocate_lottery
lla t2, _relocate_lottery
BRANGE t0, t3, t2, _start_hang
la t2, _boot_status
lla t2, _boot_status
BRANGE t0, t3, t2, _start_hang
la t2, _relocate
la t5, _relocate_done
lla t2, _relocate
lla t5, _relocate_done
BRANGE t0, t3, t2, _start_hang
BRANGE t0, t3, t5, _start_hang
BRANGE t2, t5, t0, _start_hang
@ -119,12 +119,12 @@ _relocate_copy_to_upper_loop:
blt t0, t1, _relocate_copy_to_upper_loop
jr t4
_wait_relocate_copy_done:
la t0, _start
la t1, _link_start
lla t0, _start
lla t1, _link_start
REG_L t1, 0(t1)
beq t0, t1, _wait_for_boot_hart
la t2, _boot_status
la t3, _wait_for_boot_hart
lla t2, _boot_status
lla t3, _wait_for_boot_hart
sub t3, t3, t0
add t3, t3, t1
1:
@ -143,10 +143,10 @@ _relocate_done:
* Mark relocate copy done
* Use _boot_status copy relative to the load address
*/
la t0, _boot_status
la t1, _link_start
lla t0, _boot_status
lla t1, _link_start
REG_L t1, 0(t1)
la t2, _load_start
lla t2, _load_start
REG_L t2, 0(t2)
sub t0, t0, t1
add t0, t0, t2
@ -161,19 +161,19 @@ _relocate_done:
call _reset_regs
/* Zero-out BSS */
la s4, _bss_start
la s5, _bss_end
lla s4, _bss_start
lla s5, _bss_end
_bss_zero:
REG_S zero, (s4)
add s4, s4, __SIZEOF_POINTER__
blt s4, s5, _bss_zero
/* Setup temporary trap handler */
la s4, _start_hang
lla s4, _start_hang
csrw CSR_MTVEC, s4
/* Setup temporary stack */
la s4, _fw_end
lla s4, _fw_end
li s5, (SBI_SCRATCH_SIZE * 2)
add sp, s4, s5
@ -184,7 +184,7 @@ _bss_zero:
#ifdef FW_FDT_PATH
/* Override previous arg1 */
la a1, fw_fdt_bin
lla a1, fw_fdt_bin
#endif
/*
@ -202,7 +202,7 @@ _bss_zero:
* s7 -> HART Count
* s8 -> HART Stack Size
*/
la a4, platform
lla a4, platform
#if __riscv_xlen == 64
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
@ -212,7 +212,7 @@ _bss_zero:
#endif
/* Setup scratch space for all the HARTs*/
la tp, _fw_end
lla tp, _fw_end
mul a5, s7, s8
add tp, tp, a5
/* Keep a copy of tp */
@ -230,8 +230,8 @@ _scratch_init:
/* Initialize scratch space */
/* Store fw_start and fw_size in scratch space */
la a4, _fw_start
la a5, _fw_end
lla a4, _fw_start
lla a5, _fw_end
mul t0, s7, s8
add a5, a5, t0
sub a5, a5, a4
@ -253,16 +253,16 @@ _scratch_init:
REG_S a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store warm_boot address in scratch space */
la a4, _start_warm
lla a4, _start_warm
REG_S a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp)
/* Store platform address in scratch space */
la a4, platform
lla a4, platform
REG_S a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp)
/* Store hartid-to-scratch function address in scratch space */
la a4, _hartid_to_scratch
lla a4, _hartid_to_scratch
REG_S a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp)
/* Store trap-exit function address in scratch space */
la a4, _trap_exit
lla a4, _trap_exit
REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(tp)
/* Clear tmp0 in scratch space */
REG_S zero, SBI_SCRATCH_TMP0_OFFSET(tp)
@ -343,7 +343,7 @@ _fdt_reloc_done:
/* mark boot hart done */
li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status
lla t1, _boot_status
REG_S t0, 0(t1)
fence rw, rw
j _start_warm
@ -351,7 +351,7 @@ _fdt_reloc_done:
/* waiting for boot hart to be done (_boot_status == 2) */
_wait_for_boot_hart:
li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status
lla t1, _boot_status
REG_L t1, 0(t1)
/* Reduce the bus traffic so that boot hart may proceed faster */
nop
@ -369,7 +369,7 @@ _start_warm:
csrw CSR_MIP, zero
/* Find HART count and HART stack size */
la a4, platform
lla a4, platform
#if __riscv_xlen == 64
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
@ -400,7 +400,7 @@ _start_warm:
3: bge s6, s7, _start_hang
/* Find the scratch space based on HART index */
la tp, _fw_end
lla tp, _fw_end
mul a5, s7, s8
add tp, tp, a5
mul a5, s8, s6
@ -415,13 +415,13 @@ _start_warm:
add sp, tp, zero
/* Setup trap handler */
la a4, _trap_handler
lla a4, _trap_handler
#if __riscv_xlen == 32
csrr a5, CSR_MISA
srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1
beq a5, zero, _skip_trap_handler_rv32_hyp
la a4, _trap_handler_rv32_hyp
lla a4, _trap_handler_rv32_hyp
_skip_trap_handler_rv32_hyp:
#endif
csrw CSR_MTVEC, a4
@ -432,7 +432,7 @@ _skip_trap_handler_rv32_hyp:
srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1
beq a5, zero, _skip_trap_exit_rv32_hyp
la a4, _trap_exit_rv32_hyp
lla a4, _trap_exit_rv32_hyp
csrr a5, CSR_MSCRATCH
REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(a5)
_skip_trap_exit_rv32_hyp:
@ -468,7 +468,7 @@ _hartid_to_scratch:
* t1 -> HART Stack End
* t2 -> Temporary
*/
la t2, platform
lla t2, platform
#if __riscv_xlen == 64
lwu t0, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(t2)
lwu t2, SBI_PLATFORM_HART_COUNT_OFFSET(t2)
@ -478,7 +478,7 @@ _hartid_to_scratch:
#endif
sub t2, t2, a1
mul t2, t2, t0
la t1, _fw_end
lla t1, _fw_end
add t1, t1, t2
li t2, SBI_SCRATCH_SIZE
sub a0, t1, t2

View file

@ -54,7 +54,7 @@ fw_boot_hart:
*/
fw_save_info:
/* Save next arg1 in 'a1' */
la a4, _dynamic_next_arg1
lla a4, _dynamic_next_arg1
REG_S a1, (a4)
/* Sanity checks */
@ -66,13 +66,13 @@ fw_save_info:
bgt a3, a4, _bad_dynamic_info
/* Save version == 0x1 fields */
la a4, _dynamic_next_addr
lla a4, _dynamic_next_addr
REG_L a3, FW_DYNAMIC_INFO_NEXT_ADDR_OFFSET(a2)
REG_S a3, (a4)
la a4, _dynamic_next_mode
lla a4, _dynamic_next_mode
REG_L a3, FW_DYNAMIC_INFO_NEXT_MODE_OFFSET(a2)
REG_S a3, (a4)
la a4, _dynamic_options
lla a4, _dynamic_options
REG_L a3, FW_DYNAMIC_INFO_OPTIONS_OFFSET(a2)
REG_S a3, (a4)
@ -80,7 +80,7 @@ fw_save_info:
li a4, 0x2
REG_L a3, FW_DYNAMIC_INFO_VERSION_OFFSET(a2)
blt a3, a4, 2f
la a4, _dynamic_boot_hart
lla a4, _dynamic_boot_hart
REG_L a3, FW_DYNAMIC_INFO_BOOT_HART_OFFSET(a2)
REG_S a3, (a4)
2:
@ -96,7 +96,7 @@ fw_save_info:
* The next arg1 should be returned in 'a0'.
*/
fw_next_arg1:
la a0, _dynamic_next_arg1
lla a0, _dynamic_next_arg1
REG_L a0, (a0)
ret
@ -108,7 +108,7 @@ fw_next_arg1:
* The next address should be returned in 'a0'.
*/
fw_next_addr:
la a0, _dynamic_next_addr
lla a0, _dynamic_next_addr
REG_L a0, (a0)
ret
@ -120,7 +120,7 @@ fw_next_addr:
* The next address should be returned in 'a0'
*/
fw_next_mode:
la a0, _dynamic_next_mode
lla a0, _dynamic_next_mode
REG_L a0, (a0)
ret
@ -133,7 +133,7 @@ fw_next_mode:
* The next address should be returned in 'a0'.
*/
fw_options:
la a0, _dynamic_options
lla a0, _dynamic_options
REG_L a0, (a0)
ret

View file

@ -59,7 +59,7 @@ fw_next_arg1:
* The next address should be returned in 'a0'.
*/
fw_next_addr:
la a0, _jump_addr
lla a0, _jump_addr
REG_L a0, (a0)
ret

View file

@ -59,7 +59,7 @@ fw_next_arg1:
* The next address should be returned in 'a0'.
*/
fw_next_addr:
la a0, payload_bin
lla a0, payload_bin
ret
.section .entry, "ax", %progbits

View file

@ -28,20 +28,20 @@
.globl _start
_start:
/* Pick one hart to run the main boot sequence */
la a3, _hart_lottery
lla a3, _hart_lottery
li a2, 1
amoadd.w a3, a2, (a3)
bnez a3, _start_hang
/* Save a0 and a1 */
la a3, _boot_a0
lla a3, _boot_a0
REG_S a0, 0(a3)
la a3, _boot_a1
lla a3, _boot_a1
REG_S a1, 0(a3)
/* Zero-out BSS */
la a4, _bss_start
la a5, _bss_end
lla a4, _bss_start
lla a5, _bss_end
_bss_zero:
REG_S zero, (a4)
add a4, a4, __SIZEOF_POINTER__
@ -53,18 +53,18 @@ _start_warm:
csrw CSR_SIP, zero
/* Setup exception vectors */
la a3, _start_hang
lla a3, _start_hang
csrw CSR_STVEC, a3
/* Setup stack */
la a3, _payload_end
lla a3, _payload_end
li a4, 0x2000
add sp, a3, a4
/* Jump to C main */
la a3, _boot_a0
lla a3, _boot_a0
REG_L a0, 0(a3)
la a3, _boot_a1
lla a3, _boot_a1
REG_L a1, 0(a3)
call test_main