diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
index 46f25e63f0..f1deaa7230 100644
--- a/arch/arm/cpu/armv8/cache.S
+++ b/arch/arm/cpu/armv8/cache.S
@@ -150,11 +150,23 @@ ENTRY(__asm_invalidate_icache_all)
 	ret
 ENDPROC(__asm_invalidate_icache_all)
 
-ENTRY(__asm_flush_l3_cache)
+ENTRY(__asm_invalidate_l3_dcache)	/* default stub: does no cache work, only reports success */
 	mov	x0, #0			/* return status as success */
 	ret
-ENDPROC(__asm_flush_l3_cache)
-	.weak	__asm_flush_l3_cache
+ENDPROC(__asm_invalidate_l3_dcache)
+	.weak	__asm_invalidate_l3_dcache	/* weak: a strong definition elsewhere replaces this stub */
+
+ENTRY(__asm_flush_l3_dcache)		/* default stub: does no cache work, only reports success */
+	mov	x0, #0			/* return status as success */
+	ret
+ENDPROC(__asm_flush_l3_dcache)
+	.weak	__asm_flush_l3_dcache	/* weak: a strong definition elsewhere replaces this stub */
+
+ENTRY(__asm_invalidate_l3_icache)	/* default stub: does no cache work, only reports success */
+	mov	x0, #0			/* return status as success */
+	ret
+ENDPROC(__asm_invalidate_l3_icache)
+	.weak	__asm_invalidate_l3_icache	/* weak: a strong definition elsewhere replaces this stub */
 
 /*
  * void __asm_switch_ttbr(ulong new_ttbr)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index cd3f6c10ae..6c5630c0a8 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -421,19 +421,20 @@ __weak void mmu_setup(void)
 void invalidate_dcache_all(void)
 {
 	__asm_invalidate_dcache_all();
+	__asm_invalidate_l3_dcache();	/* L3 hook (weak default just returns 0); its int status is ignored here */
 }
 
 /*
  * Performs a clean & invalidation of the entire data cache at all levels.
  * This function needs to be inline to avoid using stack.
- * __asm_flush_l3_cache return status of timeout
+ * __asm_flush_l3_dcache return status of timeout
  */
 inline void flush_dcache_all(void)
 {
 	int ret;
 
 	__asm_flush_dcache_all();
-	ret = __asm_flush_l3_cache();
+	ret = __asm_flush_l3_dcache();
 	if (ret)
 		debug("flushing dcache returns 0x%x\n", ret);
 	else
@@ -623,7 +624,7 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
 
 void icache_enable(void)
 {
-	__asm_invalidate_icache_all();
+	invalidate_icache_all();	/* C wrapper: also calls __asm_invalidate_l3_icache() before CR_I is set */
 	set_sctlr(get_sctlr() | CR_I);
 }
 
@@ -640,6 +641,7 @@ int icache_status(void)
 void invalidate_icache_all(void)
 {
 	__asm_invalidate_icache_all();
+	__asm_invalidate_l3_icache();	/* L3 hook (weak default just returns 0); its int status is ignored here */
 }
 
 #else	/* CONFIG_SYS_ICACHE_OFF */
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S b/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
index 5d0b7a45c3..5700b1fb65 100644
--- a/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
+++ b/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
@@ -245,7 +245,7 @@ hnf_set_pstate:
 
 	ret
 
-ENTRY(__asm_flush_l3_cache)
+ENTRY(__asm_flush_l3_dcache)
 	/*
 	 * Return status in x0
 	 *    success 0
@@ -275,7 +275,7 @@ ENTRY(__asm_flush_l3_cache)
 	mov	x0, x8
 	mov	lr, x29
 	ret
-ENDPROC(__asm_flush_l3_cache)
+ENDPROC(__asm_flush_l3_dcache)
 #endif
 
 #ifdef CONFIG_MP
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index b928bd8d17..c3c88d2b30 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -93,7 +93,9 @@ void __asm_invalidate_dcache_all(void);
 void __asm_flush_dcache_range(u64 start, u64 end);
 void __asm_invalidate_tlb_all(void);
 void __asm_invalidate_icache_all(void);
-int __asm_flush_l3_cache(void);
+int __asm_invalidate_l3_dcache(void);	/* L3 hook; returns 0 on success */
+int __asm_flush_l3_dcache(void);	/* L3 hook; returns 0 on success, nonzero is reported via debug() by flush_dcache_all() */
+int __asm_invalidate_l3_icache(void);	/* L3 hook; returns 0 on success */
 void __asm_switch_ttbr(u64 new_ttbr);
 
 void armv8_switch_to_el2(void);
diff --git a/arch/arm/mach-tegra/tegra186/cache.S b/arch/arm/mach-tegra/tegra186/cache.S
new file mode 100644
index 0000000000..3061dc2ecf
--- /dev/null
+++ b/arch/arm/mach-tegra/tegra186/cache.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+
+#define SMC_SIP_INVOKE_MCE		0x82FFFF00
+#define MCE_SMC_ROC_FLUSH_CACHE		(SMC_SIP_INVOKE_MCE | 11)
+#define MCE_SMC_ROC_FLUSH_CACHE_ONLY	(SMC_SIP_INVOKE_MCE | 14)
+#define MCE_SMC_ROC_CLEAN_CACHE_ONLY	(SMC_SIP_INVOKE_MCE | 15)
+
+ENTRY(__asm_tegra_cache_smc)		/* in: x0 = SMC function ID; out: x0 = 0; clobbers x1-x6 */
+	mov	x1, #0			/* x1-x6 are passed to the secure monitor as zero */
+	mov	x2, #0
+	mov	x3, #0
+	mov	x4, #0
+	mov	x5, #0
+	mov	x6, #0
+	smc	#0			/* NOTE(review): the removed C version issued isb() before the SMC - confirm it is not needed */
+	mov	x0, #0			/* discard the monitor's result; always report success */
+	ret				/* callers branch here with b, so this returns to their caller */
+ENDPROC(__asm_tegra_cache_smc)
+
+ENTRY(__asm_invalidate_l3_dcache)	/* strong definition; replaces the weak stub in arch/arm/cpu/armv8/cache.S */
+	mov	x0, #(MCE_SMC_ROC_FLUSH_CACHE_ONLY & 0xffff)		/* x0 = low 16 bits of the SMC function ID */
+	movk	x0, #(MCE_SMC_ROC_FLUSH_CACHE_ONLY >> 16), lsl #16	/* insert bits 31:16 of the ID */
+	b	__asm_tegra_cache_smc	/* tail call: lr is untouched, so the helper's ret returns to our caller */
+ENDPROC(__asm_invalidate_l3_dcache)
+
+ENTRY(__asm_flush_l3_dcache)		/* strong definition; replaces the weak stub in arch/arm/cpu/armv8/cache.S */
+	mov	x0, #(MCE_SMC_ROC_CLEAN_CACHE_ONLY & 0xffff)		/* x0 = low 16 bits of the SMC function ID */
+	movk	x0, #(MCE_SMC_ROC_CLEAN_CACHE_ONLY >> 16), lsl #16	/* insert bits 31:16 of the ID */
+	b	__asm_tegra_cache_smc	/* tail call: lr is untouched, so the helper's ret returns to our caller */
+ENDPROC(__asm_flush_l3_dcache)
+
+ENTRY(__asm_invalidate_l3_icache)	/* strong definition; replaces the weak stub in arch/arm/cpu/armv8/cache.S */
+	mov	x0, #(MCE_SMC_ROC_FLUSH_CACHE & 0xffff)		/* x0 = low 16 bits of the SMC function ID */
+	movk	x0, #(MCE_SMC_ROC_FLUSH_CACHE >> 16), lsl #16	/* insert bits 31:16 of the ID */
+	b	__asm_tegra_cache_smc	/* tail call: lr is untouched, so the helper's ret returns to our caller */
+ENDPROC(__asm_invalidate_l3_icache)
diff --git a/arch/arm/mach-tegra/tegra186/cache.c b/arch/arm/mach-tegra/tegra186/cache.c
deleted file mode 100644
index adaed8968e..0000000000
--- a/arch/arm/mach-tegra/tegra186/cache.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2016, NVIDIA CORPORATION.
- *
- * SPDX-License-Identifier: GPL-2.0
- */
-
-#include <common.h>
-#include <asm/system.h>
-
-#define SMC_SIP_INVOKE_MCE	0x82FFFF00
-#define MCE_SMC_ROC_FLUSH_CACHE	11
-
-int __asm_flush_l3_cache(void)
-{
-	struct pt_regs regs = {0};
-
-	isb();
-
-	regs.regs[0] = SMC_SIP_INVOKE_MCE | MCE_SMC_ROC_FLUSH_CACHE;
-	smc_call(&regs);
-
-	return 0;
-}
diff --git a/arch/arm/mach-tegra/tegra186/nvtboot_ll.S b/arch/arm/mach-tegra/tegra186/nvtboot_ll.S
index 1eab890958..899c9cccbe 100644
--- a/arch/arm/mach-tegra/tegra186/nvtboot_ll.S
+++ b/arch/arm/mach-tegra/tegra186/nvtboot_ll.S
@@ -9,6 +9,7 @@
 #include <config.h>
 #include <linux/linkage.h>
 
+.align 8	/* NOTE(review): GAS .align on AArch64 takes a power-of-two exponent, so this is 256-byte alignment; confirm that is intended for an 8-byte .dword (8-byte alignment would be .align 3 / .balign 8) */
 .globl	nvtboot_boot_x0
 nvtboot_boot_x0:
 	.dword 0