nyan: tegra124: Enable I, D and L2 caches in romstage.
This speeds up execution but does require cache management in drivers.
BUG=None
TEST=Built and booted into depthcharge on nyan. Measured a speed up in
execution.
BRANCH=None
Change-Id: I7efe6af2c38e41402fa874ed59798f136e7e8ad4
Signed-off-by: Gabe Black <gabeblack@google.com>
Reviewed-on: https://chromium-review.googlesource.com/173777
Reviewed-by: Gabe Black <gabeblack@chromium.org>
Commit-Queue: Gabe Black <gabeblack@chromium.org>
Tested-by: Gabe Black <gabeblack@chromium.org>
diff --git a/src/arch/arm/armv7/Makefile.inc b/src/arch/arm/armv7/Makefile.inc
index 481898d..99560dd 100644
--- a/src/arch/arm/armv7/Makefile.inc
+++ b/src/arch/arm/armv7/Makefile.inc
@@ -45,6 +45,9 @@
ifeq ($(CONFIG_ARM_ROMSTAGE_ARMV7),y)
romstage-y += cache.c
+romstage-y += exception.c
+romstage-y += exception_asm.S
+romstage-y += mmu.c
romstage-c-ccopts += $(armv7_flags)
romstage-S-ccopts += $(armv7_flags)
diff --git a/src/mainboard/google/nyan/Kconfig b/src/mainboard/google/nyan/Kconfig
index 9164489..99a39e0 100644
--- a/src/mainboard/google/nyan/Kconfig
+++ b/src/mainboard/google/nyan/Kconfig
@@ -40,6 +40,14 @@
int
default 2048
+config DRAM_DMA_START
+ hex
+ default 0x83000000
+
+config DRAM_DMA_SIZE
+ hex
+ default 0x00100000
+
choice
prompt "BCT boot media"
default BCT_CFG_SPI
diff --git a/src/mainboard/google/nyan/mainboard.c b/src/mainboard/google/nyan/mainboard.c
index d7a6656..466c14e 100644
--- a/src/mainboard/google/nyan/mainboard.c
+++ b/src/mainboard/google/nyan/mainboard.c
@@ -223,3 +223,14 @@
.name = "nyan",
.enable_dev = mainboard_enable,
};
+
+void lb_board(struct lb_header *header)
+{
+ struct lb_range *dma;
+ /* Publish the board's DMA range as an LB_TAB_DMA record in the table at header. */
+ dma = (struct lb_range *)lb_new_record(header); /* presumably appends a fresh record -- confirm */
+ dma->tag = LB_TAB_DMA;
+ dma->size = sizeof(*dma); /* record length in bytes */
+ dma->range_start = CONFIG_DRAM_DMA_START; /* Kconfig value, a byte address */
+ dma->range_size = CONFIG_DRAM_DMA_SIZE; /* Kconfig value, length in bytes */
+}
diff --git a/src/mainboard/google/nyan/romstage.c b/src/mainboard/google/nyan/romstage.c
index 5a66dde..a31f1f1 100644
--- a/src/mainboard/google/nyan/romstage.c
+++ b/src/mainboard/google/nyan/romstage.c
@@ -17,6 +17,9 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <arch/cache.h>
+#include <arch/cpu.h>
+#include <arch/exception.h>
#include <arch/stages.h>
#include <device/device.h>
#include <cbfs.h>
@@ -25,11 +28,78 @@
#include "soc/nvidia/tegra124/chip.h"
#include <soc/display.h>
+// Convenient shorthand: DRAM start, size and end, all expressed in MB.
+#define DRAM_START (CONFIG_SYS_SDRAM_BASE >> 20)
+#define DRAM_SIZE CONFIG_DRAM_SIZE_MB
+#define DRAM_END (DRAM_START + DRAM_SIZE) /* exclusive: first MB past DRAM */
+
+enum { /* Bits and fields used when programming the L2 Control Register. */
+ L2CTLR_ECC_PARITY = 0x1 << 21, /* single bit, bit 21 */
+ L2CTLR_TAG_RAM_LATENCY_MASK = 0x7 << 6, /* 3-bit field, bits [8:6] */
+ L2CTLR_TAG_RAM_LATENCY_CYCLES_3 = 2 << 6, /* field value 2; 3 cycles per the name */
+ L2CTLR_DATA_RAM_LATENCY_MASK = 0x7 << 0, /* 3-bit field, bits [2:0] */
+ L2CTLR_DATA_RAM_LATENCY_CYCLES_3 = 2 << 0 /* field value 2; 3 cycles per the name */
+};
+
+enum { /* Single-bit flags set in the L2 Auxiliary Control Register. */
+ L2ACTLR_FORCE_L2_LOGIC_CLOCK_ENABLE_ACTIVE = 0x1 << 27, /* bit 27 */
+ L2ACTLR_ENABLE_HAZARD_DETECT_TIMEOUT = 0x1 << 7, /* bit 7 */
+ L2ACTLR_DISABLE_CLEAN_EVICT_PUSH_EXTERNAL = 0x1 << 3 /* bit 3 */
+};
+
+/* Sets L2CTLR tag and data RAM latency to 3 cycles and sets the ECC/parity bit. */
+static void configure_l2ctlr(void)
+{
+ uint32_t val;
+
+ val = read_l2ctlr(); /* read-modify-write: bits not named below are kept */
+ val &= ~(L2CTLR_DATA_RAM_LATENCY_MASK | L2CTLR_TAG_RAM_LATENCY_MASK); /* clear both latency fields */
+ val |= (L2CTLR_DATA_RAM_LATENCY_CYCLES_3 | L2CTLR_TAG_RAM_LATENCY_CYCLES_3 |
+ L2CTLR_ECC_PARITY);
+ write_l2ctlr(val);
+}
+
+/* Configures L2 Auxiliary Control Register for Cortex A15 by setting three flags. */
+static void configure_l2actlr(void)
+{
+ uint32_t val;
+
+ val = read_l2actlr(); /* read-modify-write: only sets bits, never clears any */
+ val |= (L2ACTLR_DISABLE_CLEAN_EVICT_PUSH_EXTERNAL |
+ L2ACTLR_ENABLE_HAZARD_DETECT_TIMEOUT |
+ L2ACTLR_FORCE_L2_LOGIC_CLOCK_ENABLE_ACTIVE);
+ write_l2actlr(val);
+}
+
void main(void)
{
- void *entry;
- const struct device *soc;
- const struct soc_nvidia_tegra124_config *config;
+ // Globally disable MMU, caches and branch prediction (these should
+ // already be disabled by default on reset).
+ uint32_t sctlr = read_sctlr();
+ sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I);
+ write_sctlr(sctlr);
+
+ arm_invalidate_caches();
+
+ // Re-enable icache and branch prediction.
+ sctlr = read_sctlr();
+ sctlr |= SCTLR_Z | SCTLR_I;
+ write_sctlr(sctlr);
+
+ configure_l2ctlr();
+ configure_l2actlr();
+
+ mmu_init();
+ mmu_config_range(0, DRAM_START, DCACHE_OFF);
+ mmu_config_range(DRAM_START, DRAM_SIZE, DCACHE_WRITEBACK);
+ mmu_config_range(CONFIG_DRAM_DMA_START >> 20,
+ CONFIG_DRAM_DMA_SIZE >> 20, DCACHE_OFF);
+ mmu_config_range(DRAM_END, 4096 - DRAM_END, DCACHE_OFF);
+ mmu_disable_range(0, 1);
+ dcache_invalidate_all();
+ dcache_mmu_enable();
+
+ exception_init();
/* for quality of the user interface, it's important to get
* the video going ASAP. Because there are long delays in some
@@ -39,15 +109,17 @@
* early as we can in the RW stage, but never in the RO stage.
*/
- soc = dev_find_slot(DEVICE_PATH_CPU_CLUSTER, 0);
+ const struct device *soc = dev_find_slot(DEVICE_PATH_CPU_CLUSTER, 0);
printk(BIOS_SPEW, "s%s: soc is %p\n", __func__, soc);
- if (soc && soc->chip_info){
- config = soc->chip_info;
+ if (soc && soc->chip_info) {
+ const struct soc_nvidia_tegra124_config *config =
+ soc->chip_info;
setup_display((struct soc_nvidia_tegra124_config *)config);
}
cbmem_initialize_empty();
- entry = cbfs_load_stage(CBFS_DEFAULT_MEDIA, "fallback/coreboot_ram");
+ void *entry = cbfs_load_stage(CBFS_DEFAULT_MEDIA,
+ "fallback/coreboot_ram");
stage_exit(entry);
}
diff --git a/src/soc/nvidia/tegra124/Kconfig b/src/soc/nvidia/tegra124/Kconfig
index d0f6888..dc4b634 100644
--- a/src/soc/nvidia/tegra124/Kconfig
+++ b/src/soc/nvidia/tegra124/Kconfig
@@ -68,6 +68,11 @@
hex
default 0x800
+# TTB needs to be aligned to 16KB. Stick it in iRAM.
+config TTB_BUFFER
+ hex "memory address of the TTB buffer"
+ default 0x40000000
+
config CBFS_CACHE_ADDRESS
hex "memory address to put CBFS cache data"
default 0x803c0000