From 4c31be59248127ae00a2ff0c22bf88b1cf34bf74 Mon Sep 17 00:00:00 2001 From: aottaviano Date: Sun, 18 May 2025 17:34:22 +0200 Subject: [PATCH 01/11] include: Update drivers for carfield's cluster --- include/archi/chips/carfield-cluster/memory_map.h | 3 +++ include/archi/chips/carfield-cluster/pulp.h | 1 + include/hal/chips/carfield-cluster/pulp.h | 1 + 3 files changed, 5 insertions(+) diff --git a/include/archi/chips/carfield-cluster/memory_map.h b/include/archi/chips/carfield-cluster/memory_map.h index 729021ed..10db2bb5 100644 --- a/include/archi/chips/carfield-cluster/memory_map.h +++ b/include/archi/chips/carfield-cluster/memory_map.h @@ -100,6 +100,7 @@ #define ARCHI_MCHAN_EXT_OFFSET 0x00001800 #define ARCHI_HMR_OFFSET 0x00002000 #define ARCHI_TCDM_SCRUBBER_OFFSET 0x00002400 +#define ARCHI_HWPE_HCI_ECC_OFFSET 0x00002800 #define ARCHI_CLUSTER_PERIPHERALS_ADDR ( ARCHI_CLUSTER_ADDR + ARCHI_CLUSTER_PERIPHERALS_OFFSET ) #define ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_GLOBAL_ADDR(cid) + ARCHI_CLUSTER_PERIPHERALS_OFFSET ) @@ -111,6 +112,7 @@ #define ARCHI_MCHAN_EXT_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_MCHAN_EXT_OFFSET ) #define ARCHI_HMR_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_HMR_OFFSET ) #define ARCHI_TCDM_SCRUBBER_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_TCDM_SCRUBBER_OFFSET ) +#define ARCHI_HWPE_HCI_ECC_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_HWPE_HCI_ECC_OFFSET ) #define ARCHI_CLUSTER_CTRL_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_CLUSTER_CTRL_OFFSET ) #define ARCHI_ICACHE_CTRL_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_ICACHE_CTRL_OFFSET ) @@ -120,6 +122,7 @@ #define ARCHI_IDMA_EXT_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_IDMA_EXT_OFFSET ) #define ARCHI_HMR_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_HMR_OFFSET ) #define ARCHI_TCDM_SCRUBBER_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_TCDM_SCRUBBER_OFFSET ) +#define ARCHI_HWPE_HCI_ECC_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_HWPE_HCI_ECC_OFFSET ) /* diff --git a/include/archi/chips/carfield-cluster/pulp.h b/include/archi/chips/carfield-cluster/pulp.h index 83ba2a6e..54209f0b 100644 --- a/include/archi/chips/carfield-cluster/pulp.h +++ b/include/archi/chips/carfield-cluster/pulp.h @@ -47,5 +47,6 @@ #include "archi/udma/udma_v3.h" #include "archi/hmr/hmr_v1.h" #include "archi/tcdm_scrubber/tcdm_scrubber.h" +#include "archi/hwpe_hci_ecc/hwpe_hci_ecc.h" #endif diff --git a/include/hal/chips/carfield-cluster/pulp.h b/include/hal/chips/carfield-cluster/pulp.h index 1cf1d49a..671dd443 100644 --- a/include/hal/chips/carfield-cluster/pulp.h +++ b/include/hal/chips/carfield-cluster/pulp.h @@ -43,5 +43,6 @@ #include "hal/udma/uart/udma_uart_v1.h" #include "hal/hmr/hmr_v1.h" #include "hal/tcdm_scrubber/tcdm_scrubber.h" +#include "hal/hwpe_hci_ecc/hwpe_hci_ecc.h" #endif From d65361a5ab2b02b4fe2a970e8f1be553e5987532 Mon Sep 17 00:00:00 2001 From: aottaviano Date: Wed, 9 Jul 2025 20:18:15 +0200 Subject: [PATCH 02/11] include/archi/chips/carfield: Set default cores to 8 --- include/archi/chips/carfield-cluster/properties.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/archi/chips/carfield-cluster/properties.h b/include/archi/chips/carfield-cluster/properties.h index d2777ed5..36ad6c9a 100644 --- a/include/archi/chips/carfield-cluster/properties.h +++ b/include/archi/chips/carfield-cluster/properties.h @@ -89,7 +89,7 @@ #define ARCHI_HAS_CLUSTER 1 #define ARCHI_L1_TAS_BIT 20 #ifndef ARCHI_CLUSTER_NB_PE -#define ARCHI_CLUSTER_NB_PE 12 +#define ARCHI_CLUSTER_NB_PE 8 #endif #define ARCHI_NB_CLUSTER 1 From 461dc33aba3443db549352afb2d167ee5072ab0a Mon Sep 17 00:00:00 2001 From: Luigi Ghionda Date: Tue, 30 Sep 2025 23:31:25 +0200 Subject: [PATCH 03/11] Add UART drivers when it is a host peripheral (instead of using udma) This mod is tailored for Astral and its host, Cheshire, whose basic functions are reused. --- drivers/host_uart.c | 49 +++++++++++++++++++ drivers/include/host_uart.h | 42 ++++++++++++++++ drivers/{uart.c => udma_uart.c} | 0 .../archi/chips/astral-cluster/memory_map.h | 1 + include/pulp.h | 4 +- lib/libc/minimal/io.c | 9 ++++ rules/pulpos/default_rules.mk | 4 ++ rules/pulpos/src.mk | 10 +++- 8 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 drivers/host_uart.c create mode 100644 drivers/include/host_uart.h rename drivers/{uart.c => udma_uart.c} (100%) diff --git a/drivers/host_uart.c b/drivers/host_uart.c new file mode 100644 index 00000000..f02d0157 --- /dev/null +++ b/drivers/host_uart.c @@ -0,0 +1,49 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Nils Wistoff +// Paul Scheffler + +// Adapted from Cheshire + +#include "host_uart.h" + +static inline volatile uint8_t *reg8(void *base, int offs) { + return (volatile uint8_t *)(base + offs); +} + +static inline void fence() { + asm volatile("fence" ::: "memory"); +} + +int uart_read_ready(void *uart_base) { + return *reg8(uart_base, UART_LINE_STATUS_REG_OFFSET) & (1 << UART_LINE_STATUS_DATA_READY_BIT); +} + +static inline int __uart_write_ready(void *uart_base) { + return *reg8(uart_base, UART_LINE_STATUS_REG_OFFSET) & (1 << UART_LINE_STATUS_THR_EMPTY_BIT); +} + +static inline int __uart_write_idle(void *uart_base) { + return __uart_write_ready(uart_base) && + *reg8(uart_base, UART_LINE_STATUS_REG_OFFSET) & (1 << UART_LINE_STATUS_TMIT_EMPTY_BIT); +} + +void uart_write(void *uart_base, uint8_t byte) { + while (!__uart_write_ready(uart_base)) + ; + *reg8(uart_base, UART_THR_REG_OFFSET) = byte; +} + +void uart_write_flush(void *uart_base) { + fence(); + while (!__uart_write_idle(uart_base)) + ; +} + +uint8_t uart_read(void *uart_base) { + while (!uart_read_ready(uart_base)) + ; + return *reg8(uart_base, UART_RBR_REG_OFFSET); +} diff --git a/drivers/include/host_uart.h b/drivers/include/host_uart.h new file mode 100644 index 00000000..05572c1a --- /dev/null +++ b/drivers/include/host_uart.h @@ -0,0 +1,42 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Nils Wistoff +// Paul Scheffler + +// Adapted from Cheshire + +#pragma once + +#include + +// Register offsets +#define UART_RBR_REG_OFFSET 0 +#define UART_THR_REG_OFFSET 0 +#define UART_INTR_ENABLE_REG_OFFSET 4 +#define UART_INTR_IDENT_REG_OFFSET 8 +#define UART_FIFO_CONTROL_REG_OFFSET 8 +#define UART_LINE_CONTROL_REG_OFFSET 12 +#define UART_MODEM_CONTROL_REG_OFFSET 16 +#define UART_LINE_STATUS_REG_OFFSET 20 +#define UART_MODEM_STATUS_REG_OFFSET 24 +#define UART_DLAB_LSB_REG_OFFSET 0 +#define UART_DLAB_MSB_REG_OFFSET 4 + +// Register fields +#define UART_LINE_STATUS_DATA_READY_BIT 0 +#define UART_LINE_STATUS_THR_EMPTY_BIT 5 +#define UART_LINE_STATUS_TMIT_EMPTY_BIT 6 + +int uart_read_ready(void *uart_base); + +void uart_write(void *uart_base, uint8_t byte); + +void uart_write_str(void *uart_base, void *src, uint64_t len); + +void uart_write_flush(void *uart_base); + +uint8_t uart_read(void *uart_base); + +void uart_read_str(void *uart_base, void *dst, uint64_t len); diff --git a/drivers/uart.c b/drivers/udma_uart.c similarity index 100% rename from drivers/uart.c rename to drivers/udma_uart.c diff --git a/include/archi/chips/astral-cluster/memory_map.h b/include/archi/chips/astral-cluster/memory_map.h index 10db2bb5..d4d1bd0d 100644 --- a/include/archi/chips/astral-cluster/memory_map.h +++ b/include/archi/chips/astral-cluster/memory_map.h @@ -60,6 +60,7 @@ #define ARCHI_FC_ITC_ADDR ( ARCHI_SOC_PERIPHERALS_ADDR + ARCHI_FC_ITC_OFFSET ) #define ARCHI_FC_TIMER_ADDR ( ARCHI_SOC_PERIPHERALS_ADDR + ARCHI_FC_TIMER_OFFSET ) #define ARCHI_STDOUT_ADDR 0x03002000 +#define ARCHI_HOST_UART_ADDR 0x03002000 #define ARCHI_FLL_AREA_SIZE 0x00000010 diff --git a/include/pulp.h b/include/pulp.h index 30d1355c..cb39c7d3 100755 --- a/include/pulp.h +++ b/include/pulp.h @@ -93,12 +93,12 @@ void _start(); #ifdef ARCHI_CLUSTER_NB_PE static inline int get_core_num() { return ARCHI_CLUSTER_NB_PE; } #endif - - +#if defined(CONFIG_IO_UART) && CONFIG_IO_UART == 1 int uart_open(int uart_id, int baudrate); void uart_close(int uart_id); int uart_write(int uart_id, void *buffer, uint32_t size); int uart_read(int uart_id, void *buffer, uint32_t size); +#endif void synch_barrier(); diff --git a/lib/libc/minimal/io.c b/lib/libc/minimal/io.c index 3a0857a1..3510ec45 100644 --- a/lib/libc/minimal/io.c +++ b/lib/libc/minimal/io.c @@ -239,12 +239,21 @@ static void pos_libc_putc_uart(char c) } #endif +#if defined(CONFIG_IO_UART) && CONFIG_IO_UART == 2 +static void pos_libc_putc_host_uart(char c) +{ + uart_write(ARCHI_HOST_UART_ADDR, c); + uart_write_flush(ARCHI_HOST_UART_ADDR); +} +#endif static void pos_putc(char c) { #if defined(CONFIG_IO_UART) && CONFIG_IO_UART == 1 pos_libc_putc_uart(c); +#elif defined(CONFIG_IO_UART) && CONFIG_IO_UART == 2 + pos_libc_putc_host_uart(c); #else pos_libc_putc_stdout(c); #endif diff --git a/rules/pulpos/default_rules.mk b/rules/pulpos/default_rules.mk index e6c2695c..43a20284 100644 --- a/rules/pulpos/default_rules.mk +++ b/rules/pulpos/default_rules.mk @@ -41,6 +41,10 @@ ifdef io ifeq '$(io)' 'uart' CONFIG_IO_UART=1 endif +ifeq '$(io)' 'host_uart' +CONFIG_IO_UART=2 +PULP_APP_CFLAGS += -I$(PULPRT_HOME)/drivers/include +endif endif VPATH = $(PULPRT_HOME) diff --git a/rules/pulpos/src.mk b/rules/pulpos/src.mk index 22152d02..50ae84c6 100644 --- a/rules/pulpos/src.mk +++ b/rules/pulpos/src.mk @@ -6,7 +6,15 @@ ifeq '$(CONFIG_LIBC_MINIMAL)' '1' PULP_SRCS += lib/libc/minimal/io.c lib/libc/minimal/fprintf.c lib/libc/minimal/prf.c lib/libc/minimal/sprintf.c endif -PULP_SRCS += kernel/init.c kernel/kernel.c kernel/alloc.c kernel/alloc_pool.c kernel/irq.c kernel/soc_event.c kernel/bench.c drivers/uart.c +ifeq '$(CONFIG_IO_UART)' '1' +PULP_SRCS += drivers/udma_uart.c +endif + +ifeq '$(CONFIG_IO_UART)' '2' +PULP_SRCS += drivers/host_uart.c +endif + +PULP_SRCS += kernel/init.c kernel/kernel.c kernel/alloc.c kernel/alloc_pool.c kernel/irq.c kernel/soc_event.c kernel/bench.c PULP_ASM_SRCS += kernel/irq_asm.S From 274a211da2f2bd9f366df08bcde78e28f33a77e2 Mon Sep 17 00:00:00 2001 From: Riccardo Gandolfi Date: Tue, 6 May 2025 12:57:15 +0200 Subject: [PATCH 04/11] [iDMA] Updates on iDMA drivers 3D transfers support Added ARCHI_HAS_DMA_DEMUX define for correct operation of iDMA in multi-core situations Fix for ENTRY variable in default_rules.mk Fix on DMA demux offsets in pulp_cluster memory map Remapped iDMA registers for pulp_cluster instance --- include/archi/chips/pulp_cluster/properties.h | 2 ++ include/archi/dma/idma_v2.h | 2 +- include/hal/dma/idma_v2.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/archi/chips/pulp_cluster/properties.h b/include/archi/chips/pulp_cluster/properties.h index 08bab729..2a7c0ef5 100644 --- a/include/archi/chips/pulp_cluster/properties.h +++ b/include/archi/chips/pulp_cluster/properties.h @@ -18,6 +18,8 @@ #ifndef __ARCHI_CHIPS_PULP_PROPERTIES_H__ #define __ARCHI_CHIPS_PULP_PROPERTIES_H__ +#define ARCHI_HAS_DMA_DEMUX 1 + /* * FPGA */ diff --git a/include/archi/dma/idma_v2.h b/include/archi/dma/idma_v2.h index 06d8b414..dbc4398f 100644 --- a/include/archi/dma/idma_v2.h +++ b/include/archi/dma/idma_v2.h @@ -301,4 +301,4 @@ extern "C" { } // extern "C" #endif #endif // _IDMA_REG32_3D_REG_DEFS_ -// End generated register defines for idma_reg32_3d \ No newline at end of file +// End generated register defines for idma_reg32_3d diff --git a/include/hal/dma/idma_v2.h b/include/hal/dma/idma_v2.h index 40ed2a1e..13d250af 100644 --- a/include/hal/dma/idma_v2.h +++ b/include/hal/dma/idma_v2.h @@ -266,7 +266,7 @@ static inline int pulp_cl_idma_L2ToL1_2d(unsigned int src, unsigned int dst, uns */ static inline int pulp_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); -static inline int pulp_idma_cl_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); +static inline int pulp_cl_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); @@ -341,6 +341,7 @@ static inline int pulp_cl_idma_zeromem(unsigned int dst, unsigned short size, id /** DMA barrier. * This blocks the core until no transfer is on-going in the DMA. + * Careful: these only wait for transfers towards L2 */ static inline void plp_dma_barrier(); static inline void plp_cl_dma_barrier(); @@ -900,6 +901,7 @@ static inline int pulp_idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsign dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); return dma_tx_id; } + static inline int pulp_cl_idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { unsigned int dma_tx_id; unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1_2D; @@ -916,7 +918,10 @@ static inline int pulp_cl_idma_L2ToL1_2d(unsigned int src, unsigned int dst, uns return dma_tx_id; } +<<<<<<< HEAD +======= +>>>>>>> 004a4d4 ([iDMA] Updates on iDMA drivers) static inline int pulp_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { unsigned int dma_tx_id; unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_2D; @@ -932,6 +937,7 @@ static inline int pulp_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsign dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); return dma_tx_id; } + static inline int pulp_cl_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { unsigned int dma_tx_id; unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_2D; @@ -1076,6 +1082,7 @@ static inline int pulp_idma_zeromem(unsigned int dst, unsigned short size, idma_ asm volatile("" : : : "memory"); return dma_tx_id; } + static inline int pulp_cl_idma_zeromem(unsigned int dst, unsigned short size, idma_prot_t dst_prot) { unsigned int dma_tx_id; unsigned int cfg = IDMA_DEFAULT_CONFIG; @@ -1125,4 +1132,4 @@ static inline void plp_cl_dma_barrier_toL2() { } } -#endif // __HAL_IDMA_V2_H__ \ No newline at end of file +#endif // __HAL_IDMA_V2_H__ From 2481e98c637452839139c9f7b5df69aaf8ed3aa7 Mon Sep 17 00:00:00 2001 From: RiccardoGandolfi Date: Wed, 10 Sep 2025 11:58:37 +0200 Subject: [PATCH 05/11] [QUESTAONE] Add QuestaOne flow for pulp cluster Fix to avoid using numpy + QuestaOne flow --- bin/slm_hyper.py | 10 ++++++---- rules/pulpos/default_rules.mk | 20 ++++++++++++++++++++ rules/pulpos/targets/pulp_cluster.mk | 14 +++++++++++++- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/bin/slm_hyper.py b/bin/slm_hyper.py index 91ab3bfc..59a71ef4 100755 --- a/bin/slm_hyper.py +++ b/bin/slm_hyper.py @@ -1,7 +1,6 @@ #!/usr/bin/python3 #Written by ABA to update the format of the slm file to be compliant with hyperflash model used in testbench -import numpy as np import os import os.path import argparse @@ -24,7 +23,10 @@ with open(args.input_file, "rU") as fi: data = list(map(lambda x:x.split(delimiter), fi.read().strip().split("\n"))) fo=open(args.output_file, "w") -A=np.array(data) + +# Write the header fo.write('@000000\n') -for i in range(0, A.shape[0],2): - fo.write('%s%s\n' %(A[i+1][1],A[i][1])) + +# Iterate over rows in pairs +for i in range(0, len(data)-1, 2): # subtract 1 to avoid IndexError if odd number of rows + fo.write(f'{data[i+1][1]}{data[i][1]}\n') diff --git a/rules/pulpos/default_rules.mk b/rules/pulpos/default_rules.mk index 43a20284..c91ac17b 100644 --- a/rules/pulpos/default_rules.mk +++ b/rules/pulpos/default_rules.mk @@ -16,6 +16,8 @@ ifndef PULP_RUNTIME_GCC_TOOLCHAIN PULP_RUNTIME_GCC_TOOLCHAIN := $(PULP_RISCV_GCC_TOOLCHAIN) endif endif +QSIM ?= qsim +USE_QONE ?= 0 ifdef PULP_RUNTIME_GCC_TOOLCHAIN PULP_CC := $(PULP_RUNTIME_GCC_TOOLCHAIN)/bin/$(PULP_CC) @@ -351,6 +353,24 @@ else cd $(TARGET_BUILD_DIR) && export VSIM_RUNNER_FLAGS='$(vsim_flags)' && $(VSIM) -64 -c -do 'source $(VSIM_PATH)/tcl_files/config/run_and_exit.tcl' -do 'source $(VSIM_PATH)/tcl_files/run.tcl; run_and_exit;' endif +link_design_file: + ln -sf $(VSIM_PATH)/design.bin build/design.bin + +run_qone: $(TARGET_BUILD_DIR)/modelsim.ini $(TARGET_BUILD_DIR)/work $(TARGET_BUILD_DIR)/boot $(TARGET_BUILD_DIR)/tcl_files $(TARGET_BUILD_DIR)/stdout $(TARGET_BUILD_DIR)/fs $(TARGET_BUILD_DIR)/waves link_design_file + $(PULPRT_HOME)/bin/stim_utils.py --binary=$(TARGETS) --vectors=$(TARGET_BUILD_DIR)/vectors/stim.txt + $(PULPRT_HOME)/bin/plp_mkflash --flash-boot-binary=$(TARGETS) --stimuli=$(TARGET_BUILD_DIR)/vectors/qspi_stim.slm --flash-type=spi --qpi + $(PULPRT_HOME)/bin/slm_hyper.py --input=$(TARGET_BUILD_DIR)/vectors/qspi_stim.slm --output=$(TARGET_BUILD_DIR)/vectors/hyper_stim.slm +ifndef VSIM_PATH + $(error "VSIM_PATH is undefined. Either call \ + 'source $$YOUR_HW_DIR/setup/vsim.sh' or set it manually.") +endif + +ifdef gui + export USE_QONE=1 && cd $(TARGET_BUILD_DIR) && export VSIM_RUNNER_FLAGS='$(vsim_flags)' && export VOPT_ACC_ENA="YES" && $(QSIM) -do 'source $(VSIM_PATH)/tcl_files/config/run_and_exit.tcl' -do 'source $(VSIM_PATH)/tcl_files/run.tcl; ' +else + export USE_QONE=1 && cd $(TARGET_BUILD_DIR) && export VSIM_RUNNER_FLAGS='$(vsim_flags)' && $(QSIM) -c -do 'source $(VSIM_PATH)/tcl_files/config/run_and_exit.tcl' -do 'source $(VSIM_PATH)/tcl_files/run.tcl; run_and_exit;' +endif + endif ifeq '$(platform)' 'fpga' diff --git a/rules/pulpos/targets/pulp_cluster.mk b/rules/pulpos/targets/pulp_cluster.mk index 07508710..e41d6bfb 100644 --- a/rules/pulpos/targets/pulp_cluster.mk +++ b/rules/pulpos/targets/pulp_cluster.mk @@ -36,6 +36,8 @@ PULP_AR ?= riscv32-unknown-elf-ar PULP_LD ?= riscv32-unknown-elf-gcc PULP_OBJDUMP ?= riscv32-unknown-elf-objdump +USE_QONE ?= 0 + fc/archi=riscv pe/archi=riscv pulp_chip=pulp_cluster @@ -94,4 +96,14 @@ ifdef gui $(QUESTA) vsim $(vsim-flags) -do "set VSIM_PATH $(VSIM_PATH); set APP $(TARGET_BUILD_DIR)/$(PULP_APP)/$(PULP_APP); source $(VSIM_PATH)/scripts/start.tcl" else $(QUESTA) vsim $(vsim-flags) -do "set VSIM_PATH $(VSIM_PATH); set APP $(TARGET_BUILD_DIR)/$(PULP_APP)/$(PULP_APP); source $(VSIM_PATH)/scripts/run_and_exit.tcl" -endif \ No newline at end of file +endif + +run_qone: link_design_file +ifdef gui + $(QUESTA) qsim $(vsim-flags) -do "set VSIM_PATH $(VSIM_PATH); set APP $(TARGET_BUILD_DIR)/$(PULP_APP)/$(PULP_APP); set USE_QONE 1; source $(VSIM_PATH)/scripts/start.tcl" +else + $(QUESTA) qsim $(vsim-flags) -do "set VSIM_PATH $(VSIM_PATH); set APP $(TARGET_BUILD_DIR)/$(PULP_APP)/$(PULP_APP); set USE_QONE 1; source $(VSIM_PATH)/scripts/run_and_exit.tcl" +endif + +link_design_file: + ln -sf $(VSIM_PATH)/design.bin design.bin From 6bc764ac447cd1a456f5841a147cd7e605dd742b Mon Sep 17 00:00:00 2001 From: rgandolfi Date: Wed, 31 Dec 2025 17:14:56 +0100 Subject: [PATCH 06/11] [MCHAN & LINKER SCRIPT] MCHAN Drivers renaming + clean-up | Align Memory size in linker script --- include/hal/dma/mchan_v7.h | 464 +++++++++++++++--------------- kernel/chips/pulp_cluster/link.ld | 2 +- 2 files changed, 233 insertions(+), 233 deletions(-) diff --git a/include/hal/dma/mchan_v7.h b/include/hal/dma/mchan_v7.h index d95dee50..0f8ebc04 100644 --- a/include/hal/dma/mchan_v7.h +++ b/include/hal/dma/mchan_v7.h @@ -20,23 +20,23 @@ #include #include "hal/pulp.h" -#define PLP_DMA_LOC2EXT 0 -#define PLP_DMA_EXT2LOC 1 +#define PLP_MCHAN_LOC2EXT 0 +#define PLP_MCHAN_EXT2LOC 1 -#define PLP_DMA_1D 0 -#define PLP_DMA_2D 1 +#define PLP_MCHAN_1D 0 +#define PLP_MCHAN_2D 1 -#define PLP_DMA_NO_TRIG_EVT 0 -#define PLP_DMA_TRIG_EVT 1 +#define PLP_MCHAN_NO_TRIG_EVT 0 +#define PLP_MCHAN_TRIG_EVT 1 -#define PLP_DMA_NO_TRIG_IRQ 0 -#define PLP_DMA_TRIG_IRQ 1 +#define PLP_MCHAN_NO_TRIG_IRQ 0 +#define PLP_MCHAN_TRIG_IRQ 1 -#define PLP_DMA_PRIV 0 -#define PLP_DMA_SHARED 1 +#define PLP_MCHAN_PRIV 0 +#define PLP_MCHAN_SHARED 1 -#define PLP_DMA_FIX 0 -#define PLP_DMA_INC 1 +#define PLP_MCHAN_FIX 0 +#define PLP_MCHAN_INC 1 #if defined(ARCHI_HAS_MCHAN_64) && ARCHI_HAS_MCHAN_64 == 1 typedef unsigned long long mchan_ext_t; @@ -61,27 +61,27 @@ typedef unsigned int mchan_ext_t; \param loc Address in the cluster memory where to access the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); +static inline int plp_mchan_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); /** Cluster memory to external memory transfer with event-based completion. * \param ext Address in the external memory where to store the data. There is no restriction on memory alignment. \param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size); +static inline int plp_mchan_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size); /** External memory to cluster memory transfer with event-based completion. * \param loc Address in the cluster memory where to store the data. There is no restriction on memory alignment. \param ext Address in the external memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size); +static inline int plp_mchan_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size); /** Memory transfer with irq-based completion. * @@ -89,27 +89,27 @@ static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned sh \param loc Address in the cluster memory where to access the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); +static inline int plp_mchan_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); /** Cluster memory to external memory transfer with irq-based completion. * \param ext Address in the external memory where to store the data. There is no restriction on memory alignment. \param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size); +static inline int plp_mchan_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size); /** External memory to cluster memory transfer with irq-based completion. * \param loc Address in the cluster memory where to store the data. There is no restriction on memory alignment. \param ext Address in the external memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size); +static inline int plp_mchan_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size); /** 2-dimensional memory transfer with event-based completion. * @@ -119,9 +119,9 @@ static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigne \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); +static inline int plp_mchan_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); /** Cluster memory to external memory 2-dimensional transfer with event-based completion. * @@ -130,9 +130,9 @@ static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); /** External memory to cluster memory 2-dimensional transfer with event-based completion. * @@ -141,9 +141,9 @@ static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer */ -static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); /** 2-dimensional memory transfer with irq-based completion. * @@ -153,9 +153,9 @@ static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); +static inline int plp_mchan_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); /** Cluster memory to external memory 2-dimensional transfer with irq-based completion. * @@ -164,9 +164,9 @@ static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsig \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); /** External memory to cluster memory 2-dimensional transfer with irq-based completion. * @@ -175,9 +175,9 @@ static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsi \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer */ -static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); //!@} @@ -187,14 +187,14 @@ static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsi /** DMA barrier. * This blocks the core until no transfer is on-going in the DMA. */ -static inline void plp_dma_barrier(); +static inline void plp_mchan_barrier(); /** DMA wait. * This blocks the core until the specified transfer is finished. * - \param counter The counter ID identifying the transfer. This has either been allocated explicitly or returned from an enqueued transfer (e.g. plp_dma_extToL1_2d_irq) + \param counter The counter ID identifying the transfer. This has either been allocated explicitly or returned from an enqueued transfer (e.g. plp_mchan_extToL1_2d_irq) */ -static inline void plp_dma_wait(unsigned int counter); +static inline void plp_mchan_wait(unsigned int counter); //!@} @@ -207,16 +207,16 @@ static inline void plp_dma_wait(unsigned int counter); * This allocates a counter and activate it for all the next transfers until another one is allocated. This means during this period, all transfers will be accounted on this counter and thus waiting * on this counter will wait for all these transfers. * - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_counter_alloc(); +static inline int plp_mchan_counter_alloc(); /** DMA counter release. * This makes the counter available for another transfer through the DMA counter allocator * \param counter The counter number to be released */ -static inline void plp_dma_counter_free(int counter); +static inline void plp_mchan_counter_free(int counter); /** DMA command generation. * Can be used to generate the 32 bits command to be pushed to the DMA, depending on the required mode. @@ -229,7 +229,7 @@ static inline void plp_dma_counter_free(int counter); \param broadcast If 1 the event or irq generated when the transfer is finished is sent to all cores, otherwise it is only sent to the core enqueueing the transfer. \return The generated command. */ -static inline unsigned int plp_dma_getCmd(int ext2loc, unsigned int size, int is2D, int trigEvt, int trigIrq, int broadcast); +static inline unsigned int plp_mchan_getCmd(int ext2loc, unsigned int size, int is2D, int trigEvt, int trigIrq, int broadcast); /** Generate the stride command for 2D transfers. * @@ -237,410 +237,410 @@ static inline unsigned int plp_dma_getCmd(int ext2loc, unsigned int size, int is \param len The length of the 2D transfer, i.e. the number of bytes transfered after which the DMA should switch to the new line. Must fit 16 bits, i.e. must be inferior to 65536. \return The generated command. */ -static inline unsigned int plp_dma_getStrides(unsigned short stride, unsigned short len); +static inline unsigned int plp_mchan_getStrides(unsigned short stride, unsigned short len); /** Push a transfer to the DMA * \param locAddr The address of the transfer for the cluster memory \param extAddr The address of the transfer for the external memory - \param cmd The command that specifies the type of the transfer. This can be generated using plp_dma_getCmd. + \param cmd The command that specifies the type of the transfer. This can be generated using plp_mchan_getCmd. */ -static inline void plp_dma_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr); +static inline void plp_mchan_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr); /** Push a 2D transfer to the DMA * \param locAddr The address of the transfer for the cluster memory \param extAddr The address of the transfer for the external memory - \param cmd The command that specifies the type of the transfer. This can be generated using plp_dma_getStrides. - \param strides The command that specifies the 2D transfer (stride and len). This can be generated using plp_dma_getStrides. + \param cmd The command that specifies the type of the transfer. This can be generated using plp_mchan_getStrides. + \param strides The command that specifies the 2D transfer (stride and len). This can be generated using plp_mchan_getStrides. */ -static inline void plp_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length); +static inline void plp_mchan_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length); /** Return the counter status. * \return Counter status. There is one bit per counter. 1 means there are still on-going transfers for this counter, 0 means nothing is on-going. */ -static inline unsigned int plp_dma_status(); +static inline unsigned int plp_mchan_status(); //!@} /// @cond IMPLEM +#if ARCHI_HAS_DMA_DEMUX +#define MCHAN_ADDR ARCHI_MCHAN_DEMUX_ADDR +#else +#define MCHAN_ADDR ARCHI_MCHAN_EXT_ADDR +#endif + #if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__) -#ifdef ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE_DEMUX(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)ARCHI_MCHAN_DEMUX_ADDR, (offset)) -#define DMA_READ_DEMUX(offset) __builtin_pulp_OffsetedRead((int *)ARCHI_MCHAN_DEMUX_ADDR, (offset)) -#endif // ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)ARCHI_MCHAN_EXT_ADDR, (offset)) -#define DMA_READ(offset) __builtin_pulp_OffsetedRead((int *)ARCHI_MCHAN_EXT_ADDR, (offset)) +#define MCHAN_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)MCHAN_ADDR, (offset)) +#define MCHAN_READ(offset) __builtin_pulp_OffsetedRead((int *)MCHAN_ADDR, (offset)) #else -#ifdef ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE_DEMUX(value, offset) pulp_write32(ARCHI_MCHAN_DEMUX_ADDR + (offset), (value)) -#define DMA_READ_DEMUX(value, offset) pulp_read32(ARCHI_MCHAN_DEMUX_ADDR + (offset)) -#endif // ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE(value, offset) pulp_write32(ARCHI_MCHAN_EXT_ADDR + (offset), (value)) -#define DMA_READ(offset) pulp_read32(ARCHI_MCHAN_EXT_ADDR + (offset)) +#define MCHAN_WRITE(value, offset) pulp_write32(MCHAN_ADDR + (offset), (value)) +#define MCHAN_READ(offset) pulp_read32(MCHAN_ADDR + (offset)) #endif -static inline int plp_dma_counter_alloc() { - return DMA_READ(MCHAN_CMD_OFFSET); + +static inline int plp_mchan_counter_alloc() { + return MCHAN_READ(MCHAN_CMD_OFFSET); } -static inline int plp_cl_dma_counter_alloc() { +static inline int plp_cl_mchan_counter_alloc() { #ifdef ARCHI_HAS_DMA_DEMUX - return DMA_READ_DEMUX(MCHAN_CMD_OFFSET); + return MCHAN_READ(MCHAN_CMD_OFFSET); #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_counter_alloc(); + return plp_mchan_counter_alloc(); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_counter_free(int counter) { - DMA_WRITE(1<>32), MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)(extAddr>>32), MCHAN_CMD_OFFSET); #else - DMA_WRITE(extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE(extAddr, MCHAN_CMD_OFFSET); #endif } -static inline void plp_cl_dma_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr) { +static inline void plp_cl_mchan_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr) { #ifdef ARCHI_HAS_DMA_DEMUX - DMA_WRITE_DEMUX(cmd, MCHAN_CMD_OFFSET); - DMA_WRITE_DEMUX(locAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE(cmd, MCHAN_CMD_OFFSET); + MCHAN_WRITE(locAddr, MCHAN_CMD_OFFSET); #if defined(ARCHI_HAS_MCHAN_64) && ARCHI_HAS_MCHAN_64 == 1 - DMA_WRITE_DEMUX((int)extAddr, MCHAN_CMD_OFFSET); - DMA_WRITE_DEMUX((int)(extAddr>>32), MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)(extAddr>>32), MCHAN_CMD_OFFSET); #else - DMA_WRITE_DEMUX(extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE(extAddr, MCHAN_CMD_OFFSET); #endif #else // ARCHI_HAS_DMA_DEMUX - plp_dma_cmd_push(cmd, locAddr, extAddr); + plp_mchan_cmd_push(cmd, locAddr, extAddr); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { - plp_dma_cmd_push(cmd, locAddr, extAddr); - DMA_WRITE(length, MCHAN_CMD_OFFSET); - DMA_WRITE(stride, MCHAN_CMD_OFFSET); +static inline void plp_mchan_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { + plp_mchan_cmd_push(cmd, locAddr, extAddr); + MCHAN_WRITE(length, MCHAN_CMD_OFFSET); + MCHAN_WRITE(stride, MCHAN_CMD_OFFSET); } -static inline void plp_cl_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { +static inline void plp_cl_mchan_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { #ifdef ARCHI_HAS_DMA_DEMUX - plp_cl_dma_cmd_push(cmd, locAddr, extAddr); - DMA_WRITE_DEMUX(length, MCHAN_CMD_OFFSET); - DMA_WRITE_DEMUX(stride, MCHAN_CMD_OFFSET); + plp_cl_mchan_cmd_push(cmd, locAddr, extAddr); + MCHAN_WRITE(length, MCHAN_CMD_OFFSET); + MCHAN_WRITE(stride, MCHAN_CMD_OFFSET); #else // ARCHI_HAS_DMA_DEMUX - plp_dma_cmd_push_2d(cmd, locAddr, extAddr, stride, length); + plp_mchan_cmd_push_2d(cmd, locAddr, extAddr, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { +static inline int plp_cl_mchan_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy(ext, loc, size, ext2loc); + return plp_mchan_memcpy(ext, loc, size, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { +static inline int plp_cl_mchan_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt(ext, loc, size); + return plp_mchan_l1ToExt(ext, loc, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { +static inline int plp_cl_mchan_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1(loc, ext, size); + return plp_mchan_extToL1(loc, ext, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { +static inline int plp_cl_mchan_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy_irq(ext, loc, size, ext2loc); + return plp_mchan_memcpy_irq(ext, loc, size, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { +static inline int plp_cl_mchan_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt_irq(ext, loc, size); + return plp_mchan_l1ToExt_irq(ext, loc, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { +static inline int plp_cl_mchan_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1_irq(loc, ext, size); + return plp_mchan_extToL1_irq(loc, ext, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline void plp_mchan_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); } -static inline void plp_cl_dma_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +static inline void plp_cl_mchan_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); #else // ARCHI_HAS_DMA_DEMUX - plp_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); + plp_mchan_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - plp_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); +static inline int plp_mchan_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + plp_mchan_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); return counter; } -static inline int plp_cl_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +static inline int plp_cl_mchan_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - plp_cl_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); + unsigned int counter = plp_cl_mchan_counter_alloc(); + plp_cl_mchan_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy_2d(ext, loc, size, stride, length, ext2loc); + return plp_mchan_memcpy_2d(ext, loc, size, stride, length, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt_2d(ext, loc, size, stride, length); + return plp_mchan_l1ToExt_2d(ext, loc, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1_2d(loc, ext, size, stride, length); + return plp_mchan_extToL1_2d(loc, ext, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +static inline int plp_cl_mchan_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy_2d_irq(ext, loc, size, stride, length, ext2loc); + return plp_mchan_memcpy_2d_irq(ext, loc, size, stride, length, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt_2d_irq(ext, loc, size, stride, length); + return plp_mchan_l1ToExt_2d_irq(ext, loc, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1_2d_irq(loc, ext, size, stride, length); + return plp_mchan_extToL1_2d_irq(loc, ext, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_barrier() { - while(DMA_READ(MCHAN_STATUS_OFFSET) & 0xFFFF) { +static inline void plp_mchan_barrier() { + while(MCHAN_READ(MCHAN_STATUS_OFFSET) & 0xFFFF) { eu_evt_maskWaitAndClr(1< Date: Mon, 5 Jan 2026 13:54:38 +0100 Subject: [PATCH 07/11] [PI_L2_MALLOC] Some fixes on pi_l2_malloc --- kernel/alloc_pool.c | 47 ++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/kernel/alloc_pool.c b/kernel/alloc_pool.c index a04278ae..3a6d00f8 100644 --- a/kernel/alloc_pool.c +++ b/kernel/alloc_pool.c @@ -28,6 +28,11 @@ pos_alloc_t pos_alloc_fc_tcdm; #if defined(ARCHI_HAS_L2) pos_alloc_t pos_alloc_l2[POS_NB_ALLOC_L2]; + +#define POS_L2_PRIV0 0 +#define POS_L2_PRIV1 1 +#define POS_L2_SHARED 2 + #endif #ifdef CONFIG_ALLOC_L2_PWD_NB_BANKS @@ -38,7 +43,7 @@ static uint32_t pos_alloc_account_1[CONFIG_ALLOC_L2_PWD_NB_BANKS]; #if defined(ARCHI_HAS_FC_TCDM) static inline pos_alloc_t *get_fc_alloc() { return &pos_alloc_fc_tcdm; } #else -static inline pos_alloc_t *get_fc_alloc() { return &pos_alloc_l2[0]; } +static inline pos_alloc_t *get_fc_alloc() { return &pos_alloc_l2[POS_L2_PRIV0]; } #endif @@ -49,31 +54,31 @@ void pos_allocs_init() #if defined(ARCHI_HAS_L2) #if defined(ARCHI_HAS_L2_MULTI) - //pos_trace(//pos_trace_INIT, "Initializing L2 private bank0 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv0_base(), pos_l2_priv0_size()); - pos_alloc_init(&pos_alloc_l2[0], pos_l2_priv0_base(), pos_l2_priv0_size()); + ALLOC_TRACE(POS_LOG_TRACE, "Initializing L2 private bank0 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv0_base(), pos_l2_priv0_size()); + pos_alloc_init(&pos_alloc_l2[POS_L2_PRIV0], pos_l2_priv0_base(), pos_l2_priv0_size()); - //pos_trace(//pos_trace_INIT, "Initializing L2 private bank1 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv1_base(), pos_l2_priv1_size()); - pos_alloc_init(&pos_alloc_l2[1], pos_l2_priv1_base(), pos_l2_priv1_size()); + ALLOC_TRACE(POS_LOG_TRACE, "Initializing L2 private bank1 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv1_base(), pos_l2_priv1_size()); + pos_alloc_init(&pos_alloc_l2[POS_L2_PRIV1], pos_l2_priv1_base(), pos_l2_priv1_size()); - //pos_trace(//pos_trace_INIT, "Initializing L2 shared banks allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_shared_base(), pos_l2_shared_size()); - pos_alloc_init(&pos_alloc_l2[2], pos_l2_shared_base(), pos_l2_shared_size()); + ALLOC_TRACE(POS_LOG_TRACE, "Initializing L2 shared banks allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_shared_base(), pos_l2_shared_size()); + pos_alloc_init(&pos_alloc_l2[POS_L2_SHARED], pos_l2_shared_base(), pos_l2_shared_size()); #ifdef CONFIG_ALLOC_L2_PWD_NB_BANKS - pos_alloc_l2[2].track_pwd = 1; - pos_alloc_l2[2].pwd_count = pos_alloc_account_0; - pos_alloc_l2[2].ret_count = pos_alloc_account_0; + pos_alloc_l2[POS_L2_SHARED].track_pwd = 1; + pos_alloc_l2[POS_L2_SHARED].pwd_count = pos_alloc_account_0; + pos_alloc_l2[POS_L2_SHARED].ret_count = pos_alloc_account_0; for (int i=0; i Date: Wed, 25 Feb 2026 16:02:47 +0100 Subject: [PATCH 08/11] [IDMA CLOCK GATING] Updated iDMA and Cluster Control unit drivers for clock gating control on iDMA * [CLUSTER_CTRL_DRIVERS] Added functions to write and read from cluster cfg register * [iDMA CLOCK GATING] Updated drivers for iDMA and cluster_ctrl_unit to have sw-controlled clock gating for iDMA --- include/hal/cluster_ctrl/cluster_ctrl_v2.h | 8 +++++ include/hal/dma/idma_v2.h | 34 +++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/include/hal/cluster_ctrl/cluster_ctrl_v2.h b/include/hal/cluster_ctrl/cluster_ctrl_v2.h index 322aa864..0ab57b5c 100644 --- a/include/hal/cluster_ctrl/cluster_ctrl_v2.h +++ b/include/hal/cluster_ctrl/cluster_ctrl_v2.h @@ -71,4 +71,12 @@ static inline void hal_cluster_ctrl_return_set_remote(int cid, int value){ pulp_write32(ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid)+ARCHI_CLUSTER_CTRL_OFFSET+ARCHI_CLUSTER_CTRL_RETURN, value | 1 << ARCHI_CLUSTER_CTRL_RETURN_SHIFT_BITS); } +static inline void plp_ctrl_cluster_cfg_set(unsigned int mask) { + pulp_write32(ARCHI_CLUSTER_CTRL_ADDR + ARCHI_CLUSTER_CTRL_CLUSTER_CFG, mask); +} + +static inline int plp_ctrl_cluster_cfg_get() { + return pulp_read32(ARCHI_CLUSTER_CTRL_ADDR + ARCHI_CLUSTER_CTRL_CLUSTER_CFG); +} + #endif diff --git a/include/hal/dma/idma_v2.h b/include/hal/dma/idma_v2.h index 13d250af..b22a77f4 100644 --- a/include/hal/dma/idma_v2.h +++ b/include/hal/dma/idma_v2.h @@ -477,6 +477,19 @@ static inline unsigned int plp_cl_dma_status_toL1(); static inline unsigned int plp_dma_status_toL2(); static inline unsigned int plp_cl_dma_status_toL2(); +/* CLOCK GATING PROCEDURE FOR iDMA */ +/* Three modes are supported: + - No clock: the whole iDMA is unresponsive + - Frontend-only clock: only the iDMA frontend is clocked. This way power consumption + is kept to a minimum while still being responsive to incoming transfer requests. + - Fully clocked: both the frontend and datapath of iDMA are clocked. Notice that + clock gating for the dapath is fully managed in rtl. +*/ + +// Enables the frontend clock +static inline void plp_idma_enable_clk(); +// Disables the frontend clock +static inline void plp_idma_disable_clk(); //!@} @@ -511,6 +524,22 @@ static inline unsigned int plp_cl_dma_status_toL2(); #define DMA_CL_READ(offset) DMA_READ(value, offset) #endif +// +// CLOCK GATING CONTROL +// + +static inline void plp_idma_enable_clk() { + uint32_t cluster_ctrl_cfg_reg; + cluster_ctrl_cfg_reg = plp_ctrl_cluster_cfg_get(); + plp_ctrl_cluster_cfg_set(cluster_ctrl_cfg_reg | (1 << 17)); +} + +static inline void plp_idma_disable_clk() { + uint32_t cluster_ctrl_cfg_reg; + cluster_ctrl_cfg_reg = plp_ctrl_cluster_cfg_get(); + plp_ctrl_cluster_cfg_set(cluster_ctrl_cfg_reg & (0 << 17)); +} + static inline int plp_dma_memcpy(dma_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { if (ext2loc) return pulp_idma_L2ToL1(ext, loc, size); @@ -783,7 +812,6 @@ static inline int pulp_cl_idma_L2ToL1(unsigned int src, unsigned int dst, unsign asm volatile("" : : : "memory"); // Launch TX dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); - return dma_tx_id; } @@ -1098,6 +1126,10 @@ static inline int pulp_cl_idma_zeromem(unsigned int dst, unsigned short size, id return dma_tx_id; } +// +// BARRIERS +// + static inline void plp_dma_barrier() { while(plp_dma_status()) { From 1da02e11d5459662714ad3eca151b35f52f1c1c3 Mon Sep 17 00:00:00 2001 From: rgandolfi Date: Wed, 29 Apr 2026 16:16:15 +0200 Subject: [PATCH 09/11] [TRACE] Add trace.h inclusion in alloc_pool.c --- kernel/alloc_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/alloc_pool.c b/kernel/alloc_pool.c index 3a6d00f8..05cdc536 100644 --- a/kernel/alloc_pool.c +++ b/kernel/alloc_pool.c @@ -15,6 +15,7 @@ */ #include "pulp.h" +#include "implem/trace.h" #include #include From 8b80c73a4d263ddcd92d2882611551c12622307c Mon Sep 17 00:00:00 2001 From: rgandolfi Date: Mon, 25 May 2026 10:40:40 +0200 Subject: [PATCH 10/11] [RUNTIME] Fix inclusion of cluster_ctrl_unit drivers + clean on rebase conflicts --- include/hal/chips/pulp_cluster/pulp.h | 2 +- include/hal/dma/idma_v2.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/hal/chips/pulp_cluster/pulp.h b/include/hal/chips/pulp_cluster/pulp.h index c43f06b6..3008c89a 100644 --- a/include/hal/chips/pulp_cluster/pulp.h +++ b/include/hal/chips/pulp_cluster/pulp.h @@ -26,6 +26,7 @@ #endif // __ibex__ #include "hal/eu/eu_v3.h" #include "hal/itc/itc_v1.h" +#include "hal/cluster_ctrl/cluster_ctrl_v2.h" #ifndef USE_IDMA #include "hal/dma/mchan_v7.h" #else @@ -33,7 +34,6 @@ #endif #include "hal/timer/timer_v2.h" #include "hal/soc_eu/soc_eu_v2.h" -#include "hal/cluster_ctrl/cluster_ctrl_v2.h" #include "hal/icache/icache_ctrl_v2.h" #include "hal/apb_soc/apb_soc_v3.h" #include "hal/fll/fll_v1.h" diff --git a/include/hal/dma/idma_v2.h b/include/hal/dma/idma_v2.h index b22a77f4..14b31ad7 100644 --- a/include/hal/dma/idma_v2.h +++ b/include/hal/dma/idma_v2.h @@ -946,10 +946,6 @@ static inline int pulp_cl_idma_L2ToL1_2d(unsigned int src, unsigned int dst, uns return dma_tx_id; } -<<<<<<< HEAD - -======= ->>>>>>> 004a4d4 ([iDMA] Updates on iDMA drivers) static inline int pulp_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { unsigned int dma_tx_id; unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_2D; From 04f8bfecd585e122f058fe126c9c82e0828eff6e Mon Sep 17 00:00:00 2001 From: rgandolfi Date: Tue, 26 May 2026 16:04:21 +0200 Subject: [PATCH 11/11] [CRT0] Moved stack initialization right after register file clear --- kernel/crt0.S | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/crt0.S b/kernel/crt0.S index a87d9e65..e2571bcd 100644 --- a/kernel/crt0.S +++ b/kernel/crt0.S @@ -72,13 +72,6 @@ pos_init_entry: addi t0, t0, 4 bltu t0, t1, 1b - - - # Stack initialization - la x2, stack - - - /* Do all other initializations from C code */ jal x1, pos_init_start @@ -196,6 +189,8 @@ _start: lui x29, 0 lui x30, 0 lui x31, 0 + # Stack initialization + la x2, stack /* Move on to normal boot */ jal x0, pos_init_entry