From d7b13fec900ad621bee91d4e7b5e1019ace09558 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 19 Mar 2026 08:25:53 +0100
Subject: [PATCH 1/3] Provide a default GEMM_DIVIDE_LIMIT and add it to DYNAMIC_ARCH

---
 common_param.h | 1 +
 param.h        | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/common_param.h b/common_param.h
index 9e5edbb816..108d593f9f 100644
--- a/common_param.h
+++ b/common_param.h
@@ -47,6 +47,7 @@
 typedef struct {
   int dtb_entries;
   int switch_ratio;
+  int divide_limit;
   int offsetA, offsetB, align;
 #if BUILD_HFLOAT16 == 1
   int shgemm_p, shgemm_q, shgemm_r;
diff --git a/param.h b/param.h
index 7e4a04501b..ca12cb630f 100644
--- a/param.h
+++ b/param.h
@@ -4260,6 +4260,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
 #define SWITCH_RATIO 2
 #endif
 
+#ifndef GEMM_DIVIDE_LIMIT
+#define GEMM_DIVIDE_LIMIT 0
+#endif
+
 #ifndef QGEMM_DEFAULT_UNROLL_M
 #define QGEMM_DEFAULT_UNROLL_M 2
 #endif

From 8f5e49556fb9e1a22cf42528260f52e390c470eb Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 19 Mar 2026 08:26:33 +0100
Subject: [PATCH 2/3] Add GEMM_DIVIDE_LIMIT to parameters

---
 kernel/setparam-ref.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 51981c6253..88e95b830b 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -54,6 +54,8 @@ gotoblas_t TABLE_NAME = {
 
   SWITCH_RATIO,
 
+  GEMM_DIVIDE_LIMIT,
+
   GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
 
 #ifdef BUILD_HFLOAT16

From b7601ea92f6e950670610da2f6d5b0c0211bec3b Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 19 Mar 2026 08:29:15 +0100
Subject: [PATCH 3/3] Retrieve cpu-specific GEMM_DIVIDE_LIMIT if DYNAMIC_ARCH

---
Review note: GEMM_DIVIDE_LIMIT already has a compile-time definition once
patch 1/3 is applied, so it is #undef'd before being redefined here. The
hunk header and diffstat were adjusted for the extra lines; the post-image
blob id on the index line predates that adjustment.

 driver/level3/gemm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/driver/level3/gemm.c b/driver/level3/gemm.c
index e37d86c28d..99320bab58 100644
--- a/driver/level3/gemm.c
+++ b/driver/level3/gemm.c
@@ -63,6 +63,13 @@
 #define DIVIDE_RATE GEMM_DIVIDE_RATE
 #endif
 
+#ifdef DYNAMIC_ARCH
+/* param.h now always provides a compile-time GEMM_DIVIDE_LIMIT; drop it
+   before substituting the cpu-specific value to avoid a macro redefinition. */
+#undef GEMM_DIVIDE_LIMIT
+#define GEMM_DIVIDE_LIMIT gotoblas->divide_limit
+#endif
+
 #ifdef GEMM_DIVIDE_LIMIT
 #define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
 #endif