From eebfa18fb25c2a7d0bd81f605484c9478b5f265c Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 May 2026 11:12:54 +0200
Subject: [PATCH 1/4] update scripts

Signed-off-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 .../run_test_aws_eu-central-1.sh              | 32 +++++++++++++++++++
 .../run_benchmark/run_test_aws_eu-west-2.sh   |  2 +-
 scripts/run_benchmark/run_test_nebius.sh      |  2 +-
 3 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100755 scripts/run_benchmark/run_test_aws_eu-central-1.sh

diff --git a/scripts/run_benchmark/run_test_aws_eu-central-1.sh b/scripts/run_benchmark/run_test_aws_eu-central-1.sh
new file mode 100755
index 0000000..d93918c
--- /dev/null
+++ b/scripts/run_benchmark/run_test_aws_eu-central-1.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# abort on the first error, including a failed repository lookup below
+set -euo pipefail
+
+# get the root of the repository and run the commands below from there
+REPO_ROOT=$(git rev-parse --show-toplevel)
+cd "$REPO_ROOT"
+
+# test resources (input) and a timestamped results directory (output) on S3
+resources_test_s3=s3://openproblems-data/resources_test/task_spatial_segmentation
+publish_dir_s3="s3://hca-op-spatial/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"
+
+# write the parameters to file
+cat > /tmp/params.yaml << HERE
+id: mouse_brain_combined
+input_spatial_unlabelled: $resources_test_s3/mouse_brain_combined/spatial_unlabelled.zarr
+input_spatial_solution: $resources_test_s3/mouse_brain_combined/spatial_solution.zarr
+input_scrnaseq_reference: $resources_test_s3/mouse_brain_combined/scrnaseq_reference.h5ad
+output_state: "state.yaml"
+publish_dir: $publish_dir_s3
+HERE
+
+tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
+  --revision build/main \
+  --pull-latest \
+  --main-script target/nextflow/workflows/run_benchmark/main.nf \
+  --workspace 8386213183400 \
+  --compute-env 6pOLSmxyAEvsCYsNbfrCSK \
+  --params-file /tmp/params.yaml \
+  --config src/base/labels_aws_eu-central-1.config \
+  --labels task_spatial_segmentation,test
diff --git a/scripts/run_benchmark/run_test_aws_eu-west-2.sh b/scripts/run_benchmark/run_test_aws_eu-west-2.sh
index 44f14c5..d1e09e2 100755
--- a/scripts/run_benchmark/run_test_aws_eu-west-2.sh
+++ b/scripts/run_benchmark/run_test_aws_eu-west-2.sh
@@ -28,5 +28,5 @@ tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
   --workspace 8386213183400 \
   --compute-env 7Odt43ln9XureGja6Frdm7 \
   --params-file /tmp/params.yaml \
-  --config src/base/labels_tw.config \
+  --config src/base/labels_aws_eu-west-2.config \
   --labels task_spatial_segmentation,test
diff --git a/scripts/run_benchmark/run_test_nebius.sh b/scripts/run_benchmark/run_test_nebius.sh
index 5cd46fe..c491051 100755
--- a/scripts/run_benchmark/run_test_nebius.sh
+++ b/scripts/run_benchmark/run_test_nebius.sh
@@ -9,7 +9,7 @@ cd "$REPO_ROOT"
 set -e
 
 resources_test_s3=s3://openproblems-data/resources_test/task_spatial_segmentation
-publish_dir_s3="/scratch/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"
+publish_dir_s3="/scratch/results/runs/$(date +%Y-%m-%d_%H-%M-%S)"
 
 # write the parameters to file
 cat > /tmp/params.yaml << HERE

From 556b8af2c9dbf278313b6f3ba2d3ed21df3f851d Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 May 2026 11:57:22 +0200
Subject: [PATCH 2/4] add eu central config

Signed-off-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 src/base/labels_aws_eu-central-1.config | 152 ++++++++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100644 src/base/labels_aws_eu-central-1.config

diff --git a/src/base/labels_aws_eu-central-1.config b/src/base/labels_aws_eu-central-1.config
new file mode 100644
index 0000000..3c448b1
--- /dev/null
+++ b/src/base/labels_aws_eu-central-1.config
@@ -0,0 +1,152 @@
+// copied from 'common/nextflow_helpers/labels_tw.config', but the queues in the gpu labels have been updated
+
+def exitStrat(task, max_attempts = 3) { // returns the error strategy ('retry' or 'ignore') for a failed task
+  println "Determining exit strategy for task (attempt '${task.attempt}', exit status '${task.exitStatus}')"
+
+  // if the component has reached max_attempts, ignore the error so the workflow can continue
+  // it's important 'ignore' is returned even if maxRetries is set to 3,
+  // otherwise the workflow will stop
+  if (task.attempt >= max_attempts) {
+    return 'ignore'
+  }
+  // when an aws spot instance is reclaimed, nextflow seems to use exit code 2147483647
+  // throwing in some extra conditions just in case
+  if (task.exitStatus == null || task.exitStatus <= -1 || task.exitStatus > 2100000000 || !(task.exitStatus.toString().isNumber())) {
+    return 'retry'
+  }
+  // if component failed, retry once
+  if (task.exitStatus == 1 && task.attempt < 2) {
+    return 'retry'
+  }
+  // if component ran out of memory, retry with more memory and disk
+  if (task.exitStatus in [137, 139] && task.attempt < max_attempts) {
+    return 'retry'
+  }
+  // return 'ignore' for all other cases to ignore the error,
+  // otherwise the workflow will stop
+  return 'ignore'
+}
+
+aws { // AWS-specific settings for the awsbatch executor selected in the process scope
+  batch {
+    maxTransferAttempts = 3
+    delayBetweenAttempts = '5 sec'
+    maxSpotAttempts = 8 // NOTE(review): reassigned to 5 by 'aws.batch.maxSpotAttempts = 5' at the end of this file; presumably the later value wins, confirm
+  }
+}
+
+process {
+  executor = 'awsbatch'
+
+  // Default disk space
+  disk = 50.GB
+
+  // Error handling is delegated to exitStrat(), defined above in this file,
+  // which returns either 'retry' or 'ignore' based on the attempt and exit status
+  errorStrategy = { exitStrat(task) }
+  maxRetries = 3
+  maxMemory = null // no memory cap: get_memory() then returns the requested amount unchanged
+
+  // Resource labels
+  withLabel: lowcpu { cpus = 5 }
+  withLabel: midcpu { cpus = 15 }
+  withLabel: highcpu { cpus = 30 }
+  withLabel: lowmem { // memory and disk requests scale with task.attempt
+    memory = { get_memory( 20.GB * task.attempt ) }
+    disk = { 50.GB * task.attempt }
+  }
+  withLabel: midmem {
+    memory = { get_memory( 50.GB * task.attempt ) }
+    disk = { 100.GB * task.attempt }
+  }
+  withLabel: highmem {
+    memory = { get_memory( 100.GB * task.attempt ) }
+    disk = { 200.GB * task.attempt }
+  }
+  withLabel: veryhighmem {
+    memory = { get_memory( 200.GB * task.attempt ) }
+    disk = { 400.GB * task.attempt }
+  }
+  withLabel: lowsharedmem { // passes 5% of task.memory.mega as --shm-size unless the engine is singularity
+    containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.05)}" : ""}
+  }
+  withLabel: midsharedmem { // same as lowsharedmem, with 10% of task.memory.mega
+    containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.1)}" : ""}
+  }
+  withLabel: highsharedmem { // same as lowsharedmem, with 25% of task.memory.mega
+    containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.25)}" : ""}
+  }
+  withLabel: gpu {
+    // assuming g6.8xlarge
+    cpus = 32
+    accelerator = 1
+    memory = 100.GB
+    queue = "TowerForge-9YTjlzYCo5nGhuhJw2daF-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+  withLabel: midgpu {
+    // assuming g6.8xlarge -- NOTE(review): 4 accelerators does not look like a g6.8xlarge (believed to be single-GPU); confirm instance type
+    cpus = 32
+    accelerator = 4
+    memory = 100.GB
+    queue = "TowerForge-9YTjlzYCo5nGhuhJw2daF-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+  withLabel: highgpu {
+    // assuming g6.16xlarge -- NOTE(review): 8 accelerators does not look like a g6.16xlarge (believed to be single-GPU); confirm instance type
+    cpus = 64
+    accelerator = 8
+    memory = 200.GB
+    queue = "TowerForge-9YTjlzYCo5nGhuhJw2daF-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+  withLabel: biggpu {
+    // assuming p5.4xlarge
+    cpus = 16
+    accelerator = 1
+    memory = 200.GB
+    queue = "TowerForge-...-work" // NOTE(review): looks like an unfilled placeholder queue name; confirm before using this label
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+
+  // make sure publishstates gets enough disk space and memory
+  withName:'.*publishStatesProc' {
+    memory = '16GB'
+    disk = '100GB'
+  }
+}
+
+def get_memory(to_compare) { // returns 'to_compare', capped at process.maxMemory when a cap is configured
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare // no cap configured: use the requested amount as-is
+  }
+
+  try {
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory // attempt number equals maxRetries: request the full cap
+    }
+    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+      return process.maxMemory as nextflow.util.MemoryUnit // request exceeds the cap: clamp to it (previously the undefined name 'max_memory')
+    }
+    else {
+      return to_compare
+    }
+  } catch (all) { // any failure above is reported and terminates the run
+        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+        System.exit(1)
+  }
+}
+
+// set tracing file
+trace {
+    enabled = true
+    overwrite = true
+    file = "${params.publish_dir}/trace.txt" // trace.txt is written under the run's publish_dir
+}
+
+aws.batch.maxSpotAttempts = 5 // NOTE(review): conflicts with 'maxSpotAttempts = 8' in the aws.batch block above; confirm which value is intended
+google.batch.maxSpotAttempts = 5 // NOTE(review): Google Batch setting in a config whose executor is 'awsbatch'; presumably unused here, confirm

From e1907966949761ff066f60127c3738eab893a72c Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 May 2026 12:25:52 +0200
Subject: [PATCH 3/4] update publish dir

Signed-off-by: Robrecht Cannoodt <rcannood@gmail.com>
---
 scripts/run_benchmark/run_test_nebius.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/run_benchmark/run_test_nebius.sh b/scripts/run_benchmark/run_test_nebius.sh
index c491051..415b02d 100755
--- a/scripts/run_benchmark/run_test_nebius.sh
+++ b/scripts/run_benchmark/run_test_nebius.sh
@@ -9,7 +9,7 @@ cd "$REPO_ROOT"
 set -e
 
 resources_test_s3=s3://openproblems-data/resources_test/task_spatial_segmentation
-publish_dir_s3="/scratch/results/runs/$(date +%Y-%m-%d_%H-%M-%S)"
+publish_dir_s3="/mnt/data/results/runs/$(date +%Y-%m-%d_%H-%M-%S)"
 
 # write the parameters to file
 cat > /tmp/params.yaml << HERE

From 95de0bd08fdec0abcca4831fbfb0f632e370dfd7 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 May 2026 14:34:42 +0200
Subject: [PATCH 4/4] Revert publish directory in run_test_nebius.sh to /scratch/results/runs

---
 scripts/run_benchmark/run_test_nebius.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/run_benchmark/run_test_nebius.sh b/scripts/run_benchmark/run_test_nebius.sh
index 415b02d..c491051 100755
--- a/scripts/run_benchmark/run_test_nebius.sh
+++ b/scripts/run_benchmark/run_test_nebius.sh
@@ -9,7 +9,7 @@ cd "$REPO_ROOT"
 set -e
 
 resources_test_s3=s3://openproblems-data/resources_test/task_spatial_segmentation
-publish_dir_s3="/mnt/data/results/runs/$(date +%Y-%m-%d_%H-%M-%S)"
+publish_dir_s3="/scratch/results/runs/$(date +%Y-%m-%d_%H-%M-%S)"
 
 # write the parameters to file
 cat > /tmp/params.yaml << HERE