openproblems-bio · koenvandenberge · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/src/metrics/sepcificity_celltype_markers/config.vsh.yaml b/src/metrics/sepcificity_celltype_markers/config.vsh.yaml
@@ -0,0 +1,37 @@
+__merge__: ../../api/comp_metric.yaml
+
+name: specificity_celltype_marker
+
+info:
+  metrics:
+    - name: specificity_celltype_marker
+      label: "Specificity based on cell type superset marker gene expression"
+      summary: "Using a curated list of marker genes, exclusive marker gene expression is checked for each cell."
+      description: |
+        We start from a list of curated marker genes for each cell type superset. The number of cell
+        type supersets can vary. For each cell, we check if we observe at least one RNA molecule from
+        at least one marker gene of each superset. Biologically, a cell is expected to express marker
+        genes from no more than one superset. This specificity metric quantifies the fraction of cells
+        that express genes from at least two supersets (lower is better). Note that this metric will
+        favor conservative segmentation algorithms (only segmenting DAPI will have high specificity),
+        hence the metric will be most useful when contrasted with a sensitivity metric.
+      references:
+        doi: NULL
+      min: 0
+      max: 1
+      maximize: false
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    __merge__: ../../base/setup_spatialdata_partial.yaml
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midmem, midcpu]
diff --git a/src/metrics/sepcificity_celltype_markers/script.py b/src/metrics/sepcificity_celltype_markers/script.py
@@ -0,0 +1,45 @@
+import numpy as np
+import xarray as xr
+import anndata as ad
+import spatialdata as sd
+from sklearn.metrics import adjusted_rand_score
+
+## VIASH START
+# Placeholder values for running the script directly; the paths are still TODO.
+par = dict(
+    input_prediction="resources_test/task_spatial_segmentation/XXX",  # TODO: add path
+    # TODO: this solution should be a list of marker genes from each superset
+    input_solution="resources_test/task_spatial_segmentation/XXXX",
+    output="output.h5ad",
+)
+meta = dict(
+    name="specificity_celltype_marker",
+)
+## VIASH END
+
+# Metric entry point: load prediction and solution, score, then write an AnnData result.
+print(">> Reading input files", flush=True)
+sdata_pred = sd.read_zarr(par["input_prediction"])
+# TODO: this should be reading in the list, which will not be a Zarr file
+sdata_sol = sd.read_zarr(par["input_solution"])
+dataset_id = sdata_sol.tables["table"].uns["dataset_id"]
+method_id = sdata_pred.tables["table"].uns["method_id"]
+
+print(">> Get ground truth cell IDs from cell_labels", flush=True)
+gt_cell_ids = sdata_sol.labels["groundtruth_cell_labels"]  # NOTE(review): assumes the solution holds this element - confirm
+
+# TODO: calculate expression of marker superset for each cell
+# TODO: calculate specificity metric and bind it to `specificity_score`
+raise NotImplementedError("specificity_score has not been computed yet")  # explicit stop instead of a NameError below
+
+print(">> Writing output", flush=True)
+output = ad.AnnData(
+    uns={
+        "dataset_id": dataset_id,
+        "normalization_id": "counts",
+        "method_id": method_id,
+        "metric_ids": ["specificity_celltype_marker"],
+        "metric_values": [float(specificity_score)],
+    }
+)
+output.write_h5ad(par["output"], compression="gzip")