diff --git a/src/metrics/sepcificity_celltype_markers/config.vsh.yaml b/src/metrics/sepcificity_celltype_markers/config.vsh.yaml
new file mode 100644
index 0000000..b4b00f2
--- /dev/null
+++ b/src/metrics/sepcificity_celltype_markers/config.vsh.yaml
@@ -0,0 +1,37 @@
+__merge__: ../../api/comp_metric.yaml
+
+name: specificity_celltype_marker
+
+info:
+  metrics:
+    - name: specificity_celltype_marker
+      label: "Specificity based on cell type superset marker gene expression"
+      summary: "Using a curated list of marker genes, exclusive marker gene expression is checked for each cell."
+      description: |
+        We start from a list of curated marker genes for each cell type superset. The number of cell
+        type supersets can vary. For each cell, we check if we observe at least one RNA molecule from
+        at least one marker gene of each superset. Biologically, a cell is expected to express marker
+        genes from no more than one superset. This specificity metric quantifies the fraction of cells
+        that express genes from at least two supersets (lower is better). Note that this metric will
+        favor conservative segmentation algorithms (only segmenting DAPI will have high specificity),
+        hence the metric will be most useful when contrasted with a sensitivity metric.
+      references:
+        doi: NULL
+      min: 0
+      max: 1
+      maximize: false
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    __merge__: ../../base/setup_spatialdata_partial.yaml
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midmem, midcpu]
\ No newline at end of file
diff --git a/src/metrics/sepcificity_celltype_markers/script.py b/src/metrics/sepcificity_celltype_markers/script.py
new file mode 100644
index 0000000..f705c50
--- /dev/null
+++ b/src/metrics/sepcificity_celltype_markers/script.py
@@ -0,0 +1,61 @@
+# Metric component: reports the fraction of cells that express marker genes
+# from at least two cell type supersets (lower is better).
+# Work in progress: the score itself is not computed yet, see the TODOs below.
+
+import numpy as np
+import xarray as xr
+import anndata as ad
+import spatialdata as sd
+from sklearn.metrics import adjusted_rand_score
+
+## VIASH START
+par = {
+    # TODO: add path
+    'input_prediction': 'resources_test/task_spatial_segmentation/XXX',
+    # TODO: this solution should be a list of marker genes from each superset
+    'input_solution': 'resources_test/task_spatial_segmentation/XXXX',
+    'output': 'output.h5ad'
+}
+meta = {
+    'name': 'specificity_celltype_marker'
+}
+## VIASH END
+
+print(">> Reading input files", flush=True)
+sdata_pred = sd.read_zarr(par["input_prediction"])
+# TODO: this should be reading in the list, which will not be a Zarr file
+sdata_sol = sd.read_zarr(par["input_solution"])
+
+dataset_id = sdata_sol.tables["table"].uns["dataset_id"]
+method_id = sdata_pred.tables["table"].uns["method_id"]
+
+print(">> Get ground truth cell IDs from cell_labels", flush=True)
+# NOTE(review): this line previously read `sdata.Labels[...]`, but no `sdata`
+# name exists in this script. Presumably the ground-truth labels live in the
+# solution object -- confirm against the solution file format.
+gt_cell_ids = sdata_sol.labels['groundtruth_cell_labels']
+
+# Placeholder until the metric is implemented; the guard below turns a
+# missing score into an explicit error instead of a NameError.
+specificity_score = None
+
+# TODO: calculate expression of marker superset for each cell
+
+# TODO: calculate specificity metric
+
+if specificity_score is None:
+    raise NotImplementedError(
+        "specificity_celltype_marker: metric computation is not implemented yet"
+    )
+
+print(">> Writing output", flush=True)
+output = ad.AnnData(
+    uns={
+        "dataset_id": dataset_id,
+        "normalization_id": "counts",
+        "method_id": method_id,
+        "metric_ids": ["specificity_celltype_marker"],
+        "metric_values": [float(specificity_score)],
+    }
+)
+output.write_h5ad(par["output"], compression="gzip")
\ No newline at end of file