Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions src/datasets/loaders/vzg_merscope/convert_to_spatialdata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Convert MERSCOPE regions to SpatialData .zarr files.

Produces one .zarr per region with:
images: 'morphology_mip'
points: 'transcripts'
shapes: 'cell_boundaries' (from cell_boundaries.parquet in region folder)
tables: 'table' (obs × var counts, linked to cell_boundaries)
coordinate_systems: 'global'
"""

import shutil
import sys
from pathlib import Path

import spatialdata as sd
import yaml
from spatialdata_io import merscope

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# The YAML config path may be passed as the first command-line argument, e.g.
#   python convert_to_spatialdata.py metadata_humanbrain.yml
# When no argument is given, "metadata.yml" in the working directory is used.
CONFIG_PATH = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("metadata.yml")

with open(CONFIG_PATH, encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Slide folder; every entry of REGIONS is a sub-folder of it.
BASE_DIR = Path(cfg["base_dir"])
# Destination folder for the per-region .zarr stores.
OUTPUT_DIR = Path(cfg["output_dir"])
# Region folder names to convert.
REGIONS = cfg["regions"]
# The slide name is the last path component of BASE_DIR; it is passed to the
# reader and used as the prefix of each dataset id.
SLIDE_NAME = BASE_DIR.name
# Z layer handed to the reader (`z_layers`) and used to build the image key.
Z_LAYER = cfg["z_layer"]
# Free-form dataset metadata that is copied into the table's `.uns`.
DATASET_METADATA = cfg["dataset"]

# ---------------------------------------------------------------------------
# Load & convert each region
# ---------------------------------------------------------------------------
def _rename_element(elements, old_name, new_name):
    """Move ``elements[old_name]`` to ``elements[new_name]``.

    Returns True when the element was renamed. When ``old_name`` is absent the
    container is left untouched, a warning is printed, and False is returned.
    """
    if old_name not in elements:
        print(
            f"[warn] '{old_name}' not found (available: {list(elements)}); "
            f"'{new_name}' was not created"
        )
        return False
    elements[new_name] = elements.pop(old_name)
    return True


OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for region in REGIONS:
    region_dir = BASE_DIR / region
    dataset_id = f"{SLIDE_NAME}_{region}"
    print(f"\n{'='*60}")
    print(f"Processing {region} (dataset_id={dataset_id})")
    print(f"{'='*60}")

    # --- Load with spatialdata-io merscope reader ---
    sdata = merscope(
        path=region_dir,
        z_layers=Z_LAYER,
        region_name=region,
        slide_name=SLIDE_NAME,
        backend=None,
        transcripts=True,
        cells_boundaries=True,
        cells_table=True,
        mosaic_images=True,
    )
    print(f"[load] {sdata}")

    # --- Rename elements to canonical names ---
    # Images: {dataset_id}_z{Z_LAYER} → morphology_mip
    _rename_element(sdata.images, f"{dataset_id}_z{Z_LAYER}", "morphology_mip")
    # Points: {dataset_id}_transcripts → transcripts
    _rename_element(sdata.points, f"{dataset_id}_transcripts", "transcripts")
    # Shapes: {dataset_id}_polygons → cell_boundaries
    shapes_renamed = _rename_element(
        sdata.shapes, f"{dataset_id}_polygons", "cell_boundaries"
    )

    table = sdata.tables["table"]

    # --- Re-link the table to the renamed shapes ---
    # The table's annotation metadata still names the old polygons element
    # after the rename; point both the region column and the table attrs at
    # 'cell_boundaries' so the link survives the write.
    if shapes_renamed:
        _, region_key, instance_key = sd.models.get_table_keys(table)
        table.obs[region_key] = "cell_boundaries"
        table.obs[region_key] = table.obs[region_key].astype("category")
        sdata.set_table_annotates_spatialelement(
            "table",
            region="cell_boundaries",
            region_key=region_key,
            instance_key=instance_key,
        )

    # --- Update uns metadata on table ---
    # Build a per-region copy so the shared config dict is never mutated.
    table.uns.update({**DATASET_METADATA, "dataset_id": dataset_id})

    print(f"[done] {sdata}")

    # --- Write output ---
    # Remove any store left by a previous run before writing the new one.
    out_path = OUTPUT_DIR / f"{region}.zarr"
    if out_path.exists():
        shutil.rmtree(out_path)
    sdata.write(out_path)
    print(f"[write] {out_path}")

print("\nDone.")
18 changes: 18 additions & 0 deletions src/datasets/loaders/vzg_merscope/metadata_breastcancer.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
base_dir: "data/BreastCancerTMA/202409242358_240916JHHUBC0005XQ-V2V-HubcTMA-V2-BY_VMSC02511"
output_dir: "output/BreastCancerTMA"
regions:
- region_R1
- region_R2
- region_R3
- region_R4
- region_R5
z_layer: 3

dataset:
dataset_name: "240916JHHUBC0005XQ-V2V-HubcTMA-V2-BY"
dataset_url: "https://info.vizgen.com/merfish-2.0-data-release-form"
dataset_reference: ""
dataset_summary: "MERSCOPE V2 data release."
dataset_description: "MERSCOPE V2 data release. Human Breast Cancer TMA FFPE, D2M1774, V2 SOP, cell boundary stain"
dataset_organism: "Homo sapiens"
segmentation_id: "cellpose"
14 changes: 14 additions & 0 deletions src/datasets/loaders/vzg_merscope/metadata_humanbrain.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
base_dir: "data/HumanBrain/202503211645_HuBrain-FF-22676A1-D2M1769-RC_VMSC16710"
output_dir: "output/HumanBrain"
regions:
- region_R1
z_layer: 3

dataset:
dataset_name: "HuBrain-FF-22676A1-D2M1769-RC"
dataset_url: "https://info.vizgen.com/access-to-merfish-2.0-data-release"
dataset_reference: ""
dataset_summary: "MERSCOPE V2 Human Brain fresh frozen sample."
dataset_description: "MERSCOPE V2 Human Brain fresh frozen, sample 22676A1, panel D2M1769, Cellpose segmentation."
dataset_organism: "Homo sapiens"
segmentation_id: "cellpose"
Loading