Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions scripts/create_test_resources/xenium_multiome_combined.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash

# Creates the test resources stored under
# resources_test/task_spatial_segmentation/$DATASET_ID.

# Abort on the first failing command. This has to be enabled before the `cd`
# below: the script later runs `rm -rf` on a relative path, which is only safe
# when the working directory really is the repository root.
set -e

# get the root of the repository
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the commands below are run from the root of the repository
cd "$REPO_ROOT"

DATASET_ID=Xenium_V1_Human_Kidney_FFPE

RAW_DATA=resources_test/common
DATASET_DIR=resources_test/task_spatial_segmentation/$DATASET_ID

# start from an empty output directory; `rm -rf` is a no-op when it is missing
rm -rf "$DATASET_DIR"
mkdir -p "$DATASET_DIR"

# process dataset
# The summary/description describe the dataset named by $DATASET_ID (FFPE human
# kidney), matching --dataset_organism.
# NOTE(review): --dataset_reference looks like it was carried over from the
# mouse-brain version of this script -- confirm the citation for this dataset.
viash run src/data_processors/process_dataset_multimodal/config.vsh.yaml -- \
  --input_sp "$RAW_DATA/Xenium_V1_Human_Kidney_FFPE/Xenium_V1_Human_Kidney_FFPE_crop.zarr" \
  --output_spatial_unlabelled "$DATASET_DIR/spatial_unlabelled.zarr" \
  --output_spatial_solution "$DATASET_DIR/spatial_solution.zarr" \
  --output_scrnaseq_reference "$DATASET_DIR/scrnaseq_reference.h5ad" \
  --dataset_id "$DATASET_ID" \
  --dataset_name "Test the multimodal approach from 10X" \
  --dataset_url "https://www.10xgenomics.com/datasets/xenium-protein-ffpe-human-renal-carcinoma" \
  --dataset_reference "10.1038/s41586-023-06812-z" \
  --dataset_summary "Demonstration of gene expression and proteomic profiling for FFPE human kidney on the Xenium platform" \
  --dataset_description "Demonstration of gene expression profiling for FFPE human kidney" \
  --dataset_organism "homo_sapiens"

# Produce a prediction with the random_voronoi control method.
viash run src/control_methods/random_voronoi/config.vsh.yaml -- \
  --input "${DATASET_DIR}/spatial_unlabelled.zarr" \
  --input_solution "${DATASET_DIR}/spatial_solution.zarr" \
  --output "${DATASET_DIR}/prediction.zarr"

# Post-process the prediction together with the unlabelled spatial data.
viash run src/data_processors/process_prediction/config.vsh.yaml -- \
  --input_prediction "${DATASET_DIR}/prediction.zarr" \
  --input_spatial_unlabelled "${DATASET_DIR}/spatial_unlabelled.zarr" \
  --output "${DATASET_DIR}/processed_prediction.zarr"

# Score the processed prediction against the solution with the ARI metric.
viash run src/metrics/ari/config.vsh.yaml -- \
  --input_prediction "${DATASET_DIR}/processed_prediction.zarr" \
  --input_solution "${DATASET_DIR}/spatial_solution.zarr" \
  --output "${DATASET_DIR}/score.h5ad"

# Hand-write a state.yaml listing the files generated above. Not strictly
# required, but kept in case it proves useful.
cat > "${DATASET_DIR}/state.yaml" <<EOF
id: ${DATASET_ID}
spatial_unlabelled: spatial_unlabelled.zarr
spatial_solution: spatial_solution.zarr
scrnaseq_reference: scrnaseq_reference.h5ad
prediction: prediction.zarr
processed_prediction: processed_prediction.zarr
score: score.h5ad
EOF

# only run this if you have access to the openproblems-data bucket
# Sync the directory this script just generated ($DATASET_DIR) rather than a
# hard-coded dataset name, so the upload always matches what was produced.
# --dryrun only previews the operations; remove it to actually upload.
aws s3 sync --profile op \
  "$DATASET_DIR/" \
  "s3://openproblems-data/$DATASET_DIR/" \
  --delete --dryrun
85 changes: 85 additions & 0 deletions src/api/comp_data_processor_protein.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Component API for data processors; component configs pull it in via __merge__.
namespace: data_processors
info:
  type: data_processor
  type_info:
    label: Data processor
    summary: A data processor.
    description: |
      A component for processing a Common Dataset into a task-specific dataset.
argument_groups:
# Files read by the processor; only the spatial input is mandatory.
- name: Inputs
  arguments:
  - name: --input_sp
    __merge__: file_common_ist.yaml
    direction: input
    required: true
  - name: --input_sc
    __merge__: file_common_scrnaseq.yaml
    direction: input
    required: false
# Files written by the processor; the scRNA-seq reference is optional.
- name: Outputs
  arguments:
  - name: --output_spatial_unlabelled
    __merge__: file_spatial_unlabelled.yaml
    direction: output
    required: true
  - name: --output_spatial_solution
    __merge__: file_spatial_solution.yaml
    direction: output
    required: true
  - name: --output_scrnaseq_reference
    __merge__: file_scrnaseq_reference.yaml
    direction: output
    required: false
# Free-text metadata arguments; every one is a required string with a
# test_default under `info`.
- name: Combined Dataset Metadata
  description: Metadata for the combined dataset that will be stored.
  arguments:
  - name: --dataset_id
    type: string
    description: A unique identifier for the dataset
    required: true
    info:
      test_default: mouse_brain_combined
  - name: --dataset_name
    type: string
    description: Nicely formatted name.
    required: true
    info:
      test_default: Mouse brain combined dataset
  - name: --dataset_url
    type: string
    description: Link to the original source of the dataset.
    required: true
    info:
      test_default: "https://example.com/mouse_brain_combined"
  - name: --dataset_reference
    type: string
    description: Bibtex reference of the paper in which the dataset was published.
    required: true
    info:
      test_default: "10.1234/example.doi"
  - name: --dataset_summary
    type: string
    description: Short description of the dataset.
    required: true
    info:
      test_default: Combined dataset for mouse brain spatial transcriptomics
  - name: --dataset_description
    type: string
    description: Long description of the dataset.
    required: true
    info:
      test_default: This is a combined dataset for mouse brain spatial transcriptomics.
  # NOTE(review): the test-resource script passes a snake_case organism
  # ("homo_sapiens"); confirm which format is expected for this default.
  - name: --dataset_organism
    type: string
    description: The organism of the sample in the dataset.
    required: true
    info:
      test_default: Mus musculus
# Two resource directories (each with a `dest`) plus the Python test script.
test_resources:
- dest: resources_test/common/2023_10x_mouse_brain_xenium_rep1
  path: /resources_test/common/2023_10x_mouse_brain_xenium_rep1
- dest: resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2
  path: /resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2
- path: /common/component_tests/run_and_check_output.py
  type: python_script
35 changes: 35 additions & 0 deletions src/data_processors/process_dataset_multimodal/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Inherit namespace, arguments and test resources from the shared API file.
__merge__: ../../api/comp_data_processor_protein.yaml

# Named after its directory (process_dataset_multimodal) so that, within the
# data_processors namespace, it is distinct from any component called
# `process_dataset`.
# NOTE(review): confirm nothing refers to this component by the old name.
name: process_dataset_multimodal

# Component-specific arguments, declared in addition to the merged API file.
argument_groups:
- name: Processing parameters
  arguments:
  - name: --span
    type: double
    default: 0.3
    description: The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor='seurat_v3'.
  - name: --n_top_genes
    type: integer
    default: 3000
    description: Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'.

# The component's implementation.
resources:
- path: script.py
  type: python_script

# Docker engine with extra Python packages, plus a native engine.
engines:
- type: docker
  image: openproblems/base_python:1
  setup:
  - type: python
    packages:
    - scikit-learn
    - scikit-misc
  # NOTE(review): nesting reconstructed -- the partial is assumed to be merged
  # at the engine level (next to `setup`); confirm against the original file.
  __merge__:
  - /src/base/setup_spatialdata_partial.yaml
- type: native

# Runnable as an executable or through Nextflow; the Nextflow runner carries
# three labels.
runners:
- type: executable
- type: nextflow
  directives:
    label:
    - midmem
    - midcpu
    - midtime
Loading
Loading