Visium HD Example (spots)

Contents

Visium HD Example (spots)#

This notebook shows how to visualize Visium HD data at spot level.

It uses a SpotLayer linked to adata.obsm[spatial_key]. For a bin-level visualization, see this notebook.

import harpy_vitessce as hpv

import tempfile
from pathlib import Path

tmp_dir = Path(tempfile.mkdtemp(prefix="visium_HD"))

import os

import harpy as hp
import pooch
from harpy.datasets import get_registry
from spatialdata import read_zarr

BASE_DIR = tmp_dir  # change the base directory

# fetch the data
registry = get_registry()
unzip_path = registry.fetch(
    "transcriptomics/visium_hd/mouse/visium_hd_mouse_small_intestine.zip",
    processor=pooch.Unzip(),
)

# path to a visium experiment
path = os.path.commonpath(unzip_path)

# read the downloaded data with harpy.
sdata = hp.io.visium_hd(
    path=path,
    bin_size=[16],
    dataset_id="Visium_HD_Mouse_Small_Intestine",
    output=os.path.join(BASE_DIR, "sdata_visium_hd.zarr"),
)

sdata = read_zarr(os.path.join(BASE_DIR, "sdata_visium_hd.zarr"))

INFO     The instance_key column in 'table.obs' ('table.obs[location_id]') will be relabeled to ensure a numeric   
         data type, with a continuous range and without including the value 0 (which is reserved for the           
         background). The new labels will be stored in a new column named 'relabeled_location_id'.

Run a Scanpy pipeline

from harpy_vitessce.data_utils import (
    copy_annotations,
    downcast_int64_to_int32,
    example_visium_hd_processing,
)

microns_per_pixel = 0.27376126715315274  # microns per pixel

adata = sdata["square_016um"].copy()
# takes around 5 minutes
adata = example_visium_hd_processing(
    adata,
    spatial_spot_size=100,  # for visualization
    min_counts=20,
    target_sum=None,
)

# copy the annotation from the preprocessed adata to the adata with the raw counts (we want to visualize these)

adata_annotated = copy_annotations(
    src=adata,
    tgt=sdata["square_016um"],
    obs_keys=["leiden", "total_counts", "n_genes_by_counts"],
    obsm_keys=["X_umap"],
)
# vitessce can not work with int64
_df, _, _ = downcast_int64_to_int32(
    adata_annotated.obs,
    strict=True,
)
adata_annotated.obs = _df

filtered out 143 cells that have less than 20 counts
filtered out 436 genes that are detected in less than 10 cells
normalizing counts per cell
    finished (0:00:01)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
computing PCA
    with n_comps=50
    finished (0:01:36)
computing neighbors
    using 'X_pca' with n_pcs = 30
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:18)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm)
    'umap', UMAP parameters (adata.uns) (0:00:38)
running Leiden clustering
    finished: found 18 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:04:06)

../../_images/c4034eedb70ae30f55e198cf83b6e4e1e08f243b897ba244e160a6482f4d520c.png

../../_images/9fda6a24a7a5d31eea20e9a59eb26558f028d04f35904a86f3fd6ecaee6705df.png

Add AnnData to the SpatialData zarr store, and add a spatial key in microns.#

from spatialdata.models import TableModel

# convert to micron, because we will use this spatial key to visualize the bins/spots
adata_annotated.obsm["spatial_microns"] = (
    adata_annotated.obsm["spatial"] * microns_per_pixel
)

sdata = hp.tb.add_table(
    sdata,
    adata=adata_annotated.copy(),
    region=adata_annotated.uns[TableModel.ATTRS_KEY][TableModel.REGION_KEY],
    output_table_name="square_016um_annotated",
    overwrite=True,
)

Add a global coordinate system.#

from spatialdata.transformations import (
    Identity,
    set_transformation,
)

# add a global coordinate system -> required by vitessce when rendering from ome zarr source.
set_transformation(
    sdata["Visium_HD_Mouse_Small_Intestine_full_image"],
    transformation=Identity(),
    set_all=False,
    to_coordinate_system="global",
    write_to_sdata=sdata,  # back to zarr
)

Create Vitessce config#

import numpy as np

# get the center of the tissue
xy = sdata["square_016um_annotated"].obsm["spatial_microns"]
center_x = float(np.mean(xy[:, 0]))
center_y = float(np.mean(xy[:, 1]))

from IPython.display import HTML, display

vc = hpv.seq_based_from_split_sources(
    image_source=sdata.path / "images" / "Visium_HD_Mouse_Small_Intestine_full_image",
    adata_source=sdata.path
    / "tables"
    / "square_016um_annotated",  # We use spot based implementation of vitessce, so vitessce will look at .obsm["spatial_microns"]
    cluster_key="leiden",
    cluster_key_display_name="Leiden",
    spatial_key="spatial_microns",  # obsm in microns
    microns_per_pixel=microns_per_pixel,  # add a scale when rendering
    spot_radius_size_micron=8,  # 16//2
    visualize_as_rgb=True,
    center=(center_x, center_y),
    zoom=-3.2,
    qc_obs_feature_keys=["total_counts", "n_genes_by_counts"],
    embedding_key="X_umap",
    embedding_display_name="UMAP",
)

url = vc.web_app()
display(HTML(f'<a href="{url}" target="_blank">Open in Vitessce</a>'))

Open in Vitessce