Skip to content

implement conversion from spaceranger output to Zarr and H5AD #4

@matbonfanti

Description

@matbonfanti

Description of feature

Implement a module based on the following code from @SaraTerzol:

def load_and_save(
    NFG_path,
    sample_id,
    output_dir="/home/jovyan/work/results/object/",
    *,
    bin_size=16,
):
    """
    Load a Visium HD sample, extract one binned table, and save both the full
    dataset (Zarr) and that table (H5AD) to specified output directories.

    Parameters
    ----------
    NFG_path : str
        Path to the raw Visium HD data folder.
    sample_id : str
        Sample identifier. Passed to the reader as ``dataset_id`` and used as
        the stem of both output file names.
    output_dir : str
        Base directory where the ``Zarr`` and ``H5AD`` folders will be created.
        Default is "/home/jovyan/work/results/object/".
    bin_size : int, keyword-only
        Bin size in micrometres to read and export (e.g. 2, 8 or 16).
        Default is 16, which reproduces the previous hard-coded behaviour.

    Returns
    -------
    output_zarr : str
        Path to the saved Zarr dataset.
    output_h5ad : str
        Path to the saved H5AD table, named ``<sample_id>_<NNN>um.h5ad`` where
        ``NNN`` is the zero-padded bin size.

    Raises
    ------
    TypeError
        If ``bin_size`` is not an integer.
    ValueError
        If ``bin_size`` is not strictly positive.
    KeyError
        If the loaded dataset has no table for the requested bin size.
    """
    # Validate before the (potentially slow) load so bad input fails fast.
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(bin_size, bool) or not isinstance(bin_size, int):
        raise TypeError(
            f"bin_size must be an int, got {type(bin_size).__name__}"
        )
    if bin_size <= 0:
        raise ValueError(f"bin_size must be a positive integer, got {bin_size}")

    # Zero-padded label shared by the table key and the H5AD file name,
    # e.g. 16 -> "016um", 8 -> "008um", 2 -> "002um".
    bin_label = f"{bin_size:03d}um"
    table_key = f"square_{bin_label}"

    # ------------------------
    # Load dataset using spatialdata_io
    # ------------------------
    data = spatialdata_io.visium_hd(
        NFG_path,
        bin_size=bin_size,
        filtered_counts_file=True,
        dataset_id=sample_id,
    )

    # ------------------------
    # Extract the table for the requested bin size
    # ------------------------
    # Looked up before anything is written, so a missing table does not
    # leave a half-finished export on disk.
    table = data.tables[table_key]

    # ------------------------
    # Create output folders if they don't exist
    # ------------------------
    zarr_dir = os.path.join(output_dir, "Zarr")
    os.makedirs(zarr_dir, exist_ok=True)
    h5ad_dir = os.path.join(output_dir, "H5AD")
    os.makedirs(h5ad_dir, exist_ok=True)

    # ------------------------
    # Save full dataset as Zarr
    # ------------------------
    output_zarr = os.path.join(zarr_dir, f"{sample_id}.zarr")
    data.write(output_zarr, overwrite=True)

    # ------------------------
    # Save only the selected table as H5AD
    # ------------------------
    output_h5ad = os.path.join(h5ad_dir, f"{sample_id}_{bin_label}.h5ad")
    table.write_h5ad(output_h5ad)

    # ------------------------
    # Print summary and return paths
    # ------------------------
    print(f"Saved {sample_id} -> Zarr: {output_zarr}, H5AD: {output_h5ad}")

    return output_zarr, output_h5ad

Metadata

Metadata

Assignees

Labels

enhancement: New feature or request

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions