From 326f7bdeeeb84c77b35e7be3e3fbc972b46816b7 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Tue, 12 Nov 2024 19:04:56 -0800 Subject: [PATCH 01/18] save state Signed-off-by: Ayush Kamat --- Justfile | 6 +- pyproject.toml | 14 +- src/latch/resources/tasks.py | 38 +- src/latch/types/glob.py | 2 +- src/latch/types/metadata.py | 891 ----------------------- src/latch/types/metadata/__init__.py | 66 ++ src/latch/types/metadata/flows.py | 197 +++++ src/latch/types/metadata/latch.py | 293 ++++++++ src/latch/types/metadata/nextflow.py | 205 ++++++ src/latch/types/metadata/snakemake.py | 196 +++++ src/latch/types/metadata/snakemake_v2.py | 77 ++ src/latch/types/metadata/utils.py | 36 + src/latch_cli/centromere/ctx.py | 85 +-- src/latch_cli/main.py | 248 +++---- src/latch_cli/snakemake/config/utils.py | 88 ++- src/latch_cli/snakemake/v2/__init__.py | 0 src/latch_cli/snakemake/v2/utils.py | 26 + src/latch_cli/snakemake/v2/workflow.py | 161 ++++ src/latch_cli/utils/stateful_writer.py | 29 + uv.lock | 305 +++++++- 20 files changed, 1795 insertions(+), 1168 deletions(-) delete mode 100644 src/latch/types/metadata.py create mode 100644 src/latch/types/metadata/__init__.py create mode 100644 src/latch/types/metadata/flows.py create mode 100644 src/latch/types/metadata/latch.py create mode 100644 src/latch/types/metadata/nextflow.py create mode 100644 src/latch/types/metadata/snakemake.py create mode 100644 src/latch/types/metadata/snakemake_v2.py create mode 100644 src/latch/types/metadata/utils.py create mode 100644 src/latch_cli/snakemake/v2/__init__.py create mode 100644 src/latch_cli/snakemake/v2/utils.py create mode 100644 src/latch_cli/snakemake/v2/workflow.py create mode 100644 src/latch_cli/utils/stateful_writer.py diff --git a/Justfile b/Justfile index bb726da65..e9497eaf0 100644 --- a/Justfile +++ b/Justfile @@ -1,7 +1,7 @@ # Setup install: - uv sync + uv sync --group dev --group docs # Packaging @@ -16,8 +16,8 @@ publish: # Testing test: - export TEST_TOKEN=$(cat 
~/.latch/token) &&\ - pytest -s tests + export TEST_TOKEN=$(cat ~/.latch/token) + pytest -s # Docs diff --git a/pyproject.toml b/pyproject.toml index 5670cafc4..12884e5c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.54.0.a3" +version = "2.54.0.a8" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [ @@ -34,13 +34,13 @@ dependencies = [ "scp>=0.14.0", "boto3>=1.26.0", "tqdm>=4.63.0", - "lytekit==0.15.13", + "lytekit==0.15.14", "lytekitplugins-pods==0.6.2", "typing-extensions>=4.12.0", "apscheduler>=3.10.0", - "gql==3.4.0", + "gql>=3.5.0,<4.0.0", "graphql-core==3.2.3", - "requests-toolbelt==0.10.1", + "requests-toolbelt>=1.0.0,<2.0.0", "python-dateutil>=2.8", "GitPython==3.1.40", @@ -98,11 +98,10 @@ docs = [ ] [tool.ruff] +line-length = 100 target-version = "py39" [tool.ruff.lint] -preview = true - pydocstyle = { convention = "google" } extend-select = [ "F", @@ -171,6 +170,9 @@ ignore = [ "ANN101", "ANN102", + "ANN201", + + "D202", "E402", "E501", diff --git a/src/latch/resources/tasks.py b/src/latch/resources/tasks.py index 06b1323cd..a5d6aed15 100644 --- a/src/latch/resources/tasks.py +++ b/src/latch/resources/tasks.py @@ -296,8 +296,7 @@ def _get_small_pod() -> Pod: ) }, pod_spec=V1PodSpec( - runtime_class_name="sysbox-runc", - containers=[primary_container], + runtime_class_name="sysbox-runc", containers=[primary_container] ), primary_container_name="primary", ) @@ -517,11 +516,7 @@ class _NGConfig: max_storage_gb_ish = int(max_storage_gib * Units.GiB / Units.GB) -def _custom_task_config( - cpu: int, - memory: int, - storage_gib: int, -) -> Pod: +def _custom_task_config(cpu: int, memory: int, storage_gib: int) -> Pod: target_ng = None for ng in taint_data: if ( @@ -591,10 +586,7 @@ def custom_task( """ if callable(cpu) or callable(memory) or callable(storage_gib): task_config = 
DynamicTaskConfig( - cpu=cpu, - memory=memory, - storage=storage_gib, - pod_config=_get_small_pod(), + cpu=cpu, memory=memory, storage=storage_gib, pod_config=_get_small_pod() ) return functools.partial(task, task_config=task_config, timeout=timeout) @@ -627,3 +619,27 @@ def nextflow_runtime_task(cpu: int, memory: int, storage_gib: int = 50): ] return functools.partial(task, task_config=task_config) + + +def snakemake_runtime_task(*, cpu: int, memory: int, storage_gib: int = 50): + task_config = _custom_task_config(cpu, memory, storage_gib) + + task_config.pod_spec.automount_service_account_token = True + + assert len(task_config.pod_spec.containers) == 1 + task_config.pod_spec.containers[0].volume_mounts = [ + V1VolumeMount(mount_path="/snakemake-workdir", name="snakemake-workdir") + ] + + task_config.pod_spec.volumes = [ + V1Volume( + name="snakemake-workdir", + persistent_volume_claim=V1PersistentVolumeClaimVolumeSource( + # this value will be injected by flytepropeller + # ayush: this is also used by snakemake bc why not + claim_name="nextflow-pvc-placeholder" + ), + ) + ] + + return functools.partial(task, task_config=task_config) diff --git a/src/latch/types/glob.py b/src/latch/types/glob.py index 1dbee9488..0f365ff95 100644 --- a/src/latch/types/glob.py +++ b/src/latch/types/glob.py @@ -20,7 +20,7 @@ def file_glob( represented by the `remote_directory`. Args: - pattern: A glob pattern to match a set of files, eg. '\*.py'. Will + pattern: A glob pattern to match a set of files, eg. '*.py'. Will resolve paths with respect to the working directory of the caller. remote_directory: A valid latch URL pointing to a directory, eg. latch:///foo. This _must_ be a directory and not a file. 
diff --git a/src/latch/types/metadata.py b/src/latch/types/metadata.py deleted file mode 100644 index 32026ddb3..000000000 --- a/src/latch/types/metadata.py +++ /dev/null @@ -1,891 +0,0 @@ -import csv -import functools -import re -from dataclasses import Field, asdict, dataclass, field, fields, is_dataclass -from enum import Enum -from pathlib import Path -from textwrap import dedent, indent -from typing import ( - Any, - Callable, - ClassVar, - Collection, - Dict, - Generic, - List, - Literal, - Optional, - Protocol, - Tuple, - Type, - TypeVar, - Union, - get_args, - get_origin, -) - -import click -import yaml -from typing_extensions import TypeAlias - -from latch_cli.snakemake.config.utils import validate_snakemake_type -from latch_cli.utils import identifier_suffix_from_str - -from .directory import LatchDir, LatchOutputDir -from .file import LatchFile - - -@dataclass -class LatchRule: - """Class describing a rule that a parameter input must follow""" - - regex: str - """A string regular expression which inputs must match""" - message: str - """The message to render when an input does not match the regex""" - - @property - def dict(self): - return asdict(self) - - def __post_init__(self): - try: - re.compile(self.regex) - except re.error as e: - raise ValueError(f"Malformed regex {self.regex}: {e.msg}") - - -class LatchAppearanceEnum(Enum): - line = "line" - paragraph = "paragraph" - - -@dataclass(frozen=True) -class MultiselectOption: - name: str - value: object - - -@dataclass(frozen=True) -class Multiselect: - options: List[MultiselectOption] = field(default_factory=list) - allow_custom: bool = False - - -# backwards compatibility -LatchAppearanceType = LatchAppearanceEnum - -LatchAppearance: TypeAlias = Union[LatchAppearanceEnum, Multiselect] - - -@dataclass -class LatchAuthor: - """Class describing metadata about the workflow author""" - - name: Optional[str] = None - """The name of the author""" - email: Optional[str] = None - """The email of the author""" 
- github: Optional[str] = None - """A link to the github profile of the author""" - - -@dataclass(frozen=True) -class FlowBase: - """Parent class for all flow elements - - Available flow elements: - - * :class:`~latch.types.metadata.Params` - - * :class:`~latch.types.metadata.Text` - - * :class:`~latch.types.metadata.Title` - - * :class:`~latch.types.metadata.Section` - - * :class:`~latch.types.metadata.Spoiler` - - * :class:`~latch.types.metadata.Fork` - """ - - ... - - -@dataclass(frozen=True, init=False) -class Section(FlowBase): - """Flow element that displays a child flow in a card with a given title - - Example: - - - .. image:: ../assets/flow-example/flow_example_1.png - :alt: Example of a user interface for a workflow with a custom flow - - .. image:: ../assets/flow-example/flow_example_spoiler.png - :alt: Example of a spoiler flow element - - - The `LatchMetadata` for the example above can be defined as follows: - - .. code-block:: python - - from latch.types import LatchMetadata, LatchParameter - from latch.types.metadata import FlowBase, Section, Text, Params, Fork, Spoiler - from latch import workflow - - flow = [ - Section( - "Samples", - Text( - "Sample provided has to include an identifier for the sample (Sample name)" - " and one or two files corresponding to the reads (single-end or paired-end, respectively)" - ), - Fork( - "sample_fork", - "Choose read type", - paired_end=ForkBranch("Paired-end", Params("paired_end")), - single_end=ForkBranch("Single-end", Params("single_end")), - ), - ), - Section( - "Quality threshold", - Text( - "Select the quality value in which a base is qualified." 
- "Quality value refers to a Phred quality score" - ), - Params("quality_threshold"), - ), - Spoiler( - "Output directory", - Text("Name of the output directory to send results to."), - Params("output_directory"), - ), - ] - - metadata = LatchMetadata( - display_name="fastp - Flow Tutorial", - author=LatchAuthor( - name="LatchBio", - ), - parameters={ - "sample_fork": LatchParameter(), - "paired_end": LatchParameter( - display_name="Paired-end reads", - description="FASTQ files", - batch_table_column=True, - ), - "single_end": LatchParameter( - display_name="Single-end reads", - description="FASTQ files", - batch_table_column=True, - ), - "output_directory": LatchParameter( - display_name="Output directory", - ), - }, - flow=flow, - ) - - @workflow(metadata) - def fastp( - sample_fork: str, - paired_end: PairedEnd, - single_end: Optional[SingleEnd] = None, - output_directory: str = "fastp_results", - ) -> LatchDir: - ... - """ - - section: str - """Title of the section""" - flow: List[FlowBase] - """Flow displayed in the section card""" - - def __init__(self, section: str, *flow: FlowBase): - object.__setattr__(self, "section", section) - object.__setattr__(self, "flow", list(flow)) - - -@dataclass(frozen=True) -class Text(FlowBase): - """Flow element that displays a markdown string""" - - text: str - """Markdown body text""" - - -@dataclass(frozen=True) -class Title(FlowBase): - """Flow element that displays a markdown title""" - - title: str - """Markdown title text""" - - -@dataclass(frozen=True, init=False) -class Params(FlowBase): - """Flow element that displays parameter widgets""" - - params: List[str] - """ - Names of parameters whose widgets will be displayed. - Order is preserved. 
Duplicates are allowed - """ - - def __init__(self, *args: str): - object.__setattr__(self, "params", list(args)) - - -@dataclass(frozen=True, init=False) -class Spoiler(FlowBase): - """Flow element that displays a collapsible card with a given title""" - - spoiler: str - """Title of the spoiler""" - flow: List[FlowBase] - """Flow displayed in the spoiler card""" - - def __init__(self, spoiler: str, *flow: FlowBase): - object.__setattr__(self, "spoiler", spoiler) - object.__setattr__(self, "flow", list(flow)) - - -@dataclass(frozen=True, init=False) -class ForkBranch: - """Definition of a :class:`~latch.types.metadata.Fork` branch""" - - display_name: str - """String displayed in the fork's multibutton""" - flow: List[FlowBase] - """Child flow displayed in the fork card when the branch is active""" - - def __init__(self, display_name: str, *flow: FlowBase): - object.__setattr__(self, "display_name", display_name) - object.__setattr__(self, "flow", list(flow)) - - -@dataclass(frozen=True, init=False) -class Fork(FlowBase): - """Flow element that displays a set of mutually exclusive alternatives - - Displays a title, followed by a horizontal multibutton for selecting a branch, - then a card for the active branch - """ - - fork: str - """Name of a `str`-typed parameter to store the active branch's key""" - display_name: str - """Title shown above the fork selector""" - flows: Dict[str, ForkBranch] - """ - Mapping between branch keys to branch definitions. 
- Order determines the order of options in the multibutton - """ - - def __init__(self, fork: str, display_name: str, **flows: ForkBranch): - object.__setattr__(self, "fork", fork) - object.__setattr__(self, "display_name", display_name) - object.__setattr__(self, "flows", flows) - - -@dataclass -class LatchParameter: - """Class for organizing parameter metadata""" - - display_name: Optional[str] = None - """The name used to display the parameter on Latch Console""" - description: Optional[str] = None - """The description of the parameter's role in the workflow""" - hidden: bool = False - """Whether or not the parameter should be hidden by default""" - section_title: Optional[str] = None - """Whether this parameter should start a new section""" - placeholder: Optional[str] = None - """ - What should be rendered as a placeholder in the input box - of the parameter before any value is inputed. - """ - comment: Optional[str] = None - """Any comment on the parameter itself""" - output: bool = False - """ - Whether or not this parameter is an output (used to disable - path validation before launching a workflow) - """ - batch_table_column: bool = False - """ - Whether this parameter should be given a column in the batch - table at the top of the workflow inputs - """ - allow_dir: bool = True - """ - Whether or not this parameter should accept directories in UI - """ - allow_file: bool = True - """ - Whether or not this parameter should accept files in UI. - """ - appearance_type: LatchAppearance = LatchAppearanceEnum.line - """ - Whether the parameter should be rendered as a line or paragraph - (must be exactly one of either LatchAppearanceType.line or - LatchAppearanceType.paragraph) - """ - rules: List[LatchRule] = field(default_factory=list) - """ - A list of LatchRule objects that inputs to this parameter must follow - """ - detail: Optional[str] = None - samplesheet: Optional[bool] = None - """ - Use samplesheet input UI. Allows importing from Latch Registry. 
- Parameter type must be a list of dataclasses - """ - allowed_tables: Optional[List[int]] = None - """ - If using the samplesheet component, specify a set of Registry Tables (by ID) to allow selection from. - If not provided, all Tables are allowed. - - Only has an effect if `samplesheet=True`. - """ - _custom_ingestion: Optional[str] = None - - def __str__(self): - metadata_yaml = yaml.safe_dump(self.dict, sort_keys=False) - if self.description is not None: - return f"{self.description}\n{metadata_yaml}" - return metadata_yaml - - @property - def dict(self): - parameter_dict: Dict[str, Any] = {"display_name": self.display_name} - - if self.output: - parameter_dict["output"] = True - if self.batch_table_column: - parameter_dict["batch_table_column"] = True - if self.samplesheet: - parameter_dict["samplesheet"] = True - if self.allowed_tables is not None: - parameter_dict["allowed_tables"] = [str(x) for x in self.allowed_tables] - - temp_dict: Dict[str, Any] = {"hidden": self.hidden} - if self.section_title is not None: - temp_dict["section_title"] = self.section_title - if self._custom_ingestion is not None: - temp_dict["custom_ingestion"] = self._custom_ingestion - - parameter_dict["_tmp"] = temp_dict - - appearance_dict: Dict[str, Any] - if isinstance(self.appearance_type, LatchAppearanceEnum): - appearance_dict = {"type": self.appearance_type.value} - elif isinstance(self.appearance_type, Multiselect): - appearance_dict = {"multiselect": asdict(self.appearance_type)} - else: - appearance_dict = {} - - if self.placeholder is not None: - appearance_dict["placeholder"] = self.placeholder - if self.comment is not None: - appearance_dict["comment"] = self.comment - if self.detail is not None: - appearance_dict["detail"] = self.detail - - appearance_dict["file_type"] = ( - "ANY" - if self.allow_file and self.allow_dir - else "FILE" if self.allow_file else "DIR" if self.allow_dir else "NONE" - ) - - parameter_dict["appearance"] = appearance_dict - - if len(self.rules) 
> 0: - rules = [] - for rule in self.rules: - rules.append(rule.dict) - parameter_dict["rules"] = rules - - return {"__metadata__": parameter_dict} - - -# https://stackoverflow.com/questions/54668000/type-hint-for-an-instance-of-a-non-specific-dataclass -class _IsDataclass(Protocol): - __dataclass_fields__: ClassVar[Dict[str, Field]] - - -ParameterType: TypeAlias = Union[ - None, - int, - float, - str, - bool, - LatchFile, - LatchDir, - Enum, - _IsDataclass, - Collection["ParameterType"], -] - - -T = TypeVar("T", bound=ParameterType) - - -@dataclass -class SnakemakeParameter(Generic[T], LatchParameter): - type: Optional[Type[T]] = None - """ - The python type of the parameter. - """ - default: Optional[T] = None - - -@dataclass -class SnakemakeFileParameter(SnakemakeParameter[Union[LatchFile, LatchDir]]): - """ - Deprecated: use `file_metadata` keyword in `SnakemakeMetadata` instead - """ - - type: Optional[ - Union[ - Type[LatchFile], - Type[LatchDir], - ] - ] = None - """ - The python type of the parameter. - """ - path: Optional[Path] = None - """ - The path where the file passed to this parameter will be copied. - """ - config: bool = False - """ - Whether or not the file path is exposed in the Snakemake config - """ - download: bool = False - """ - Whether or not the file is downloaded in the JIT step - """ - - -@dataclass -class SnakemakeFileMetadata: - path: Path - """ - The local path where the file passed to this parameter will be copied - """ - config: bool = False - """ - If `True`, expose the file in the Snakemake config - """ - download: bool = False - """ - If `True`, download the file in the JIT step - """ - - -@dataclass -class NextflowParameter(Generic[T], LatchParameter): - type: Optional[Type[T]] = None - """ - The python type of the parameter. 
- """ - default: Optional[T] = None - """ - Default value of the parameter - """ - - samplesheet_type: Literal["csv", "tsv", None] = None - """ - The type of samplesheet to construct from the input parameter. - - Only used if the provided parameter is a samplesheet (samplesheet=True) - """ - samplesheet_constructor: Optional[Callable[[T], Path]] = None - """ - A custom samplesheet constructor. - - Should return the path of the constructed samplesheet. If samplesheet_type is also specified, this takes precedence. - Only used if the provided parameter is a samplesheet (samplesheet=True) - """ - results_paths: Optional[List[Path]] = None - """ - Output sub-paths that will be exposed in the UI under the "Results" tab on the workflow execution page. - - Only valid where the `type` attribute is a LatchDir - """ - - def __post_init__(self): - if self.results_paths is not None and self.type not in { - LatchDir, - LatchOutputDir, - }: - click.secho( - "`results_paths` attribute can only be defined for parameters" - " of type `LatchDir`.", - fg="red", - ) - raise click.exceptions.Exit(1) - - if not self.samplesheet or self.samplesheet_constructor is not None: - return - - t = self.type - if get_origin(t) is not list or not is_dataclass(get_args(t)[0]): - click.secho("Samplesheets must be a list of dataclasses.", fg="red") - raise click.exceptions.Exit(1) - - if self.samplesheet_type is not None: - delim = "," if self.samplesheet_type == "csv" else "\t" - self.samplesheet_constructor = functools.partial( - _samplesheet_constructor, t=get_args(self.type)[0], delim=delim - ) - return - - click.secho( - dedent("""\ - A Samplesheet constructor is required for a samplesheet parameter. Please either provide a value for - `samplesheet_type` or provide a custom callable to the `samplesheet_constructor` argument. 
- """), - fg="red", - ) - raise click.exceptions.Exit(1) - - -DC = TypeVar("DC", bound=_IsDataclass) - - -def _samplesheet_repr(v: Any) -> str: - if v is None: - return "" - if isinstance(v, LatchFile) or isinstance(v, LatchDir): - return v.remote_path - if isinstance(v, Enum): - return getattr(v, "value") - - return str(v) - - -def _samplesheet_constructor(samples: List[DC], t: DC, delim: str = ",") -> Path: - samplesheet = Path("samplesheet.csv") - - with open(samplesheet, "w") as f: - writer = csv.DictWriter(f, [f.name for f in fields(t)], delimiter=delim) - writer.writeheader() - - for sample in samples: - row_data = { - f.name: _samplesheet_repr(getattr(sample, f.name)) - for f in fields(sample) - } - writer.writerow(row_data) - - return samplesheet - - -@dataclass(frozen=True) -class NextflowRuntimeResources: - """Resources for Nextflow runtime tasks""" - - cpus: Optional[int] = 4 - """ - Number of CPUs required for the task - """ - memory: Optional[int] = 8 - """ - Memory required for the task in GiB - """ - storage_gib: Optional[int] = 100 - """ - Storage required for the task in GiB - """ - storage_expiration_hours: int = 0 - """ - Number of hours after execution failure that workdir should be retained in EFS. - Warning: Increasing this number will increase your Nextflow EFS Storage costs. - """ - - -@dataclass -class LatchMetadata: - """Class for organizing workflow metadata - - Example: - - .. 
code-block:: python - - from latch.types import LatchMetadata, LatchAuthor, LatchRule, LatchAppearanceType - - metadata = LatchMetadata( - parameters={ - "read1": LatchParameter( - display_name="Read 1", - description="Paired-end read 1 file to be assembled.", - hidden=True, - section_title="Sample Reads", - placeholder="Select a file", - comment="This is a comment", - output=False, - appearance_type=LatchAppearanceType.paragraph, - rules=[ - LatchRule( - regex="(.fasta|.fa|.faa|.fas)$", - message="Only .fasta, .fa, .fas, or .faa extensions are valid" - ) - ], - batch_table_column=True, # Show this parameter in batched mode. - # The below parameters will be displayed on the side bar of the workflow - documentation="https://github.com/author/my_workflow/README.md", - author=LatchAuthor( - name="Workflow Author", - email="licensing@company.com", - github="https://github.com/author", - ), - repository="https://github.com/author/my_workflow", - license="MIT", - # If the workflow is public, display it under the defined categories on Latch to be more easily discovered by users - tags=["NGS", "MAG"], - ), - ) - - @workflow(metadata) - def wf(read1: LatchFile): - ... 
- - """ - - display_name: str - """The human-readable name of the workflow""" - author: LatchAuthor - """ A `LatchAuthor` object that describes the author of the workflow""" - documentation: Optional[str] = None - """A link to documentation for the workflow itself""" - repository: Optional[str] = None - """A link to the repository where the code for the workflow is hosted""" - license: str = "MIT" - """A SPDX identifier""" - parameters: Dict[str, LatchParameter] = field(default_factory=dict) - """A dictionary mapping parameter names (strings) to `LatchParameter` objects""" - wiki_url: Optional[str] = None - video_tutorial: Optional[str] = None - tags: List[str] = field(default_factory=list) - flow: List[FlowBase] = field(default_factory=list) - - no_standard_bulk_execution: bool = False - """ - Disable the standard CSV-based bulk execution. Intended for workflows that - support an alternative way of processing bulk data e.g. using a samplesheet - parameter - """ - _non_standard: Dict[str, object] = field(default_factory=dict) - - @property - def dict(self): - metadata_dict = asdict(self) - # remove parameters since that will be handled by each parameters' dict() method - del metadata_dict["parameters"] - metadata_dict["license"] = {"id": self.license} - - # flows override all other rendering, so disable them entirely if not provided - if len(self.flow) == 0: - del metadata_dict["flow"] - - for key in self._non_standard: - metadata_dict[key] = self._non_standard[key] - - return {"__metadata__": metadata_dict} - - def __str__(self): - def _parameter_str(t: Tuple[str, LatchParameter]): - parameter_name, parameter_meta = t - return f"{parameter_name}:\n" + indent( - str(parameter_meta), " ", lambda _: True - ) - - metadata_yaml = yaml.safe_dump(self.dict, sort_keys=False) - parameter_yaml = "".join(map(_parameter_str, self.parameters.items())) - return ( - metadata_yaml + "Args:\n" + indent(parameter_yaml, " ", lambda _: True) - ).strip("\n ") - - -@dataclass -class 
DockerMetadata: - """Class describing credentials for private docker repositories""" - - username: str - """ - The account username for the private repository - """ - secret_name: str - """ - The name of the Latch Secret that contains the password for the private repository - """ - - -@dataclass -class EnvironmentConfig: - """Class describing environment for spawning Snakemake tasks""" - - use_conda: bool = False - """ - Use Snakemake `conda` directive to spawn tasks in conda environments - """ - use_container: bool = False - """ - Use Snakemake `container` directive to spawn tasks in Docker containers - """ - container_args: List[str] = field(default_factory=list) - """ - Additional arguments to use when running Docker containers - """ - - -FileMetadata: TypeAlias = Dict[str, Union[SnakemakeFileMetadata, "FileMetadata"]] - - -@dataclass -class SnakemakeMetadata(LatchMetadata): - """Class for organizing Snakemake workflow metadata""" - - output_dir: Optional[LatchDir] = None - """ - Directory for snakemake workflow outputs - """ - name: Optional[str] = None - """ - Name of the workflow - """ - docker_metadata: Optional[DockerMetadata] = None - """ - Credentials configuration for private docker repositories - """ - env_config: EnvironmentConfig = field(default_factory=EnvironmentConfig) - """ - Environment configuration for spawning Snakemake tasks - """ - parameters: Dict[str, SnakemakeParameter] = field(default_factory=dict) - """ - A dictionary mapping parameter names (strings) to `SnakemakeParameter` objects - """ - file_metadata: FileMetadata = field(default_factory=dict) - """ - A dictionary mapping parameter names to `SnakemakeFileMetadata` objects - """ - cores: int = 4 - """ - Number of cores to use for Snakemake tasks (equivalent of Snakemake's `--cores` flag) - """ - about_page_content: Optional[Path] = None - """ - Path to a markdown file containing information about the pipeline - rendered in the About page. 
- """ - - def validate(self): - if self.about_page_content is not None: - if not isinstance(self.about_page_content, Path): - click.secho( - f"`about_page_content` parameter ({self.about_page_content}) must" - " be a Path object.", - fg="red", - ) - raise click.exceptions.Exit(1) - - for name, param in self.parameters.items(): - if param.default is None: - continue - try: - validate_snakemake_type(name, param.type, param.default) - except ValueError as e: - click.secho(e, fg="red") - raise click.exceptions.Exit(1) - - def __post_init__(self): - self.validate() - - if self.name is None: - self.name = ( - f"snakemake_{identifier_suffix_from_str(self.display_name.lower())}" - ) - - global _snakemake_metadata - _snakemake_metadata = self - - @property - def dict(self): - d = super().dict - # ayush: Paths aren't JSON serializable but ribosome doesn't need it anyway so we can just delete it - del d["__metadata__"]["about_page_content"] - return d - - -_snakemake_metadata: Optional[SnakemakeMetadata] = None - - -@dataclass -class NextflowMetadata(LatchMetadata): - name: Optional[str] = None - """ - Name of the workflow - """ - parameters: Dict[str, NextflowParameter] = field(default_factory=dict) - """ - A dictionary mapping parameter names (strings) to `NextflowParameter` objects - """ - runtime_resources: NextflowRuntimeResources = field( - default_factory=NextflowRuntimeResources - ) - """ - Resources (cpu/memory/storage) for Nextflow runtime task - """ - execution_profiles: List[str] = field(default_factory=list) - """ - Execution config profiles to expose to users in the Latch console - """ - log_dir: Optional[LatchDir] = None - """ - Directory to dump Nextflow logs - """ - upload_command_logs: bool = False - """ - Upload .command.* logs to Latch Data after each task execution - """ - about_page_path: Optional[Path] = None - """ - Path to a markdown file containing information about the pipeline - rendered in the About page. 
- """ - - def validate(self): - if self.about_page_path is not None: - if not isinstance(self.about_page_path, Path): - click.secho( - f"`about_page_path` parameter ({self.about_page_path}) must be a" - " Path object.", - fg="red", - ) - - @property - def dict(self): - d = super().dict - del d["__metadata__"]["about_page_path"] - return d - - def __post_init__(self): - self.validate() - - if self.name is None: - if self.display_name is None: - click.secho( - "Name or display_name must be provided in metadata", fg="red" - ) - self.name = f"nf_{identifier_suffix_from_str(self.display_name.lower())}" - else: - self.name = identifier_suffix_from_str(self.name) - - global _nextflow_metadata - _nextflow_metadata = self - - -_nextflow_metadata: Optional[NextflowMetadata] = None diff --git a/src/latch/types/metadata/__init__.py b/src/latch/types/metadata/__init__.py new file mode 100644 index 000000000..3dc713798 --- /dev/null +++ b/src/latch/types/metadata/__init__.py @@ -0,0 +1,66 @@ +# for backwards compatibility so as not to break existing imports + +from .flows import FlowBase, Fork, ForkBranch, Params, Section, Spoiler, Text, Title +from .latch import ( + LatchAppearance, + LatchAppearanceEnum, + LatchAppearanceType, + LatchAuthor, + LatchMetadata, + LatchParameter, + LatchRule, + Multiselect, + MultiselectOption, +) +from .nextflow import ( + NextflowMetadata, + NextflowParameter, + NextflowRuntimeResources, + _nextflow_metadata, + _samplesheet_constructor, + _samplesheet_repr, +) +from .snakemake import ( + DockerMetadata, + EnvironmentConfig, + FileMetadata, + SnakemakeFileMetadata, + SnakemakeFileParameter, + SnakemakeMetadata, + SnakemakeParameter, + _snakemake_metadata, +) + +__all__ = [ + "FlowBase", + "Fork", + "ForkBranch", + "Params", + "Section", + "Spoiler", + "Text", + "Title", + "LatchAppearance", + "LatchAppearanceEnum", + "LatchAppearanceType", + "LatchAuthor", + "LatchMetadata", + "LatchParameter", + "LatchRule", + "Multiselect", + 
"MultiselectOption", + "NextflowMetadata", + "NextflowParameter", + "NextflowRuntimeResources", + "_nextflow_metadata", + "_samplesheet_constructor", + "_samplesheet_repr", + "DockerMetadata", + "EnvironmentConfig", + "FileMetadata", + "SnakemakeFileMetadata", + "SnakemakeFileParameter", + "SnakemakeMetadata", + "SnakemakeParameter", + "_snakemake_metadata", +] diff --git a/src/latch/types/metadata/flows.py b/src/latch/types/metadata/flows.py new file mode 100644 index 000000000..3d0a21d8e --- /dev/null +++ b/src/latch/types/metadata/flows.py @@ -0,0 +1,197 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class FlowBase: + """Parent class for all flow elements + + Available flow elements: + + * :class:`~latch.types.metadata.Params` + + * :class:`~latch.types.metadata.Text` + + * :class:`~latch.types.metadata.Title` + + * :class:`~latch.types.metadata.Section` + + * :class:`~latch.types.metadata.Spoiler` + + * :class:`~latch.types.metadata.Fork` + """ + + +@dataclass(frozen=True, init=False) +class Section(FlowBase): + """Flow element that displays a child flow in a card with a given title + + Example: + + .. image:: ../assets/flow-example/flow_example_1.png + :alt: Example of a user interface for a workflow with a custom flow + + .. image:: ../assets/flow-example/flow_example_spoiler.png + :alt: Example of a spoiler flow element + + + The `LatchMetadata` for the example above can be defined as follows: + + .. 
code-block:: python + + from latch.types import LatchMetadata, LatchParameter + from latch.types.metadata import FlowBase, Section, Text, Params, Fork, Spoiler + from latch import workflow + + flow = [ + Section( + "Samples", + Text( + "Sample provided has to include an identifier for the sample (Sample name)" + " and one or two files corresponding to the reads (single-end or paired-end, respectively)" + ), + Fork( + "sample_fork", + "Choose read type", + paired_end=ForkBranch("Paired-end", Params("paired_end")), + single_end=ForkBranch("Single-end", Params("single_end")), + ), + ), + Section( + "Quality threshold", + Text( + "Select the quality value in which a base is qualified." + "Quality value refers to a Phred quality score" + ), + Params("quality_threshold"), + ), + Spoiler( + "Output directory", + Text("Name of the output directory to send results to."), + Params("output_directory"), + ), + ] + + metadata = LatchMetadata( + display_name="fastp - Flow Tutorial", + author=LatchAuthor( + name="LatchBio", + ), + parameters={ + "sample_fork": LatchParameter(), + "paired_end": LatchParameter( + display_name="Paired-end reads", + description="FASTQ files", + batch_table_column=True, + ), + "single_end": LatchParameter( + display_name="Single-end reads", + description="FASTQ files", + batch_table_column=True, + ), + "output_directory": LatchParameter( + display_name="Output directory", + ), + }, + flow=flow, + ) + + @workflow(metadata) + def fastp( + sample_fork: str, + paired_end: PairedEnd, + single_end: Optional[SingleEnd] = None, + output_directory: str = "fastp_results", + ) -> LatchDir: + ... 
+ """ + + section: str + """Title of the section""" + flow: list[FlowBase] + """Flow displayed in the section card""" + + def __init__(self, section: str, *flow: FlowBase): + object.__setattr__(self, "section", section) + object.__setattr__(self, "flow", list(flow)) + + +@dataclass(frozen=True) +class Text(FlowBase): + """Flow element that displays a markdown string""" + + text: str + """Markdown body text""" + + +@dataclass(frozen=True) +class Title(FlowBase): + """Flow element that displays a markdown title""" + + title: str + """Markdown title text""" + + +@dataclass(frozen=True, init=False) +class Params(FlowBase): + """Flow element that displays parameter widgets""" + + params: list[str] + """ + Names of parameters whose widgets will be displayed. + Order is preserved. Duplicates are allowed + """ + + def __init__(self, *args: str): + object.__setattr__(self, "params", list(args)) + + +@dataclass(frozen=True, init=False) +class Spoiler(FlowBase): + """Flow element that displays a collapsible card with a given title""" + + spoiler: str + """Title of the spoiler""" + flow: list[FlowBase] + """Flow displayed in the spoiler card""" + + def __init__(self, spoiler: str, *flow: FlowBase): + object.__setattr__(self, "spoiler", spoiler) + object.__setattr__(self, "flow", list(flow)) + + +@dataclass(frozen=True, init=False) +class ForkBranch: + """Definition of a :class:`~latch.types.metadata.Fork` branch""" + + display_name: str + """String displayed in the fork's multibutton""" + flow: list[FlowBase] + """Child flow displayed in the fork card when the branch is active""" + + def __init__(self, display_name: str, *flow: FlowBase): + object.__setattr__(self, "display_name", display_name) + object.__setattr__(self, "flow", list(flow)) + + +@dataclass(frozen=True, init=False) +class Fork(FlowBase): + """Flow element that displays a set of mutually exclusive alternatives + + Displays a title, followed by a horizontal multibutton for selecting a branch, + then a card for 
the active branch + """ + + fork: str + """Name of a `str`-typed parameter to store the active branch's key""" + display_name: str + """Title shown above the fork selector""" + flows: dict[str, ForkBranch] + """ + Mapping between branch keys to branch definitions. + Order determines the order of options in the multibutton + """ + + def __init__(self, fork: str, display_name: str, **flows: ForkBranch): + object.__setattr__(self, "fork", fork) + object.__setattr__(self, "display_name", display_name) + object.__setattr__(self, "flows", flows) diff --git a/src/latch/types/metadata/latch.py b/src/latch/types/metadata/latch.py new file mode 100644 index 000000000..b4c45f064 --- /dev/null +++ b/src/latch/types/metadata/latch.py @@ -0,0 +1,293 @@ +from __future__ import annotations + +import re +from dataclasses import asdict, dataclass, field +from enum import Enum +from textwrap import indent +from typing import TYPE_CHECKING, Any, Union + +import yaml +from typing_extensions import TypeAlias + +if TYPE_CHECKING: + from .flows import FlowBase + + +@dataclass +class LatchRule: + """Class describing a rule that a parameter input must follow""" + + regex: str + """A string regular expression which inputs must match""" + message: str + """The message to render when an input does not match the regex""" + + @property + def dict(self): + return asdict(self) + + def __post_init__(self): + try: + re.compile(self.regex) + except re.error as e: + raise ValueError(f"Malformed regex {self.regex}: {e.msg}") from e + + +class LatchAppearanceEnum(Enum): + line = "line" + paragraph = "paragraph" + + +@dataclass(frozen=True) +class MultiselectOption: + name: str + value: object + + +@dataclass(frozen=True) +class Multiselect: + options: list[MultiselectOption] = field(default_factory=list) + allow_custom: bool = False + + +# backwards compatibility +LatchAppearanceType = LatchAppearanceEnum + +LatchAppearance: TypeAlias = Union[LatchAppearanceEnum, Multiselect] + + +@dataclass +class 
LatchAuthor:
+    """Class describing metadata about the workflow author"""
+
+    name: str | None = None
+    """The name of the author"""
+    email: str | None = None
+    """The email of the author"""
+    github: str | None = None
+    """A link to the github profile of the author"""
+
+
+@dataclass
+class LatchParameter:
+    """Class for organizing parameter metadata"""
+
+    display_name: str | None = None
+    """The name used to display the parameter on Latch Console"""
+    description: str | None = None
+    """The description of the parameter's role in the workflow"""
+    hidden: bool = False
+    """Whether or not the parameter should be hidden by default"""
+    section_title: str | None = None
+    """Whether this parameter should start a new section"""
+    placeholder: str | None = None
+    """
+    What should be rendered as a placeholder in the input box
+    of the parameter before any value is inputted.
+    """
+    comment: str | None = None
+    """Any comment on the parameter itself"""
+    output: bool = False
+    """
+    Whether or not this parameter is an output (used to disable
+    path validation before launching a workflow)
+    """
+    batch_table_column: bool = False
+    """
+    Whether this parameter should be given a column in the batch
+    table at the top of the workflow inputs
+    """
+    allow_dir: bool = True
+    """
+    Whether or not this parameter should accept directories in UI
+    """
+    allow_file: bool = True
+    """
+    Whether or not this parameter should accept files in UI.
+    """
+    appearance_type: LatchAppearance = LatchAppearanceEnum.line
+    """
+    Whether the parameter should be rendered as a line or paragraph
+    (must be exactly one of either LatchAppearanceType.line or
+    LatchAppearanceType.paragraph)
+    """
+    rules: list[LatchRule] = field(default_factory=list)
+    """
+    A list of LatchRule objects that inputs to this parameter must follow
+    """
+    detail: str | None = None
+    samplesheet: bool | None = None
+    """
+    Use samplesheet input UI. Allows importing from Latch Registry.
+ Parameter type must be a list of dataclasses + """ + allowed_tables: list[int] | None = None + """ + If using the samplesheet component, specify a set of Registry Tables (by ID) to allow selection from. + If not provided, all Tables are allowed. + + Only has an effect if `samplesheet=True`. + """ + _custom_ingestion: str | None = None + + def __str__(self): + metadata_yaml = yaml.safe_dump(self.dict, sort_keys=False) + if self.description is not None: + return f"{self.description}\n{metadata_yaml}" + return metadata_yaml + + @property + def dict(self): + parameter_dict: dict[str, Any] = {"display_name": self.display_name} + + if self.output: + parameter_dict["output"] = True + if self.batch_table_column: + parameter_dict["batch_table_column"] = True + if self.samplesheet: + parameter_dict["samplesheet"] = True + if self.allowed_tables is not None: + parameter_dict["allowed_tables"] = [str(x) for x in self.allowed_tables] + + temp_dict: dict[str, Any] = {"hidden": self.hidden} + if self.section_title is not None: + temp_dict["section_title"] = self.section_title + if self._custom_ingestion is not None: + temp_dict["custom_ingestion"] = self._custom_ingestion + + parameter_dict["_tmp"] = temp_dict + + appearance_dict: dict[str, Any] + if isinstance(self.appearance_type, LatchAppearanceEnum): + appearance_dict = {"type": self.appearance_type.value} + elif isinstance(self.appearance_type, Multiselect): + appearance_dict = {"multiselect": asdict(self.appearance_type)} + else: + appearance_dict = {} + + if self.placeholder is not None: + appearance_dict["placeholder"] = self.placeholder + if self.comment is not None: + appearance_dict["comment"] = self.comment + if self.detail is not None: + appearance_dict["detail"] = self.detail + + appearance_dict["file_type"] = ( + "ANY" + if self.allow_file and self.allow_dir + else "FILE" + if self.allow_file + else "DIR" + if self.allow_dir + else "NONE" + ) + + parameter_dict["appearance"] = appearance_dict + + if 
len(self.rules) > 0: + parameter_dict["rules"] = [rule.dict for rule in self.rules] + + return {"__metadata__": parameter_dict} + + +@dataclass +class LatchMetadata: + """Class for organizing workflow metadata + + Example: + + .. code-block:: python + + from latch.types import LatchMetadata, LatchAuthor, LatchRule, LatchAppearanceType + + metadata = LatchMetadata( + parameters={ + "read1": LatchParameter( + display_name="Read 1", + description="Paired-end read 1 file to be assembled.", + hidden=True, + section_title="Sample Reads", + placeholder="Select a file", + comment="This is a comment", + output=False, + appearance_type=LatchAppearanceType.paragraph, + rules=[ + LatchRule( + regex="(.fasta|.fa|.faa|.fas)$", + message="Only .fasta, .fa, .fas, or .faa extensions are valid" + ) + ], + batch_table_column=True, # Show this parameter in batched mode. + # The below parameters will be displayed on the side bar of the workflow + documentation="https://github.com/author/my_workflow/README.md", + author=LatchAuthor( + name="Workflow Author", + email="licensing@company.com", + github="https://github.com/author", + ), + repository="https://github.com/author/my_workflow", + license="MIT", + # If the workflow is public, display it under the defined categories on Latch to be more easily discovered by users + tags=["NGS", "MAG"], + ), + ) + + @workflow(metadata) + def wf(read1: LatchFile): + ... 
+ + """ + + display_name: str + """The human-readable name of the workflow""" + author: LatchAuthor + """ A `LatchAuthor` object that describes the author of the workflow""" + documentation: str | None = None + """A link to documentation for the workflow itself""" + repository: str | None = None + """A link to the repository where the code for the workflow is hosted""" + license: str = "MIT" + """A SPDX identifier""" + parameters: dict[str, LatchParameter] = field(default_factory=dict) + """A dictionary mapping parameter names (strings) to `LatchParameter` objects""" + wiki_url: str | None = None + video_tutorial: str | None = None + tags: list[str] = field(default_factory=list) + flow: list[FlowBase] = field(default_factory=list) + + no_standard_bulk_execution: bool = False + """ + Disable the standard CSV-based bulk execution. Intended for workflows that + support an alternative way of processing bulk data e.g. using a samplesheet + parameter + """ + _non_standard: dict[str, object] = field(default_factory=dict) + + @property + def dict(self): + metadata_dict = asdict(self) + # remove parameters since that will be handled by each parameters' dict() method + del metadata_dict["parameters"] + metadata_dict["license"] = {"id": self.license} + + # flows override all other rendering, so disable them entirely if not provided + if len(self.flow) == 0: + del metadata_dict["flow"] + + for key in self._non_standard: + metadata_dict[key] = self._non_standard[key] + + return {"__metadata__": metadata_dict} + + def __str__(self): + def _parameter_str(t: tuple[str, LatchParameter]): + parameter_name, parameter_meta = t + return f"{parameter_name}:\n" + indent( + str(parameter_meta), " ", lambda _: True + ) + + metadata_yaml = yaml.safe_dump(self.dict, sort_keys=False) + parameter_yaml = "".join(map(_parameter_str, self.parameters.items())) + return ( + metadata_yaml + "Args:\n" + indent(parameter_yaml, " ", lambda _: True) + ).strip("\n ") diff --git 
a/src/latch/types/metadata/nextflow.py b/src/latch/types/metadata/nextflow.py new file mode 100644 index 000000000..9315d7b16 --- /dev/null +++ b/src/latch/types/metadata/nextflow.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +import csv +import functools +from dataclasses import dataclass, field, fields, is_dataclass +from enum import Enum +from pathlib import Path +from textwrap import dedent +from typing import Any, Callable, Generic, Literal, get_args, get_origin + +import click + +from latch_cli.utils import identifier_suffix_from_str + +from ..directory import LatchDir, LatchOutputDir +from ..file import LatchFile +from .latch import LatchMetadata, LatchParameter +from .utils import DC, P + + +@dataclass +class NextflowParameter(Generic[P], LatchParameter): + type: type[P] | None = None + """ + The python type of the parameter. + """ + default: P | None = None + """ + Default value of the parameter + """ + + samplesheet_type: Literal["csv", "tsv", None] = None + """ + The type of samplesheet to construct from the input parameter. + + Only used if the provided parameter is a samplesheet (samplesheet=True) + """ + samplesheet_constructor: Callable[[P], Path] | None = None + """ + A custom samplesheet constructor. + + Should return the path of the constructed samplesheet. If samplesheet_type is also specified, this takes precedence. + Only used if the provided parameter is a samplesheet (samplesheet=True) + """ + results_paths: list[Path] | None = None + """ + Output sub-paths that will be exposed in the UI under the "Results" tab on the workflow execution page. 
+ + Only valid where the `type` attribute is a LatchDir + """ + + def __post_init__(self): + if self.results_paths is not None and self.type not in { + LatchDir, + LatchOutputDir, + }: + click.secho( + "`results_paths` attribute can only be defined for parameters" + " of type `LatchDir`.", + fg="red", + ) + raise click.exceptions.Exit(1) + + if not self.samplesheet or self.samplesheet_constructor is not None: + return + + t = self.type + if get_origin(t) is not list or not is_dataclass(get_args(t)[0]): + click.secho("Samplesheets must be a list of dataclasses.", fg="red") + raise click.exceptions.Exit(1) + + if self.samplesheet_type is not None: + delim = "," if self.samplesheet_type == "csv" else "\t" + self.samplesheet_constructor = functools.partial( + _samplesheet_constructor, t=get_args(self.type)[0], delim=delim + ) + return + + click.secho( + dedent("""\ + A Samplesheet constructor is required for a samplesheet parameter. Please either provide a value for + `samplesheet_type` or provide a custom callable to the `samplesheet_constructor` argument. 
+ """), + fg="red", + ) + raise click.exceptions.Exit(1) + + +def _samplesheet_repr(v: Any) -> str: + if v is None: + return "" + if isinstance(v, (LatchFile, LatchDir)): + return str(v.remote_path) + if isinstance(v, Enum): + return v.value + + return str(v) + + +def _samplesheet_constructor(samples: list[DC], t: DC, delim: str = ",") -> Path: + samplesheet = Path("samplesheet.csv") + + with samplesheet.open("w") as f: + writer = csv.DictWriter(f, [f.name for f in fields(t)], delimiter=delim) + writer.writeheader() + + for sample in samples: + row_data = { + f.name: _samplesheet_repr(getattr(sample, f.name)) + for f in fields(sample) + } + writer.writerow(row_data) + + return samplesheet + + +@dataclass(frozen=True) +class NextflowRuntimeResources: + """Resources for Nextflow runtime tasks""" + + cpus: int | None = 4 + """ + Number of CPUs required for the task + """ + memory: int | None = 8 + """ + Memory required for the task in GiB + """ + storage_gib: int | None = 100 + """ + Storage required for the task in GiB + """ + storage_expiration_hours: int = 0 + """ + Number of hours after execution failure that workdir should be retained in EFS. + Warning: Increasing this number will increase your Nextflow EFS Storage costs. 
+ """ + + +@dataclass +class NextflowMetadata(LatchMetadata): + name: str | None = None + """ + Name of the workflow + """ + parameters: dict[str, NextflowParameter[Any]] = field(default_factory=dict) + """ + A dictionary mapping parameter names (strings) to `NextflowParameter` objects + """ + runtime_resources: NextflowRuntimeResources = field( + default_factory=NextflowRuntimeResources + ) + """ + Resources (cpu/memory/storage) for Nextflow runtime task + """ + execution_profiles: list[str] = field(default_factory=list) + """ + Execution config profiles to expose to users in the Latch console + """ + log_dir: LatchDir | None = None + """ + Directory to dump Nextflow logs + """ + upload_command_logs: bool = False + """ + Upload .command.* logs to Latch Data after each task execution + """ + about_page_path: Path | None = None + """ + Path to a markdown file containing information about the pipeline - rendered in the About page. + """ + + def validate(self): + if self.about_page_path is not None and not isinstance( + self.about_page_path, Path + ): # type: ignore + click.secho( + f"`about_page_path` parameter ({self.about_page_path}) must be a" + " Path object.", + fg="red", + ) + + @property + def dict(self): + d = super().dict + del d["__metadata__"]["about_page_path"] + return d + + def __post_init__(self): + self.validate() + + if self.name is None: + if self.display_name is None: + click.secho( + "Name or display_name must be provided in metadata", fg="red" + ) + self.name = f"nf_{identifier_suffix_from_str(self.display_name.lower())}" + else: + self.name = identifier_suffix_from_str(self.name) + + global _nextflow_metadata + _nextflow_metadata = self + + +_nextflow_metadata: NextflowMetadata | None = None diff --git a/src/latch/types/metadata/snakemake.py b/src/latch/types/metadata/snakemake.py new file mode 100644 index 000000000..298a3c339 --- /dev/null +++ b/src/latch/types/metadata/snakemake.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from 
dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, Generic, Literal, Union + +import click +from typing_extensions import TypeAlias + +from latch_cli.snakemake.config.utils import validate_snakemake_type +from latch_cli.utils import identifier_suffix_from_str + +from ..directory import LatchDir +from ..file import LatchFile +from .latch import LatchMetadata, LatchParameter +from .utils import P, ParameterType + + +@dataclass +class SnakemakeParameter(LatchParameter, Generic[P]): + type: type[P] | None = None + """ + The python type of the parameter. + """ + default: P | None = None + """ + Optional default value for this parameter + """ + + samplesheet_type: Literal["csv", "tsv", None] = None + """ + The type of samplesheet to construct from the input parameter. + + Only used if the provided parameter is a samplesheet (samplesheet=True) + """ + samplesheet_constructor: Callable[[P], Path] | None = None + """ + A custom samplesheet constructor. + + Should return the path of the constructed samplesheet. If samplesheet_type is also specified, this takes precedence. + Only used if the provided parameter is a samplesheet (samplesheet=True) + """ + + def __post_init__(self): + if self.type is None: + click.secho("All SnakemakeParameter objects must specify a type.", fg="red") + raise click.exceptions.Exit(1) + + +@dataclass +class SnakemakeFileParameter(SnakemakeParameter[Union[LatchFile, LatchDir]]): + """Deprecated: use `file_metadata` keyword in `SnakemakeMetadata` instead""" + + type: type[LatchFile | LatchDir] | None = None + """ + The python type of the parameter. + """ + path: Path | None = None + """ + The path where the file passed to this parameter will be copied. 
+ """ + config: bool = False + """ + Whether or not the file path is exposed in the Snakemake config + """ + download: bool = False + """ + Whether or not the file is downloaded in the JIT step + """ + + +@dataclass +class SnakemakeFileMetadata: + path: Path + """ + The local path where the file passed to this parameter will be copied + """ + config: bool = False + """ + If `True`, expose the file in the Snakemake config + """ + download: bool = False + """ + If `True`, download the file in the JIT step + """ + + +@dataclass +class DockerMetadata: + """Class describing credentials for private docker repositories""" + + username: str + """ + The account username for the private repository + """ + secret_name: str + """ + The name of the Latch Secret that contains the password for the private repository + """ + + +@dataclass +class EnvironmentConfig: + """Class describing environment for spawning Snakemake tasks""" + + use_conda: bool = False + """ + Use Snakemake `conda` directive to spawn tasks in conda environments + """ + use_container: bool = False + """ + Use Snakemake `container` directive to spawn tasks in Docker containers + """ + container_args: list[str] = field(default_factory=list) + """ + Additional arguments to use when running Docker containers + """ + + +FileMetadata: TypeAlias = dict[str, Union[SnakemakeFileMetadata, "FileMetadata"]] + + +@dataclass +class SnakemakeMetadata(LatchMetadata): + """Class for organizing Snakemake workflow metadata""" + + output_dir: LatchDir | None = None + """ + Directory for snakemake workflow outputs + """ + name: str | None = None + """ + Name of the workflow + """ + docker_metadata: DockerMetadata | None = None + """ + Credentials configuration for private docker repositories + """ + env_config: EnvironmentConfig = field(default_factory=EnvironmentConfig) + """ + Environment configuration for spawning Snakemake tasks + """ + parameters: dict[str, SnakemakeParameter[ParameterType]] = field(default_factory=dict) + """ 
+ A dictionary mapping parameter names (strings) to `SnakemakeParameter` objects + """ + file_metadata: FileMetadata = field(default_factory=dict) + """ + A dictionary mapping parameter names to `SnakemakeFileMetadata` objects + """ + cores: int = 4 + """ + Number of cores to use for Snakemake tasks (equivalent of Snakemake's `--cores` flag) + """ + about_page_content: Path | None = None + """ + Path to a markdown file containing information about the pipeline - rendered in the About page. + """ + + def validate(self): + if self.about_page_content is not None: + if not isinstance(self.about_page_content, Path): + click.secho( + f"`about_page_content` parameter ({self.about_page_content}) must" + " be a Path object.", + fg="red", + ) + raise click.exceptions.Exit(1) + + for name, param in self.parameters.items(): + if param.default is None: + continue + try: + validate_snakemake_type(name, param.type, param.default) + except ValueError as e: + click.secho(e, fg="red") + raise click.exceptions.Exit(1) from e + + def __post_init__(self): + self.validate() + + if self.name is None: + self.name = f"snakemake_{identifier_suffix_from_str(self.display_name.lower())}" + + global _snakemake_metadata + _snakemake_metadata = self + + @property + def dict(self): + d = super().dict + # ayush: Paths aren't JSON serializable but ribosome doesn't need it anyway so we can just delete it + del d["__metadata__"]["about_page_content"] + return d + + +_snakemake_metadata: SnakemakeMetadata | None = None diff --git a/src/latch/types/metadata/snakemake_v2.py b/src/latch/types/metadata/snakemake_v2.py new file mode 100644 index 000000000..bf3663057 --- /dev/null +++ b/src/latch/types/metadata/snakemake_v2.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import click + +from latch_cli.utils import identifier_suffix_from_str + +from .latch import LatchMetadata + +if 
TYPE_CHECKING: + from .snakemake import SnakemakeParameter + + +@dataclass(frozen=True) +class SnakemakeRuntimeResources: + """Resources for Snakemake runtime tasks""" + + cpus: int = 4 + """ + Number of CPUs required for the task + """ + memory: int = 8 + """ + Memory required for the task in GiB + """ + storage_gib: int = 100 + """ + Storage required for the task in GiB + """ + + +@dataclass +class SnakemakeV2Metadata(LatchMetadata): + parameters: dict[str, SnakemakeParameter[Any]] = field(default_factory=dict) + """ + A dictionary mapping parameter names (strings) to `SnakemakeParameter` objects + """ + about_page_path: Path | None = None + """ + Path to a markdown file containing information about the pipeline - rendered in the About page. + """ + runtime_resources: SnakemakeRuntimeResources = field( + default_factory=SnakemakeRuntimeResources + ) + + def validate(self): + if self.about_page_path is not None and not isinstance( + self.about_page_path, Path + ): # type: ignore + click.secho( + f"SnakemakeV2Metadata.about_page_path ({self.about_page_path}) must be a" + " `Path` object.", + fg="red", + ) + raise click.exceptions.Exit(1) + + def __post_init__(self): + self.validate() + + self.name = identifier_suffix_from_str( + f"snakemake_v2_{self.display_name}".lower() + ) + + global _snakemake_v2_metadata + _snakemake_v2_metadata = self + + @property + def dict(self): + d = super().dict + del d["__metadata__"]["about_page_path"] + return d + + +_snakemake_v2_metadata: SnakemakeV2Metadata | None = None diff --git a/src/latch/types/metadata/utils.py b/src/latch/types/metadata/utils.py new file mode 100644 index 000000000..f2bcb1374 --- /dev/null +++ b/src/latch/types/metadata/utils.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from collections.abc import Collection +from enum import Enum +from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Protocol, TypeVar, Union + +from typing_extensions import TypeAlias + +from ..directory import LatchDir 
+from ..file import LatchFile + +if TYPE_CHECKING: + from dataclasses import Field + + +# https://stackoverflow.com/questions/54668000/type-hint-for-an-instance-of-a-non-specific-dataclass +class _IsDataclass(Protocol): + __dataclass_fields__: ClassVar[dict[str, Field[Any]]] + + +DC = TypeVar("DC", bound=_IsDataclass) + +ParameterType: TypeAlias = Union[ + None, + int, + float, + str, + bool, + LatchFile, + LatchDir, + Enum, + _IsDataclass, + Collection["ParameterType"], +] + +P = TypeVar("P", bound=ParameterType) diff --git a/src/latch_cli/centromere/ctx.py b/src/latch_cli/centromere/ctx.py index 4e7f9dffe..936bd7ce6 100644 --- a/src/latch_cli/centromere/ctx.py +++ b/src/latch_cli/centromere/ctx.py @@ -14,7 +14,6 @@ from flytekit.core.base_task import PythonTask from flytekit.core.context_manager import FlyteEntities from flytekit.core.workflow import PythonFunctionWorkflow -from latch_sdk_config.latch import config import latch_cli.tinyrequests as tinyrequests from latch.utils import account_id_from_token, current_workspace, retrieve_or_login @@ -26,11 +25,8 @@ ) from latch_cli.constants import docker_image_name_illegal_pat, latch_constants from latch_cli.docker_utils import get_default_dockerfile -from latch_cli.utils import ( - WorkflowType, - generate_temporary_ssh_credentials, - hash_directory, -) +from latch_cli.utils import WorkflowType, generate_temporary_ssh_credentials, hash_directory +from latch_sdk_config.latch import config @dataclass @@ -124,9 +120,7 @@ def __init__( except FileNotFoundError: self.version = "0.1.0" version_file.write_text(f"{self.version}\n") - click.echo( - f"Created a version file with initial version {self.version}." 
- ) + click.echo(f"Created a version file with initial version {self.version}.") self.version = self.version.strip() @@ -144,8 +138,7 @@ def __init__( pass except Exception as e: click.secho( - "WARN: Exception occurred while getting git hash from" - f" {self.pkg_root}: {e}", + f"WARN: Exception occurred while getting git hash from {self.pkg_root}: {e}", fg="yellow", ) @@ -221,9 +214,7 @@ def __init__( meta_file = load_snakemake_metadata(pkg_root, metadata_root) if meta_file is not None: - click.echo( - f"Using metadata file {click.style(meta_file, italic=True)}" - ) + click.echo(f"Using metadata file {click.style(meta_file, italic=True)}") else: new_meta = pkg_root / "latch_metadata" / "__init__.py" click.echo("Trying to extract metadata from the Snakefile") @@ -248,20 +239,11 @@ def __init__( ) click.secho("\nExample ", fg="red", nl=False) - snakemake_metadata_example = get_snakemake_metadata_example( - pkg_root.name - ) + snakemake_metadata_example = get_snakemake_metadata_example(pkg_root.name) click.secho(f"`{new_meta}`", bold=True, fg="red", nl=False) - click.secho( - f" file:\n```\n{snakemake_metadata_example}```", - fg="red", - ) + click.secho(f" file:\n```\n{snakemake_metadata_example}```", fg="red") if click.confirm( - click.style( - "Generate example metadata file now?", - bold=True, - fg="red", - ), + click.style("Generate example metadata file now?", bold=True, fg="red"), default=True, ): new_meta.write_text(snakemake_metadata_example) @@ -269,22 +251,14 @@ def __init__( import platform system = platform.system() - if system in { - "Windows", - "Linux", - "Darwin", - } and click.confirm( - click.style( - "Open the generated file?", bold=True, fg="red" - ), + if system in {"Windows", "Linux", "Darwin"} and click.confirm( + click.style("Open the generated file?", bold=True, fg="red"), default=True, ): import subprocess if system == "Linux": - res = subprocess.run( - ["xdg-open", new_meta] - ).returncode + res = subprocess.run(["xdg-open", 
new_meta]).returncode elif system == "Darwin": res = subprocess.run(["open", new_meta]).returncode elif system == "Windows": @@ -303,7 +277,7 @@ def __init__( dedent( """ Make sure a `latch_metadata` exists in the Snakemake - project root or provide a metadata folder with the `--metadata-root` argument.""", + project root or provide a metadata folder with the `--metadata-root` argument.""" ), fg="red", ) @@ -356,9 +330,7 @@ def __init__( sys.exit(1) self.default_container = _Container( - dockerfile=get_default_dockerfile( - self.pkg_root, wf_type=self.workflow_type - ), + dockerfile=get_default_dockerfile(self.pkg_root, wf_type=self.workflow_type), image_name=self.image_tagged, pkg_dir=self.pkg_root, ) @@ -372,9 +344,7 @@ def __init__( ) if use_new_centromere: - self.internal_ip, self.username = ( - self.provision_register_deployment() - ) + self.internal_ip, self.username = self.provision_register_deployment() else: self.internal_ip, self.username = self.get_old_centromere_info() @@ -402,6 +372,7 @@ def _patched_create_paramiko_client(self, base_url): else: self.dkr_client = _construct_dkr_client() + self.remote_conn_info = None except (Exception, KeyboardInterrupt) as e: self.cleanup() raise e @@ -481,11 +452,7 @@ def get_old_centromere_info(self) -> Tuple[str, str]: headers = {"Authorization": f"Bearer {self.token}"} response = tinyrequests.post( - self.latch_provision_url, - headers=headers, - json={ - "public_key": self.public_key, - }, + self.latch_provision_url, headers=headers, json={"public_key": self.public_key} ) resp = response.json() @@ -542,20 +509,14 @@ def nucleus_get_image(self, task_name: str, version: Optional[str] = None) -> st headers = {"Authorization": f"Bearer {self.token}"} response = tinyrequests.post( - self.latch_get_image_url, - headers=headers, - json={ - "task_name": task_name, - }, + self.latch_get_image_url, headers=headers, json={"task_name": task_name} ) resp = response.json() try: return resp["image_name"] except KeyError as e: - 
raise ValueError( - f"Malformed response from request for image url {resp}" - ) from e + raise ValueError(f"Malformed response from request for image url {resp}") from e def nucleus_check_version(self, version: str, workflow_name: str) -> bool: """Check if version has already been registered for given workflow""" @@ -569,20 +530,14 @@ def nucleus_check_version(self, version: str, workflow_name: str) -> bool: response = tinyrequests.post( self.latch_check_version_url, headers=headers, - json={ - "version": version, - "workflow_name": workflow_name, - "ws_account_id": ws_id, - }, + json={"version": version, "workflow_name": workflow_name, "ws_account_id": ws_id}, ) resp = response.json() try: return resp["exists"] except KeyError as e: - raise ValueError( - f"Malformed response from request for version check {resp}" - ) from e + raise ValueError(f"Malformed response from request for version check {resp}") from e def __enter__(self): return self diff --git a/src/latch_cli/main.py b/src/latch_cli/main.py index e573679b5..4538f5a4a 100644 --- a/src/latch_cli/main.py +++ b/src/latch_cli/main.py @@ -60,12 +60,7 @@ def decorated(*args: P.args, **kwargs: P.kwargs): return decorated -@click.group( - "latch", - context_settings={ - "max_content_width": 160, - }, -) +@click.group("latch", context_settings={"max_content_width": 160}) @click.version_option(package_name="latch") def main(): """ @@ -96,10 +91,7 @@ def main(): @main.command("login") @click.option( - "--connection", - type=str, - default=None, - help="Specific AuthO connection name e.g. for SSO.", + "--connection", type=str, default=None, help="Specific AuthO connection name e.g. for SSO." 
) def login(connection: Optional[str]): """Manually login to Latch.""" @@ -136,10 +128,7 @@ def workspace(): @click.option( "--template", "-t", - type=click.Choice( - list(template_flag_to_option.keys()), - case_sensitive=False, - ), + type=click.Choice(list(template_flag_to_option.keys()), case_sensitive=False), ) @click.option( "--dockerfile", @@ -152,10 +141,7 @@ def workspace(): "--base-image", "-b", help="Which base image to use for the Dockerfile.", - type=click.Choice( - list(BaseImageOptions._member_names_), - case_sensitive=False, - ), + type=click.Choice(list(BaseImageOptions._member_names_), case_sensitive=False), default="default", ) def init( @@ -314,13 +300,9 @@ def generate_metadata( config_file = Path("nextflow_schema.json") generate_metadata( - config_file, - metadata_root, - skip_confirmation=yes, - generate_defaults=not no_defaults, + config_file, metadata_root, skip_confirmation=yes, generate_defaults=not no_defaults ) else: - from latch_cli.snakemake.config.parser import generate_metadata if config_file is None: @@ -345,19 +327,9 @@ def generate_metadata( @main.command("develop") @click.argument("pkg_root", nargs=1, type=click.Path(exists=True, path_type=Path)) @click.option( - "--yes", - "-y", - is_flag=True, - default=False, - type=bool, - help="Skip the confirmation dialog.", -) -@click.option( - "--image", - "-i", - type=str, - help="Image to use for develop session.", + "--yes", "-y", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
) +@click.option("--image", "-i", type=str, help="Image to use for develop session.") @click.option( "--wf-version", "-v", @@ -407,23 +379,16 @@ def local_development( from latch_cli.services.local_dev import local_development local_development( - pkg_root.resolve(), - skip_confirm_dialog=yes, - size=TaskSize.small_task, - image=image, + pkg_root.resolve(), skip_confirm_dialog=yes, size=TaskSize.small_task, image=image ) else: from latch_cli.services.local_dev_old import local_development - local_development( - pkg_root.resolve(), snakemake, wf_version, metadata_root, disable_sync - ) + local_development(pkg_root.resolve(), snakemake, wf_version, metadata_root, disable_sync) @main.command("exec") -@click.option( - "--execution-id", "-e", type=str, help="Optional execution ID to inspect." -) +@click.option("--execution-id", "-e", type=str, help="Optional execution ID to inspect.") @click.option("--egn-id", "-g", type=str, help="Optional task execution ID to inspect.") @click.option( "--container-index", @@ -432,9 +397,7 @@ def local_development( help="Optional container index to inspect (only used for Map Tasks)", ) @requires_login -def execute( - execution_id: Optional[str], egn_id: Optional[str], container_index: Optional[int] -): +def execute(execution_id: Optional[str], egn_id: Optional[str], container_index: Optional[int]): """Drops the user into an interactive shell from within a task.""" from latch_cli.services.k8s.execute import exec @@ -451,8 +414,7 @@ def execute( default=False, type=bool, help=( - "Whether to automatically bump the version of the workflow each time register" - " is called." + "Whether to automatically bump the version of the workflow each time register is called." ), ) @click.option( @@ -473,12 +435,7 @@ def execute( ), ) @click.option( - "-y", - "--yes", - is_flag=True, - default=False, - type=bool, - help="Skip the confirmation dialog.", + "-y", "--yes", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
) @click.option( "--open", @@ -505,10 +462,7 @@ def execute( is_flag=True, default=False, type=bool, - help=( - "Whether or not to cache snakemake tasks. Ignored if --snakefile is not" - " provided." - ), + help=("Whether or not to cache snakemake tasks. Ignored if --snakefile is not provided."), ) @click.option( "--nf-script", @@ -576,9 +530,7 @@ def register( @main.command("launch") @click.argument("params_file", nargs=1, type=click.Path(exists=True)) @click.option( - "--version", - default=None, - help="The version of the workflow to launch. Defaults to latest.", + "--version", default=None, help="The version of the workflow to launch. Defaults to latest." ) @requires_login def launch(params_file: Path, version: Union[str, None] = None): @@ -594,18 +546,13 @@ def launch(params_file: Path, version: Union[str, None] = None): version = "latest" click.secho( - f"Successfully launched workflow named {wf_name} with version {version}.", - fg="green", + f"Successfully launched workflow named {wf_name} with version {version}.", fg="green" ) @main.command("get-params") @click.argument("wf_name", nargs=1) -@click.option( - "--version", - default=None, - help="The version of the workflow. Defaults to latest.", -) +@click.option("--version", default=None, help="The version of the workflow. Defaults to latest.") @requires_login def get_params(wf_name: Union[str, None], version: Union[str, None] = None): """Generate a python parameter map for a workflow.""" @@ -626,9 +573,7 @@ def get_params(wf_name: Union[str, None], version: Union[str, None] = None): @main.command("get-wf") @click.option( - "--name", - default=None, - help="The name of the workflow to list. Will display all versions", + "--name", default=None, help="The name of the workflow to list. 
Will display all versions" ) @requires_login def get_wf(name: Union[str, None] = None): @@ -648,9 +593,7 @@ def get_wf(name: Union[str, None] = None): version_padding = max(version_padding, version_len) # TODO(ayush): make this much better - click.secho( - f"ID{id_padding * ' '}\tName{name_padding * ' '}\tVersion{version_padding * ' '}" - ) + click.secho(f"ID{id_padding * ' '}\tName{name_padding * ' '}\tVersion{version_padding * ' '}") for wf in wfs: click.secho( f"{wf[0]}{(id_padding - len(str(wf[0]))) * ' '}\t{wf[1]}{(name_padding - len(wf[1])) * ' '}\t{wf[2]}{(version_padding - len(wf[2])) * ' '}" @@ -713,15 +656,9 @@ def get_executions(): default=False, show_default=True, ) +@click.option("--cores", help="Manually specify number of cores to parallelize over", type=int) @click.option( - "--cores", - help="Manually specify number of cores to parallelize over", - type=int, -) -@click.option( - "--chunk-size-mib", - help="Manually specify the upload chunk size in MiB. Must be >= 5", - type=int, + "--chunk-size-mib", help="Manually specify the upload chunk size in MiB. Must be >= 5", type=int ) @requires_login def cp( @@ -805,10 +742,7 @@ def ls(paths: Tuple[str], group_directories_first: bool): if len(paths) > 1: click.echo(f"{path}:") - ls( - path, - group_directories_first=group_directories_first, - ) + ls(path, group_directories_first=group_directories_first) if len(paths) > 1: click.echo("") @@ -817,12 +751,7 @@ def ls(paths: Tuple[str], group_directories_first: bool): @main.command("rmr") @click.argument("remote_path", nargs=1, type=str) @click.option( - "-y", - "--yes", - is_flag=True, - default=False, - type=bool, - help="Skip the confirmation dialog.", + "-y", "--yes", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
) @click.option( "--no-glob", @@ -868,31 +797,18 @@ def mkdir(remote_directory: str): @click.argument("srcs", nargs=-1) @click.argument("dst", nargs=1) @click.option( - "--delete", - help="Delete extraneous files from destination.", - is_flag=True, - default=False, + "--delete", help="Delete extraneous files from destination.", is_flag=True, default=False ) @click.option( "--ignore-unsyncable", - help=( - "Synchronize even if some source paths do not exist or refer to special files." - ), + help=("Synchronize even if some source paths do not exist or refer to special files."), is_flag=True, default=False, ) -@click.option( - "--cores", - help="Number of cores to use for parallel syncing.", - type=int, -) +@click.option("--cores", help="Number of cores to use for parallel syncing.", type=int) @requires_login def sync( - srcs: List[str], - dst: str, - delete: bool, - ignore_unsyncable: bool, - cores: Optional[int] = None, + srcs: List[str], dst: str, delete: bool, ignore_unsyncable: bool, cores: Optional[int] = None ): """ Update the contents of a remote directory with local data. 
@@ -901,13 +817,7 @@ def sync( # todo(maximsmol): remote -> local # todo(maximsmol): remote -> remote - sync( - srcs, - dst, - delete=delete, - ignore_unsyncable=ignore_unsyncable, - cores=cores, - ) + sync(srcs, dst, delete=delete, ignore_unsyncable=ignore_unsyncable, cores=cores) """ @@ -918,7 +828,6 @@ def sync( @main.group() def nextflow(): """Manage nextflow""" - pass @nextflow.command("version") @@ -951,11 +860,8 @@ def version(pkg_root: Path): default=None, help="Set execution profile for Nextflow workflow", ) -def generate_entrypoint( - pkg_root: Path, - metadata_root: Optional[Path], - nf_script: Path, - execution_profile: Optional[str], +def nf_generate_entrypoint( + pkg_root: Path, metadata_root: Optional[Path], nf_script: Path, execution_profile: Optional[str] ): """Generate a `wf/entrypoint.py` file from a Nextflow workflow""" @@ -982,27 +888,19 @@ def generate_entrypoint( if metadata._nextflow_metadata is None: click.secho( dedent(f"""\ - Failed to generate Nextflow entrypoint. - Make sure the project root contains a `{meta}` - with a `NextflowMetadata` object defined. + Failed to generate Nextflow entrypoint. Make sure the project root contains a `{meta}` with a `NextflowMetadata` object defined. """), fg="red", ) raise click.exceptions.Exit(1) generate_nextflow_workflow( - pkg_root, - metadata_root, - nf_script, - dest, - execution_profile=execution_profile, + pkg_root, metadata_root, nf_script, dest, execution_profile=execution_profile ) @nextflow.command("attach") -@click.option( - "--execution-id", "-e", type=str, help="Optional execution ID to inspect." 
-) +@click.option("--execution-id", "-e", type=str, help="Optional execution ID to inspect.") @requires_login def attach(execution_id: Optional[str]): """Drops the user into an interactive shell to inspect the workdir of a nextflow execution.""" @@ -1012,6 +910,83 @@ def attach(execution_id: Optional[str]): attach(execution_id) +@main.group() +def snakemake(): + """Manage snakemake-specific commands""" + + +@snakemake.command("generate-entrypoint") +@click.argument("pkg-root", nargs=1, type=click.Path(exists=True, path_type=Path)) +@click.option( + "--metadata-root", + type=click.Path(exists=True, path_type=Path, file_okay=False), + help="Path to a directory containing a python package defining a SnakemakeV2Metadata " + "object. If not provided, will default to searching the package root for a directory called " + "`latch_metadata`.", +) +@click.option( + "--snakefile", + required=False, + type=click.Path(exists=True, path_type=Path, dir_okay=False), + help="Path to the Snakefile to register. If not provided, will default to searching the package " + "root for a file named `Snakefile`.", +) +def sm_generate_entrypoint( + pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path] +): + """Generate a `wf/entrypoint.py` file from a Snakemake workflow""" + + from latch_cli.services.register.utils import import_module_by_path + from latch_cli.snakemake.v2.workflow import get_entrypoint_content + + dest = pkg_root / "wf" / "entrypoint.py" + dest.parent.mkdir(exist_ok=True) + + if dest.exists() and not click.confirm( + f"Workflow entrypoint already exists at `{dest}`. Overwrite?" 
+ ): + return + + if metadata_root is None: + metadata_root = pkg_root / "latch_metadata" + + metadata_path = metadata_root / "__init__.py" + if metadata_path.exists(): + click.echo(f"Using metadata file {click.style(metadata_path, italic=True)}") + import_module_by_path(metadata_path) + else: + click.secho( + f"Unable to find file `{metadata_path}` with a `SnakemakeV2Metadata` object " + "defined. If you have a custom metadata root please provide a path " + "to it using the `--metadata-root` option", + fg="red", + ) + raise click.exceptions.Exit(1) + + import latch.types.metadata.snakemake_v2 as metadata + + if metadata._snakemake_v2_metadata is None: + click.secho( + "Failed to generate entrypoint. Make sure the python package at path " + f"`{metadata_path}` defines a `SnakemakeV2Metadata` object.", + fg="red", + ) + raise click.exceptions.Exit(1) + + if snakefile is None: + snakefile = pkg_root / "Snakefile" + + if not snakefile.exists(): + click.secho( + f"Unable to find a Snakefile at `{snakefile}`. If your Snakefile is " + "in a different location please provide an explicit path to it " + "using the `--snakefile` option." 
+ ) + raise click.exceptions.Exit(1) + + dest.write_text(get_entrypoint_content(pkg_root, metadata_path, snakefile)) + + """ POD COMMANDS """ @@ -1046,8 +1021,7 @@ def stop_pod(pod_id: Optional[int] = None): err_str = f"Error reading Pod ID from `{id_path}`" click.secho( - f"{err_str} -- please provide a Pod ID as a command line argument.", - fg="red", + f"{err_str} -- please provide a Pod ID as a command line argument.", fg="red" ) return diff --git a/src/latch_cli/snakemake/config/utils.py b/src/latch_cli/snakemake/config/utils.py index ca13c1ef9..d347908a1 100644 --- a/src/latch_cli/snakemake/config/utils.py +++ b/src/latch_cli/snakemake/config/utils.py @@ -1,11 +1,24 @@ +from __future__ import annotations + from dataclasses import fields, is_dataclass, make_dataclass from enum import Enum -from typing import Any, Dict, List, Optional, Type, Union, get_args, get_origin +from types import UnionType +from typing import ( + Annotated, + Any, + Dict, + List, + Optional, + Type, + Union, + get_args, + get_origin, +) from flytekit.core.annotation import FlyteAnnotation -from typing_extensions import Annotated, TypeAlias, TypeGuard +from typing_extensions import TypeAlias, TypeGuard -from latch.types.directory import LatchDir +from latch.types.directory import LatchDir, LatchOutputDir from latch.types.file import LatchFile from latch_cli.utils import identifier_from_str @@ -107,19 +120,12 @@ def parse_type( return type(v) if isinstance(v, list): - parsed_types = tuple( - parse_type( - x, - name, - infer_files=infer_files, - ) - for x in v - ) + parsed_types = tuple(parse_type(x, name, infer_files=infer_files) for x in v) if len(set(parsed_types)) != 1: raise ValueError( "Generic Lists are not supported - please" - f" ensure that all elements in {name} are of the same type", + f" ensure that all elements in {name} are of the same type" ) typ = parsed_types[0] if typ in {LatchFile, LatchDir}: @@ -134,9 +140,7 @@ def parse_type( fields: Dict[str, Type] = {} for k, x in 
v.items(): fields[identifier_from_str(k)] = parse_type( - x, - f"{name}_{k}", - infer_files=infer_files, + x, f"{name}_{k}", infer_files=infer_files ) return make_dataclass(identifier_from_str(name), fields.items()) @@ -201,7 +205,10 @@ def is_list_type(typ: Type) -> TypeGuard[Type[List]]: return get_origin(typ) is list -def type_repr(t: Type, *, add_namespace: bool = False) -> str: +def type_repr(t: type[Any] | str, *, add_namespace: bool = False) -> str: + if isinstance(t, str): + return type_repr(eval(t), add_namespace=add_namespace) + if is_primitive_type(t) or t in {LatchFile, LatchDir}: return t.__name__ @@ -215,9 +222,16 @@ def type_repr(t: Type, *, add_namespace: bool = False) -> str: return "typing.List" - if get_origin(t) is Union: + if get_origin(t) is dict: args = get_args(t) + if len(args) != 2: + return "typing.Dict" + s = ", ".join([type_repr(x, add_namespace=add_namespace) for x in args]) + return f"typing.Dict[{s}]" + + if get_origin(t) is Union: + args = get_args(t) if len(args) != 2 or args[1] is not type(None): raise ValueError("Union types other than Optional are not yet supported") @@ -227,6 +241,9 @@ def type_repr(t: Type, *, add_namespace: bool = False) -> str: args = get_args(t) assert len(args) > 1 if isinstance(args[1], FlyteAnnotation): + if "output" in args[1].data: + return "LatchOutputDir" + return ( f"typing_extensions.Annotated[{type_repr(args[0], add_namespace=add_namespace)}," f" FlyteAnnotation({repr(args[1].data)})]" @@ -236,7 +253,7 @@ def type_repr(t: Type, *, add_namespace: bool = False) -> str: return t.__name__ -def dataclass_repr(typ: Type) -> str: +def dataclass_repr(typ: type[Any]) -> str: assert is_dataclass(typ) lines = ["@dataclass", f"class {typ.__name__}:"] @@ -256,24 +273,45 @@ def enum_repr(typ: Type) -> str: return "\n".join(lines) + "\n\n\n" -def get_preamble(typ: Type) -> str: +def get_preamble(typ: type[Any] | str, *, defined_names: set[str] | None = None) -> str: + # ayush: some dataclass fields have strings 
as their types so attempt to eval them here + if isinstance(typ, str): + try: + typ = eval(typ) + except Exception: + return "" + + assert not isinstance(typ, str) + + if defined_names is None: + defined_names = set() + if get_origin(typ) is Annotated: args = get_args(typ) assert len(args) > 0 - return get_preamble(args[0]) + return get_preamble(args[0], defined_names=defined_names) if is_primitive_type(typ) or typ in {LatchFile, LatchDir}: return "" - if get_origin(typ) in {Union, list}: - return "".join([get_preamble(t) for t in get_args(typ)]) + if get_origin(typ) in {Union, UnionType, list, dict}: + return "".join([ + get_preamble(t, defined_names=defined_names) for t in get_args(typ) + ]) + + if typ.__name__ in defined_names: + return "" + + defined_names.add(typ.__name__) if issubclass(typ, Enum): return enum_repr(typ) assert is_dataclass(typ), typ - preamble = "".join([get_preamble(f.type) for f in fields(typ)]) + preamble = "".join([ + get_preamble(f.type, defined_names=defined_names) for f in fields(typ) + ]) return "".join([preamble, dataclass_repr(typ)]) @@ -313,7 +351,7 @@ def validate_snakemake_type(name: str, t: Type, param: Any) -> None: if len(args) == 0: raise ValueError( "Generic Lists are not supported - please specify a subtype," - " e.g. List[LatchFile]", + " e.g. 
List[LatchFile]" ) list_typ = args[0] for i, val in enumerate(param): @@ -325,3 +363,5 @@ def validate_snakemake_type(name: str, t: Type, param: Any) -> None: validate_snakemake_type( f"{name}.{field.name}", field.type, getattr(param, field.name) ) + for i, val in enumerate(param): + validate_snakemake_type(f"{name}[{i}]", list_typ, val) diff --git a/src/latch_cli/snakemake/v2/__init__.py b/src/latch_cli/snakemake/v2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/latch_cli/snakemake/v2/utils.py b/src/latch_cli/snakemake/v2/utils.py new file mode 100644 index 000000000..b89d68d32 --- /dev/null +++ b/src/latch_cli/snakemake/v2/utils.py @@ -0,0 +1,26 @@ +from dataclasses import fields, is_dataclass +from enum import Enum +from typing import Any + +from latch.types.directory import LatchDir +from latch.types.file import LatchFile + + +def get_config_val(val: Any): + if isinstance(val, list): + return [get_config_val(x) for x in val] + if isinstance(val, dict): + return {k: get_config_val(v) for k, v in val.items()} + if isinstance(val, (LatchFile, LatchDir)): + if val.remote_path is not None: + return val.remote_path + + return str(val.path) + if isinstance(val, (int, float, bool, type(None))): + return val + if is_dataclass(val): + return {f.name: get_config_val(getattr(val, f.name)) for f in fields(val)} + if isinstance(val, Enum): + return val.value + + return str(val) diff --git a/src/latch_cli/snakemake/v2/workflow.py b/src/latch_cli/snakemake/v2/workflow.py new file mode 100644 index 000000000..32102242d --- /dev/null +++ b/src/latch_cli/snakemake/v2/workflow.py @@ -0,0 +1,161 @@ +from pathlib import Path + +import latch.types.metadata.snakemake_v2 as snakemake +from latch_cli.snakemake.config.utils import get_preamble, type_repr + +_template = """\ +import json +import os +import shutil +import subprocess +import sys +import typing +import typing_extensions +from dataclasses import dataclass +from enum import Enum +from pathlib 
import Path + +import requests + +from latch.resources.tasks import custom_task, snakemake_runtime_task +from latch.resources.workflow import workflow +from latch.types.directory import LatchDir, LatchOutputDir +from latch.types.file import LatchFile +from latch_cli.snakemake.v2.utils import get_config_val +from latch_cli.services.register.utils import import_module_by_path + +import_module_by_path(Path({metadata_path})) + +import latch.types.metadata.snakemake_v2 as smv2 + + +{preambles} +@custom_task(cpu=0.25, memory=0.5, storage_gib=1) +def initialize() -> str: + token = os.environ.get("FLYTE_INTERNAL_EXECUTION_ID") + if token is None: + raise RuntimeError("failed to get execution token") + + headers = {{"Authorization": f"Latch-Execution-Token {{token}}"}} + + print("Provisioning shared storage volume... ", end="") + resp = requests.post( + "http://nf-dispatcher-service.flyte.svc.cluster.local/provision-storage-ofs", + headers=headers, + json={{ + "storage_expiration_hours": 0, + "version": 2, + "snakemake": True, + }}, + ) + resp.raise_for_status() + print("Done.") + + return resp.json()["name"] + +@snakemake_runtime_task(cpu=1, memory=2, storage_gib=50) +def snakemake_runtime(pvc_name: str, {parameters}): + print(f"Using shared filesystem: {{pvc_name}}") + + shared = Path("/snakemake-workdir") + snakefile = shared / {snakefile_path} + + config = {{{config_builders}}} + + config_path = (shared / "__latch.config.json").resolve() + config_path.write_text(json.dumps(config, indent=2)) + + ignore_list = [ + "latch", + ".latch", + ".git", + "nextflow", + ".nextflow", + ".snakemake", + "results", + "miniconda", + "anaconda3", + "mambaforge", + ] + + shutil.copytree( + Path("/root"), + shared, + ignore=lambda src, names: ignore_list, + ignore_dangling_symlinks=True, + dirs_exist_ok=True, + ) + + cmd = [ + "snakemake", + "--snakefile", + str(snakefile), + "--configfile", + str(config_path), + "--executor", + "latch", + "--jobs", + "1000", + ] + + print("Launching 
Snakemake Runtime") + print(" ".join(cmd), flush=True) + + failed = False + try: + subprocess.run(cmd, cwd=shared, check=True) + except subprocess.CalledProcessError: + failed = True + finally: + if not failed: + return + + sys.exit(1) + + +@workflow(smv2._snakemake_v2_metadata) +def {workflow_name}({parameters}): + \"\"\" + Sample Description + \"\"\" + + snakemake_runtime(pvc_name=initialize(), {assignments}) +""" + + +def get_entrypoint_content(pkg_root: Path, metadata_path: Path, snakefile_path: Path) -> str: + metadata = snakemake._snakemake_v2_metadata + assert metadata is not None + + defined_names: set[str] = set() + preambles: list[str] = [] + + defaults: list[str] = [] + no_defaults: list[str] = [] + config_builders: list[str] = [] + assignments: list[str] = [] + + for name, param in metadata.parameters.items(): + assert param.type is not None + + param_str = f"{name}: {type_repr(param.type)}" + if param.default is None: + no_defaults.append(param_str) + else: + param_str = f"{param_str} = {param.default!r}" + defaults.append(param_str) + + config_builders.append(f"{name!r}: get_config_val({name})") + assignments.append(f"{name}={name}") + + preambles.append(get_preamble(param.type, defined_names=defined_names)) + + return _template.format( + metadata_path=repr(str(metadata_path.relative_to(pkg_root))), + preambles="".join(preambles), + parameters=", ".join(no_defaults + defaults), + snakefile_path=repr(str(snakefile_path.relative_to(pkg_root))), + config_builders=", ".join(config_builders), + workflow_name=metadata.name, + assignments=", ".join(assignments), + ) diff --git a/src/latch_cli/utils/stateful_writer.py b/src/latch_cli/utils/stateful_writer.py new file mode 100644 index 000000000..13e7ea0bb --- /dev/null +++ b/src/latch_cli/utils/stateful_writer.py @@ -0,0 +1,29 @@ +from contextlib import contextmanager + + +class StatefulWriter: + def __init__(self, indent: int = 4): + self._indent = " " * indent + + self._buf = [] + self._cur = "" + + 
@contextmanager + def indent(self): + self._cur += self._indent + yield + self._cur = self._cur.removesuffix(self._indent) + + def clear(self): + self._buf = [] + self._cur = "" + + def write(self, s: str, *, nl: bool = True): + self._buf.append(self._indent) + self._buf.append(s) + + if nl: + self._buf.append("\n") + + def get(self): + return "".join(self._buf) diff --git a/uv.lock b/uv.lock index bd7e4402f..81c480c01 100644 --- a/uv.lock +++ b/uv.lock @@ -52,6 +52,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/f5/f2b75d2fc6f1a260f340f0e7c6a060f4dd2961cc16884ed851b0d18da06a/anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d", size = 90377 }, ] +[[package]] +name = "appdirs" +version = "1.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566 }, +] + [[package]] name = "apscheduler" version = "3.10.4" @@ -424,6 +433,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "configargparse" +version = "1.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/70/8a/73f1008adfad01cb923255b924b1528727b8270e67cb4ef41eabdc7d783e/ConfigArgParse-1.7.tar.gz", hash = 
"sha256:e7067471884de5478c58a511e529f0f9bd1c66bfef1dea90935438d6c23306d1", size = 43817 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/b3/b4ac838711fd74a2b4e6f746703cf9dd2cf5462d17dac07e349234e21b97/ConfigArgParse-1.7-py3-none-any.whl", hash = "sha256:d249da6591465c6c26df64a9f73d2536e743be2f244eb3ebe61114af2f94f86b", size = 25489 }, +] + +[[package]] +name = "connection-pool" +version = "0.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/df/c9b4e25dce00f6349fd28aadba7b6c3f7431cc8bd4308a158fbe57b6a22e/connection_pool-0.0.3.tar.gz", hash = "sha256:bf429e7aef65921c69b4ed48f3d48d3eac1383b05d2df91884705842d974d0dc", size = 3795 } + [[package]] name = "cookiecutter" version = "2.6.0" @@ -507,6 +531,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/1f/3d9ae865addc9ef6cb7b102d7d93e227c46b6e5e94db345cae2a30944efa/dataclasses_json-0.5.6-py3-none-any.whl", hash = "sha256:1d7f3a284a49d350ddbabde0e7d0c5ffa34a144aaf1bcb5b9f2c87673ff0c76e", size = 25539 }, ] +[[package]] +name = "datrie" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/fe/db74bd405d515f06657f11ad529878fd389576dca4812bea6f98d9b31574/datrie-0.8.2.tar.gz", hash = "sha256:525b08f638d5cf6115df6ccd818e5a01298cd230b2dac91c8ff2e6499d18765d", size = 63278 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/02/53f0cf0bf0cd629ba6c2cc13f2f9db24323459e9c19463783d890a540a96/datrie-0.8.2-pp273-pypy_73-win32.whl", hash = "sha256:b07bd5fdfc3399a6dab86d6e35c72b1dbd598e80c97509c7c7518ab8774d3fda", size = 91292 }, +] + [[package]] name = "decorator" version = "5.1.1" @@ -581,6 +614,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", 
size = 587408 }, ] +[[package]] +name = "dpath" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/ce/e1fd64d36e4a5717bd5e6b2ad188f5eaa2e902fde871ea73a79875793fc9/dpath-2.2.0.tar.gz", hash = "sha256:34f7e630dc55ea3f219e555726f5da4b4b25f2200319c8e6902c394258dd6a3e", size = 28266 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/d1/8952806fbf9583004ab479d8f58a9496c3d35f6b6009ddd458bdd9978eaf/dpath-2.2.0-py3-none-any.whl", hash = "sha256:b330a375ded0a0d2ed404440f6c6a715deae5313af40bbb01c8a41d891900576", size = 17618 }, +] + [[package]] name = "durationpy" version = "0.9" @@ -599,6 +641,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, ] +[[package]] +name = "fastjsonschema" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/3f/3ad5e7be13b4b8b55f4477141885ab2364f65d5f6ad5f7a9daffd634d066/fastjsonschema-2.20.0.tar.gz", hash = "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23", size = 373056 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/ca/086311cdfc017ec964b2436fe0c98c1f4efcb7e4c328956a22456e497655/fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a", size = 23543 }, +] + [[package]] name = "furo" version = "2024.8.6" @@ -754,6 +805,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/32/754cd4474790239c7436a7a9490bc0c4a0a2ed604cb9a940151a3b1055b9/grpcio_status-1.48.2-py3-none-any.whl", hash = "sha256:2c33bbdbe20188b2953f46f31af669263b6ee2a9b2d38fa0d36ee091532e21bf", size = 14441 }, ] +[[package]] +name = "humanfriendly" +version = "10.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyreadline3", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 }, +] + [[package]] name = "idna" version = "3.10" @@ -886,6 +949,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/0f/8910b19ac0670a0f80ce1008e5e751c4a57e14d2c4c13a482aa6079fa9d6/jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf", size = 18459 }, ] +[[package]] +name = "jupyter-core" +version = "5.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "platformdirs" }, + { name = "pywin32", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/11/b56381fa6c3f4cc5d2cf54a7dbf98ad9aa0b339ef7a601d6053538b079a7/jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9", size = 87629 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, +] + [[package]] name = "keyring" version = "25.5.0" @@ -928,7 +1005,7 @@ wheels = [ [[package]] name = "latch" -version = "2.54.0" +version = "2.54.0a4" source = { editable = "." 
} dependencies = [ { name = "aioconsole" }, @@ -941,8 +1018,6 @@ dependencies = [ { name = "gql" }, { name = "graphql-core" }, { name = "kubernetes" }, - { name = "latch-sdk-config" }, - { name = "latch-sdk-gql" }, { name = "lytekit" }, { name = "lytekitplugins-pods" }, { name = "paramiko" }, @@ -962,6 +1037,10 @@ dependencies = [ pandas = [ { name = "pandas" }, ] +snakemake = [ + { name = "pulp" }, + { name = "snakemake" }, +] [package.dev-dependencies] dev = [ @@ -989,20 +1068,20 @@ requires-dist = [ { name = "gql", specifier = "==3.4.0" }, { name = "graphql-core", specifier = "==3.2.3" }, { name = "kubernetes", specifier = ">=24.2.0" }, - { name = "latch-sdk-config", specifier = "==0.0.4" }, - { name = "latch-sdk-gql", specifier = "==0.0.6" }, - { name = "lytekit", specifier = "==0.15.13" }, + { name = "lytekit", specifier = "==0.15.14" }, { name = "lytekitplugins-pods", specifier = "==0.6.2" }, { name = "pandas", marker = "extra == 'pandas'", specifier = ">=2.0.0" }, { name = "paramiko", specifier = ">=3.4.0" }, + { name = "pulp", marker = "extra == 'snakemake'", specifier = ">=2.0,<2.8" }, { name = "pyjwt", specifier = ">=0.2.0" }, { name = "python-dateutil", specifier = ">=2.8" }, { name = "requests", specifier = ">=2.28.1" }, { name = "requests-toolbelt", specifier = "==0.10.1" }, { name = "scp", specifier = ">=0.14.0" }, { name = "setuptools", specifier = ">=75.3.0" }, + { name = "snakemake", marker = "extra == 'snakemake'", specifier = ">=7.18.0,<7.30.2" }, { name = "tqdm", specifier = ">=4.63.0" }, - { name = "typing-extensions", specifier = "==4.7.1" }, + { name = "typing-extensions", specifier = ">=4.12.0" }, { name = "watchfiles", specifier = "==0.19.0" }, { name = "websockets", specifier = "==11.0.3" }, ] @@ -1022,27 +1101,15 @@ docs = [ ] [[package]] -name = "latch-sdk-config" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/e7/ae/3597280830ca310569554daeee252cd597894407986529fb44f00fb46646/latch-sdk-config-0.0.4.tar.gz", hash = "sha256:72bfec47364abce7f6096d794fe3ebed29c4cb1aafb60adc4438fca15ddea15d", size = 3758 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/e4/72725cedb6770af45bfae52804b37b4b1db6aed687137a7f3a1de5427514/latch_sdk_config-0.0.4-py3-none-any.whl", hash = "sha256:3f4d9dd7f5d7cda65ab96557a0792b910886bf7f1b6bd36b722d89df9b610609", size = 4515 }, -] - -[[package]] -name = "latch-sdk-gql" -version = "0.0.6" +name = "latch-persistence" +version = "0.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "gql" }, - { name = "graphql-core" }, - { name = "latch-sdk-config" }, - { name = "requests-toolbelt" }, + { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/16/c3/bc3081ae616acae3ec255c803bae774b41229e78bbac57e0c25908f3d90b/latch-sdk-gql-0.0.6.tar.gz", hash = "sha256:d945c4da710e664751abbb0bae333c7df14f12d347ca4ba55fd5e062ac472800", size = 3241 } +sdist = { url = "https://files.pythonhosted.org/packages/8e/e0/a6f8427a9b1e6d9bce808b6ad7acfc98e67f08d5f6e9b4311090a2df1c1a/latch_persistence-0.1.0.tar.gz", hash = "sha256:527b01a5986871cb954bb69fd1d5134402304b7529dedf7096ac97ab3dd6e827", size = 9380 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/4c/d30b24eeda4733d5b22bd82cb26a22e072005e503a2339cb6776fdf791d4/latch_sdk_gql-0.0.6-py3-none-any.whl", hash = "sha256:04e5348491c7eb748508f4e8bca6ff4a6baba3e955ff3bcc920ab0fdada5e31c", size = 4034 }, + { url = "https://files.pythonhosted.org/packages/73/92/d1669cbaa7184dc73523e6874e4f2ab79fb7104a621597e9fdff82db35b2/latch_persistence-0.1.0-py3-none-any.whl", hash = "sha256:ab6fff7fe7c48f960caf55474810b465803f920eb6245c80c73ec8e3dd333046", size = 5371 }, ] [[package]] @@ -1061,7 +1128,7 @@ wheels = [ [[package]] name = "lytekit" -version = "0.15.13" +version = "0.15.14" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -1078,6 +1145,7 @@ dependencies = [ { name = "grpcio-status" }, { name = "jsonschema" }, { name = "keyring" }, + { name = "latch-persistence" }, { name = "lyteidl" }, { name = "marshmallow-enum" }, { name = "marshmallow-jsonschema" }, @@ -1097,9 +1165,9 @@ dependencies = [ { name = "wheel" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0a/54/20266d5b9bfe6fa56fe5a0366f8bb550e18e4ecb635ed8e7b27774a297f9/lytekit-0.15.13.tar.gz", hash = "sha256:147449e255d3bbc13d6f7a39f32f18c891295f38f9bc7e0f9754f546c8aa3a69", size = 295586 } +sdist = { url = "https://files.pythonhosted.org/packages/6a/15/c205b4adfcfe65fd01b21365d7f02f9c8e25e6a8554a26419f30275ad374/lytekit-0.15.14.tar.gz", hash = "sha256:92c7662a3cb8d38fe670397183ae517d2b2e175ad43c3949bbc3d1363b604046", size = 292268 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/3f/d9bbd2b97eddeae3e7a86abde1f7444cca9b6a578a71f6fa8a3a53c791f6/lytekit-0.15.13-py3-none-any.whl", hash = "sha256:4565a4df2a3cad2aadc02f325923eddf55672921884a839ce7cfeb10ba4cec38", size = 389398 }, + { url = "https://files.pythonhosted.org/packages/d0/13/a90708989e90b274f3d1ac13425ad6d7de3ab77d58cbe1c6ae1d00c878d5/lytekit-0.15.14-py3-none-any.whl", hash = "sha256:9e207b532df63d644c7d17a878337cb160b190b7322de9a7407857a1b104e185", size = 385453 }, ] [[package]] @@ -1383,6 +1451,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c", size = 38268 }, ] +[[package]] +name = "nbformat" +version = "5.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastjsonschema" }, + { name = "jsonschema" }, + { name = "jupyter-core" }, + { name = "traitlets" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/6d/fd/91545e604bc3dad7dca9ed03284086039b294c6b3d75c0d2fa45f9e9caf3/nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", size = 142749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454 }, +] + [[package]] name = "numpy" version = "2.0.2" @@ -1522,6 +1605,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/66/14b2c030fcce69cba482d205c2d1462ca5c77303a263260dcb1192801c85/paramiko-3.5.0-py3-none-any.whl", hash = "sha256:1fedf06b085359051cd7d0d270cebe19e755a8a921cc2ddbfa647fb0cd7d68f9", size = 227143 }, ] +[[package]] +name = "plac" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/79/1edb4c836c69306d0ecb0865f46d62ea7e28ef16b3f95bb394e4f2a46330/plac-1.4.3.tar.gz", hash = "sha256:d4cb3387b2113a28aebd509433d0264a4e5d9bb7c1a86db4fbd0a8f11af74eb3", size = 38984 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/af/4c61d2ac0d589719f548f5a1ba919738e44bac7b0c723ce147de5556d233/plac-1.4.3-py2.py3-none-any.whl", hash = "sha256:8a84fde8f950c9de6588a2d53c9deeac3ba1ddb456d887a33228460cf6549750", size = 22458 }, +] + +[[package]] +name = "platformdirs" +version = "4.3.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = 
"sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -1649,6 +1750,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/7f/d8f8d81a921f07e703cabf8a0b5bb6cbc26e3bce7614db905c3c7637315a/protoc_gen_swagger-0.1.0-py2.py3-none-any.whl", hash = "sha256:cdc043da538865f055a7f22b304a35085cef269dc33e2f3408b12d397e8d8b4b", size = 9443 }, ] +[[package]] +name = "psutil" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/10/2a30b13c61e7cf937f4adf90710776b7918ed0a9c434e2c38224732af310/psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", size = 508565 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/9e/8be43078a171381953cfee33c07c0d628594b5dbfc5157847b85022c2c1b/psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", size = 247762 }, + { url = "https://files.pythonhosted.org/packages/1d/cb/313e80644ea407f04f6602a9e23096540d9dc1878755f3952ea8d3d104be/psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", size = 248777 }, + { url = "https://files.pythonhosted.org/packages/65/8e/bcbe2025c587b5d703369b6a75b65d41d1367553da6e3f788aff91eaf5bd/psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", size = 284259 }, + { url = "https://files.pythonhosted.org/packages/58/4d/8245e6f76a93c98aab285a43ea71ff1b171bcd90c9d238bf81f7021fb233/psutil-6.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b", size = 287255 }, + { url = 
"https://files.pythonhosted.org/packages/27/c2/d034856ac47e3b3cdfa9720d0e113902e615f4190d5d1bdb8df4b2015fb2/psutil-6.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a", size = 288804 }, + { url = "https://files.pythonhosted.org/packages/ea/55/5389ed243c878725feffc0d6a3bc5ef6764312b6fc7c081faaa2cfa7ef37/psutil-6.1.0-cp37-abi3-win32.whl", hash = "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e", size = 250386 }, + { url = "https://files.pythonhosted.org/packages/11/91/87fa6f060e649b1e1a7b19a4f5869709fbf750b7c8c262ee776ec32f3028/psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be", size = 254228 }, +] + +[[package]] +name = "pulp" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/59/41/44d617a67407ea5db026500025b8aa7cad0b2b52621c04991b248c3b383d/PuLP-2.7.0.tar.gz", hash = "sha256:e73ee6b32d639c9b8cf4b4aded334ba158be5f8313544e056f796ace0a10ae63", size = 1400315 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/60/b91acaa7995bfcd72f1739ea2b0f5cda707329e17f0b7f921fd8acc79889/PuLP-2.7.0-py3-none-any.whl", hash = "sha256:b6de42c929e80325bf44cc7a2997f02535440800c376b9eb8cb7b4670ed53769", size = 14251631 }, +] + [[package]] name = "py" version = "1.11.0" @@ -1744,6 +1869,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, ] +[[package]] +name = "pyreadline3" +version = "3.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = 
"sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178 }, +] + [[package]] name = "pytest" version = "8.3.3" @@ -2033,6 +2167,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/d3/bf87a36bff1cb88fd30a509fd366c70ec30676517ee791b2f77e0e29817a/requests_toolbelt-0.10.1-py2.py3-none-any.whl", hash = "sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7", size = 54525 }, ] +[[package]] +name = "reretry" +version = "0.11.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/40/1d/25d562a62b7471616bccd7c15a7533062eb383927e68667bf331db990415/reretry-0.11.8.tar.gz", hash = "sha256:f2791fcebe512ea2f1d153a2874778523a8064860b591cd90afc21a8bed432e3", size = 4836 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/11/e295e07d4ae500144177f875a8de11daa4d86b8246ab41c76a98ce9280ca/reretry-0.11.8-py2.py3-none-any.whl", hash = "sha256:5ec1084cd9644271ee386d34cd5dd24bdb3e91d55961b076d1a31d585ad68a79", size = 5609 }, +] + [[package]] name = "responses" version = "0.25.3" @@ -2263,6 +2406,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 }, ] +[[package]] +name = "smart-open" +version = "7.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/d8/1481294b2d110b805c0f5d23ef34158b7d5d4283633c0d34c69ea89bb76b/smart_open-7.0.5.tar.gz", hash = 
"sha256:d3672003b1dbc85e2013e4983b88eb9a5ccfd389b0d4e5015f39a9ee5620ec18", size = 71693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/bc/706838af28a542458bffe74a5d0772ca7f207b5495cd9fccfce61ef71f2a/smart_open-7.0.5-py3-none-any.whl", hash = "sha256:8523ed805c12dff3eaa50e9c903a6cb0ae78800626631c5fe7ea073439847b89", size = 61387 }, +] + [[package]] name = "smmap" version = "5.0.1" @@ -2272,6 +2427,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/a5/10f97f73544edcdef54409f1d839f6049a0d79df68adbc1ceb24d1aaca42/smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da", size = 24282 }, ] +[[package]] +name = "snakemake" +version = "7.30.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appdirs" }, + { name = "configargparse" }, + { name = "connection-pool" }, + { name = "datrie" }, + { name = "docutils" }, + { name = "gitpython" }, + { name = "humanfriendly" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "nbformat" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pulp" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "reretry" }, + { name = "smart-open" }, + { name = "stopit" }, + { name = "tabulate" }, + { name = "throttler" }, + { name = "toposort" }, + { name = "wrapt" }, + { name = "yte" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/d7/8173e012d95dc7ea0fae8128e90240b65ac5e24a91a59ef812076a46ac68/snakemake-7.30.1.tar.gz", hash = "sha256:0e907ae6ea18a7e7c8b9f08976ee1874da66f79cbd4ad1b25cc7e7b9a8670f80", size = 365081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/5e/1b6ccd64a7bb6205beb3e5fdd996eabdf6b6cb153007f06203ba2ac78c91/snakemake-7.30.1-py3-none-any.whl", hash = "sha256:559e4c544a90eb34e79c211afaa70c7c373be00690af885e7c12a5f4f7a70f17", size = 377156 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -2451,6 +2640,21 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/47/33/c824f799128dfcfce2142f18d9bc6c55c46a939f6e4250639134222d99eb/statsd-3.3.0-py2.py3-none-any.whl", hash = "sha256:c610fb80347fca0ef62666d241bce64184bd7cc1efe582f9690e045c25535eaa", size = 11990 }, ] +[[package]] +name = "stopit" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/58/e8bb0b0fb05baf07bbac1450c447d753da65f9701f551dca79823ce15d50/stopit-1.1.2.tar.gz", hash = "sha256:f7f39c583fd92027bd9d06127b259aee7a5b7945c1f1fa56263811e1e766996d", size = 18281 } + +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 }, +] + [[package]] name = "text-unidecode" version = "1.3" @@ -2460,6 +2664,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", size = 78154 }, ] +[[package]] +name = "throttler" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/22/638451122136d5280bc477c8075ea448b9ebdfbd319f0f120edaecea2038/throttler-1.2.2.tar.gz", hash = "sha256:d54db406d98e1b54d18a9ba2b31ab9f093ac64a0a59d730c1cf7bb1cdfc94a58", size = 7970 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/df/d4/36bf6010b184286000b2334622bfb3446a40c22c1d2a9776bff025cb0fe5/throttler-1.2.2-py3-none-any.whl", hash = "sha256:fc6ae612a2529e01110b32335af40375258b98e3b81232ec77cd07f51bf71392", size = 7609 }, +] + [[package]] name = "tomli" version = "2.0.2" @@ -2469,6 +2682,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/db/ce8eda256fa131af12e0a76d481711abe4681b6923c27efb9a255c9e4594/tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38", size = 13237 }, ] +[[package]] +name = "toposort" +version = "1.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/19/8e955d90985ecbd3b9adb2a759753a6840da2dff3c569d412b2c9217678b/toposort-1.10.tar.gz", hash = "sha256:bfbb479c53d0a696ea7402601f4e693c97b0367837c8898bc6471adfca37a6bd", size = 11132 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/17/57b444fd314d5e1593350b9a31d000e7411ba8e17ce12dc7ad54ca76b810/toposort-1.10-py3-none-any.whl", hash = "sha256:cbdbc0d0bee4d2695ab2ceec97fe0679e9c10eab4b2a87a9372b929e70563a87", size = 8500 }, +] + [[package]] name = "tqdm" version = "4.66.6" @@ -2481,6 +2703,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/73/02342de9c2d20922115f787e101527b831c0cffd2105c946c4a4826bcfd4/tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63", size = 78326 }, ] +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, +] + [[package]] name = "types-python-dateutil" version = "2.9.0.20241003" @@ -2492,11 +2723,11 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.7.1" +version = "4.12.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3c/8b/0111dd7d6c1478bf83baa1cab85c686426c7a6274119aceb2bd9d35395ad/typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2", size = 72876 } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", size = 33232 }, + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, ] [[package]] @@ -2778,6 +3009,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/ad/1fe7ff5f3e8869d4c5070f47b96bac2b4d15e67c100a8278d8e7876329fc/yarl-1.17.1-py3-none-any.whl", hash = "sha256:f1790a4b1e8e8e028c391175433b9c8122c39b46e1663228158e61e6f915bf06", size = 44352 }, ] +[[package]] +name = "yte" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dpath" }, + { name = "plac" }, + { name = "pyyaml" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/58/4b/3f89f96417e4e39c3f3e3f4a17d6233e81dc1e5cd5b5ed0a2498faedf690/yte-1.5.4.tar.gz", hash = "sha256:d2d77e53eafca74f58234fcd3fea28cc0a719e4f3784911511e35e86594bc880", size = 6352 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/64/97df1886abf11291e9a18b1672b2b79eb940499263c85339a1645d870600/yte-1.5.4-py3-none-any.whl", hash = "sha256:14ccfcb57d60b7652041b606129851423805140b22f52f5152f7c2692cd7b905", size = 7655 }, +] + [[package]] name = "zipp" version = "3.20.2" From 5ee630d03ef571375e9ff6ecbc37ab966e49bbac Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Mon, 16 Dec 2024 10:04:16 -0800 Subject: [PATCH 02/18] stuff Signed-off-by: Ayush Kamat --- pyproject.toml | 6 ++- src/latch_cli/docker_utils/__init__.py | 30 ++----------- uv.lock | 61 +++++--------------------- 3 files changed, 19 insertions(+), 78 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86415afbe..b16d7f0d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,11 @@ classifiers = [ [project.optional-dependencies] pandas = ["pandas>=2.0.0"] -snakemake = ["snakemake>=7.18.0,<7.30.2", "pulp>=2.0,<2.8"] +snakemake = [ + "snakemake", + "snakemake-storage-plugin-latch", + "snakemake-executor-plugin-latch", +] [project.scripts] latch = "latch_cli.main:main" diff --git a/src/latch_cli/docker_utils/__init__.py b/src/latch_cli/docker_utils/__init__.py index 146b198b8..1bc3b73a3 100644 --- a/src/latch_cli/docker_utils/__init__.py +++ b/src/latch_cli/docker_utils/__init__.py @@ -290,10 +290,7 @@ def infer_env_commands(self): return click.echo( - " ".join([ - click.style(f"{self.direnv.name}:", bold=True), - "Environment variable setup", - ]) + " ".join([click.style(f"{self.direnv.name}:", bold=True), "Environment variable setup"]) ) envs: list[str] = [] for line in self.direnv.read_text().splitlines(): @@ -323,15 +320,6 @@ def infer_dependencies(self): def get_copy_file_commands(self): cmd = ["copy . 
/root/"] - if self.wf_type == WorkflowType.snakemake: - cmd.extend([ - "", - "# Latch snakemake workflow entrypoint", - "# DO NOT CHANGE", - "", - "copy .latch/snakemake_jit_entrypoint.py /root/snakemake_jit_entrypoint.py", - ]) - self.commands.append( DockerCmdBlock( comment="Copy workflow data (use .dockerignore to skip files)", @@ -347,20 +335,13 @@ def generate(self, *, dest: Optional[Path] = None, overwrite: bool = False): if ( dest.exists() and not overwrite - and not ( - click.confirm(f"Dockerfile already exists at `{dest}`. Overwrite?") - ) + and not (click.confirm(f"Dockerfile already exists at `{dest}`. Overwrite?")) ): return click.secho("Generating Dockerfile", bold=True) - click.echo( - " ".join([ - click.style("Base image:", fg="bright_blue"), - self.config.base_image, - ]) - ) + click.echo(" ".join([click.style("Base image:", fg="bright_blue"), self.config.base_image])) click.echo( " ".join([ click.style("Latch SDK version:", fg="bright_blue"), @@ -400,10 +381,7 @@ def generate_dockerignore( dest = Path(pkg_root) / ".dockerignore" if dest.exists(): if dest.is_dir(): - click.secho( - f".dockerignore already exists at `{dest}` and is a directory.", - fg="red", - ) + click.secho(f".dockerignore already exists at `{dest}` and is a directory.", fg="red") raise click.exceptions.Exit(1) if not overwrite and not click.confirm( diff --git a/uv.lock b/uv.lock index 05fe665e3..29fbf1f01 100644 --- a/uv.lock +++ b/uv.lock @@ -717,16 +717,17 @@ wheels = [ [[package]] name = "gql" -version = "3.4.0" +version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "anyio" }, { name = "backoff" }, { name = "graphql-core" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/1e/e7f0231f05127466a1fc63c4a9e69a95676f4729a80f1c3b5106e5c42cf9/gql-3.4.0.tar.gz", hash = "sha256:ca81aa8314fa88a8c57dd1ce34941278e0c352d762eb721edcba0387829ea7c0", size = 151663 } +sdist = { url = 
"https://files.pythonhosted.org/packages/3d/85/feda24b33adcc6c8463a62a8e2ca2cc3425dc6d687388ff728ceae231204/gql-3.5.0.tar.gz", hash = "sha256:ccb9c5db543682b28f577069950488218ed65d4ac70bb03b6929aaadaf636de9", size = 179939 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/08/28d4a370e7f63a679ab754804af39c0c0e3959621f90cb51a43e591ba0ea/gql-3.4.0-py2.py3-none-any.whl", hash = "sha256:59c8a0b8f0a2f3b0b2ff970c94de86f82f65cb1da3340bfe57143e5f7ea82f71", size = 65151 }, + { url = "https://files.pythonhosted.org/packages/74/fb/01a200e1c31b79690427c8e983014e4220d2652b4372a46fe4598e1d7a8e/gql-3.5.0-py2.py3-none-any.whl", hash = "sha256:70dda5694a5b194a8441f077aa5fb70cc94e4ec08016117523f013680901ecb7", size = 74001 }, ] [[package]] @@ -1005,13 +1006,7 @@ wheels = [ [[package]] name = "latch" -<<<<<<< HEAD -version = "2.54.0a4" -||||||| 2c739ac -version = "2.54.0" -======= -version = "2.54.10" ->>>>>>> f29f295791653c218ebbec7f584fe4c142a356b9 +version = "2.54.0a8" source = { editable = "." 
} dependencies = [ { name = "aioconsole" }, @@ -1073,18 +1068,10 @@ requires-dist = [ { name = "click", specifier = ">=8.0" }, { name = "docker", specifier = ">=7.1.0" }, { name = "gitpython", specifier = "==3.1.40" }, - { name = "gql", specifier = "==3.4.0" }, + { name = "gql", specifier = ">=3.5.0,<4.0.0" }, { name = "graphql-core", specifier = "==3.2.3" }, { name = "kubernetes", specifier = ">=24.2.0" }, -<<<<<<< HEAD { name = "lytekit", specifier = "==0.15.14" }, -||||||| 2c739ac - { name = "latch-sdk-config", specifier = "==0.0.4" }, - { name = "latch-sdk-gql", specifier = "==0.0.6" }, - { name = "lytekit", specifier = "==0.15.13" }, -======= - { name = "lytekit", specifier = "==0.15.13" }, ->>>>>>> f29f295791653c218ebbec7f584fe4c142a356b9 { name = "lytekitplugins-pods", specifier = "==0.6.2" }, { name = "orjson", specifier = ">=3.10.12" }, { name = "pandas", marker = "extra == 'pandas'", specifier = ">=2.0.0" }, @@ -1094,7 +1081,7 @@ requires-dist = [ { name = "python-dateutil", specifier = ">=2.8" }, { name = "pyxattr", specifier = ">=0.8.1" }, { name = "requests", specifier = ">=2.28.1" }, - { name = "requests-toolbelt", specifier = "==0.10.1" }, + { name = "requests-toolbelt", specifier = ">=1.0.0,<2.0.0" }, { name = "scp", specifier = ">=0.14.0" }, { name = "setuptools", specifier = ">=75.3.0" }, { name = "snakemake", marker = "extra == 'snakemake'", specifier = ">=7.18.0,<7.30.2" }, @@ -1119,7 +1106,6 @@ docs = [ ] [[package]] -<<<<<<< HEAD name = "latch-persistence" version = "0.1.0" source = { registry = "https://pypi.org/simple" } @@ -1132,33 +1118,6 @@ wheels = [ ] [[package]] -||||||| 2c739ac -name = "latch-sdk-config" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/ae/3597280830ca310569554daeee252cd597894407986529fb44f00fb46646/latch-sdk-config-0.0.4.tar.gz", hash = "sha256:72bfec47364abce7f6096d794fe3ebed29c4cb1aafb60adc4438fca15ddea15d", size = 3758 } -wheels = [ - 
{ url = "https://files.pythonhosted.org/packages/13/e4/72725cedb6770af45bfae52804b37b4b1db6aed687137a7f3a1de5427514/latch_sdk_config-0.0.4-py3-none-any.whl", hash = "sha256:3f4d9dd7f5d7cda65ab96557a0792b910886bf7f1b6bd36b722d89df9b610609", size = 4515 }, -] - -[[package]] -name = "latch-sdk-gql" -version = "0.0.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "gql" }, - { name = "graphql-core" }, - { name = "latch-sdk-config" }, - { name = "requests-toolbelt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/16/c3/bc3081ae616acae3ec255c803bae774b41229e78bbac57e0c25908f3d90b/latch-sdk-gql-0.0.6.tar.gz", hash = "sha256:d945c4da710e664751abbb0bae333c7df14f12d347ca4ba55fd5e062ac472800", size = 3241 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/4c/d30b24eeda4733d5b22bd82cb26a22e072005e503a2339cb6776fdf791d4/latch_sdk_gql-0.0.6-py3-none-any.whl", hash = "sha256:04e5348491c7eb748508f4e8bca6ff4a6baba3e955ff3bcc920ab0fdada5e31c", size = 4034 }, -] - -[[package]] -======= ->>>>>>> f29f295791653c218ebbec7f584fe4c142a356b9 name = "lyteidl" version = "0.2.1" source = { registry = "https://pypi.org/simple" } @@ -2278,14 +2237,14 @@ wheels = [ [[package]] name = "requests-toolbelt" -version = "0.10.1" +version = "1.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0c/4c/07f01c6ac44f7784fa399137fbc8d0cdc1b5d35304e8c0f278ad82105b58/requests-toolbelt-0.10.1.tar.gz", hash = "sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d", size = 208956 } +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/05/d3/bf87a36bff1cb88fd30a509fd366c70ec30676517ee791b2f77e0e29817a/requests_toolbelt-0.10.1-py2.py3-none-any.whl", hash = "sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7", size = 54525 }, + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481 }, ] [[package]] From ce4421a576204161b1566e3dee13e23646fc4c24 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Wed, 18 Dec 2024 10:46:01 -0800 Subject: [PATCH 03/18] stuff Signed-off-by: Ayush Kamat --- pyproject.toml | 4 +-- src/latch/types/directory.py | 41 ++++++------------------ src/latch/types/file.py | 31 ++++-------------- src/latch/types/metadata/snakemake_v2.py | 18 ++++------- src/latch_cli/snakemake/v2/utils.py | 12 +++++-- 5 files changed, 33 insertions(+), 73 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b16d7f0d5..980140067 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.54.0.a8" +version = "2.54.0.a9" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [ @@ -34,7 +34,7 @@ dependencies = [ "scp>=0.14.0", "boto3>=1.26.0", "tqdm>=4.63.0", - "lytekit==0.15.14", + "lytekit==0.15.15", "lytekitplugins-pods==0.6.2", "typing-extensions>=4.12.0", "apscheduler>=3.10.0", diff --git a/src/latch/types/directory.py b/src/latch/types/directory.py index 27088f1ea..ea10104fe 100644 --- a/src/latch/types/directory.py +++ b/src/latch/types/directory.py @@ -9,11 +9,7 @@ from flytekit.core.type_engine import TypeEngine, TypeTransformer from flytekit.exceptions.user import FlyteUserException from flytekit.models.literals import Literal -from flytekit.types.directory.types 
import ( - FlyteDirectory, - FlyteDirToMultipartBlobTransformer, -) -from latch_sdk_gql.execute import execute +from flytekit.types.directory.types import FlyteDirectory, FlyteDirToMultipartBlobTransformer from typing_extensions import Annotated from latch.ldata.path import LPath @@ -21,6 +17,7 @@ from latch.types.utils import format_path, is_valid_url from latch_cli.utils import urljoins from latch_cli.utils.path import normalize_path +from latch_sdk_gql.execute import execute class IterdirChild(TypedDict): @@ -93,10 +90,7 @@ def task(dir: LatchFile): """ def __init__( - self, - path: Union[str, PathLike], - remote_path: Optional[PathLike] = None, - **kwargs, + self, path: Union[str, PathLike], remote_path: Optional[PathLike] = None, **kwargs ): if path is None: raise ValueError("Unable to instantiate LatchDir with None") @@ -136,9 +130,7 @@ def downloader(): self._idempotent_set_path() return ctx.file_access.get_data( - self._remote_directory, - self.path, - is_multipart=True, + self._remote_directory, self.path, is_multipart=True ) super().__init__(self.path, downloader, self._remote_directory) @@ -252,10 +244,7 @@ def __repr__(self): if self.remote_path is None: return f"LatchDir({repr(format_path(self.local_path))})" - return ( - f"LatchDir({repr(self.path)}," - f" remote_path={repr( format_path(self.remote_path))})" - ) + return f"LatchDir({repr(self.path)}, remote_path={repr(format_path(self.remote_path))})" def __str__(self): if self.remote_path is None: @@ -264,12 +253,7 @@ def __str__(self): return f"LatchDir({format_path(self.remote_path)})" -LatchOutputDir = Annotated[ - LatchDir, - FlyteAnnotation( - {"output": True}, - ), -] +LatchOutputDir = Annotated[LatchDir, FlyteAnnotation({"output": True})] """A LatchDir tagged as the output of some workflow. 
The Latch Console uses this metadata to avoid checking for existence of the @@ -284,24 +268,17 @@ def __init__(self): TypeTransformer.__init__(self, name="LatchDirPath", t=LatchDir) def to_python_value( - self, - ctx: FlyteContext, - lv: Literal, - expected_python_type: Union[Type[LatchDir], PathLike], + self, ctx: FlyteContext, lv: Literal, expected_python_type: Union[Type[LatchDir], PathLike] ) -> FlyteDirectory: uri = lv.scalar.blob.uri if expected_python_type is PathLike: - raise TypeError( - "Casting from Pathlike to LatchDir is currently not supported." - ) + raise TypeError("Casting from Pathlike to LatchDir is currently not supported.") while get_origin(expected_python_type) == Annotated: expected_python_type = get_args(expected_python_type)[0] if not issubclass(expected_python_type, LatchDir): - raise TypeError( - f"Neither os.PathLike nor LatchDir specified {expected_python_type}" - ) + raise TypeError(f"Neither os.PathLike nor LatchDir specified {expected_python_type}") # This is a local file path, like /usr/local/my_file, don't mess with it. Certainly, downloading it doesn't # make any sense. 
diff --git a/src/latch/types/file.py b/src/latch/types/file.py index 15ded3526..5f2e41ac5 100644 --- a/src/latch/types/file.py +++ b/src/latch/types/file.py @@ -11,12 +11,12 @@ from flytekit.core.type_engine import TypeEngine, TypeTransformer from flytekit.models.literals import Literal from flytekit.types.file.file import FlyteFile, FlyteFilePathTransformer -from latch_sdk_gql.execute import execute from typing_extensions import Annotated from latch.ldata.path import LPath from latch.types.utils import format_path, is_absolute_node_path, is_valid_url from latch_cli.utils.path import normalize_path +from latch_sdk_gql.execute import execute class LatchFile(FlyteFile): @@ -115,9 +115,7 @@ def downloader(): self._idempotent_set_path(local_path_hint) return ctx.file_access.get_data( - self._remote_path, - self.path, - is_multipart=False, + self._remote_path, self.path, is_multipart=False ) super().__init__(self.path, downloader, self._remote_path) @@ -162,10 +160,7 @@ def __repr__(self): if self.remote_path is None: return f"LatchFile({repr(format_path(self.local_path))})" - return ( - f"LatchFile({repr(self.path)}," - f" remote_path={repr(format_path(self.remote_path))})" - ) + return f"LatchFile({repr(self.path)}, remote_path={repr(format_path(self.remote_path))})" def __str__(self): if self.remote_path is None: @@ -173,12 +168,7 @@ def __str__(self): return f"LatchFile({format_path(self.remote_path)})" -LatchOutputFile = Annotated[ - LatchFile, - FlyteAnnotation( - {"output": True}, - ), -] +LatchOutputFile = Annotated[LatchFile, FlyteAnnotation({"output": True})] """A LatchFile tagged as the output of some workflow. 
The Latch Console uses this metadata to avoid checking for existence of the @@ -193,21 +183,14 @@ def __init__(self): TypeTransformer.__init__(self, name="LatchFilePath", t=LatchFile) def to_python_value( - self, - ctx: FlyteContext, - lv: Literal, - expected_python_type: Union[Type[LatchFile], PathLike], + self, ctx: FlyteContext, lv: Literal, expected_python_type: Union[Type[LatchFile], PathLike] ) -> LatchFile: uri = lv.scalar.blob.uri if expected_python_type is PathLike: - raise TypeError( - "Casting from Pathlike to LatchFile is currently not supported." - ) + raise TypeError("Casting from Pathlike to LatchFile is currently not supported.") if not issubclass(expected_python_type, LatchFile): - raise TypeError( - f"Neither os.PathLike nor LatchFile specified {expected_python_type}" - ) + raise TypeError(f"Neither os.PathLike nor LatchFile specified {expected_python_type}") # This is a local file path, like /usr/local/my_file, don't mess with it. Certainly, downloading it doesn't # make any sense. diff --git a/src/latch/types/metadata/snakemake_v2.py b/src/latch/types/metadata/snakemake_v2.py index bf3663057..7cc300d8d 100644 --- a/src/latch/types/metadata/snakemake_v2.py +++ b/src/latch/types/metadata/snakemake_v2.py @@ -18,15 +18,15 @@ class SnakemakeRuntimeResources: """Resources for Snakemake runtime tasks""" - cpus: int = 4 + cpus: int = 1 """ Number of CPUs required for the task """ - memory: int = 8 + memory: int = 2 """ Memory required for the task in GiB """ - storage_gib: int = 100 + storage_gib: int = 50 """ Storage required for the task in GiB """ @@ -42,14 +42,10 @@ class SnakemakeV2Metadata(LatchMetadata): """ Path to a markdown file containing information about the pipeline - rendered in the About page. 
""" - runtime_resources: SnakemakeRuntimeResources = field( - default_factory=SnakemakeRuntimeResources - ) + runtime_resources: SnakemakeRuntimeResources = field(default_factory=SnakemakeRuntimeResources) def validate(self): - if self.about_page_path is not None and not isinstance( - self.about_page_path, Path - ): # type: ignore + if self.about_page_path is not None and not isinstance(self.about_page_path, Path): click.secho( f"SnakemakeV2Metadata.about_page_path ({self.about_page_path}) must be a" " `Path` object.", @@ -60,9 +56,7 @@ def validate(self): def __post_init__(self): self.validate() - self.name = identifier_suffix_from_str( - f"snakemake_v2_{self.display_name}".lower() - ) + self.name = identifier_suffix_from_str(f"snakemake_v2_{self.display_name}".lower()) global _snakemake_v2_metadata _snakemake_v2_metadata = self diff --git a/src/latch_cli/snakemake/v2/utils.py b/src/latch_cli/snakemake/v2/utils.py index b89d68d32..28a955c9b 100644 --- a/src/latch_cli/snakemake/v2/utils.py +++ b/src/latch_cli/snakemake/v2/utils.py @@ -1,6 +1,7 @@ from dataclasses import fields, is_dataclass from enum import Enum from typing import Any +from urllib.parse import urlparse from latch.types.directory import LatchDir from latch.types.file import LatchFile @@ -12,10 +13,15 @@ def get_config_val(val: Any): if isinstance(val, dict): return {k: get_config_val(v) for k, v in val.items()} if isinstance(val, (LatchFile, LatchDir)): - if val.remote_path is not None: - return val.remote_path + if val.remote_path is None: + return str(val.path) - return str(val.path) + parsed = urlparse(val.remote_path) + domain = parsed.netloc + if domain == "": + domain = "inferred" + + return f"/ldata/{domain}{parsed.path}" if isinstance(val, (int, float, bool, type(None))): return val if is_dataclass(val): From b471a431f056d459205adcb11c95dfc30a7fb661 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Mon, 23 Dec 2024 12:19:13 -0800 Subject: [PATCH 04/18] save state Signed-off-by: Ayush Kamat 
--- pyproject.toml | 4 ++-- src/latch_cli/docker_utils/__init__.py | 19 +++++++++---------- src/latch_cli/snakemake/v2/workflow.py | 2 ++ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 980140067..c23cd8581 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.54.0.a9" +version = "2.55.0.a6" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [ @@ -34,7 +34,7 @@ dependencies = [ "scp>=0.14.0", "boto3>=1.26.0", "tqdm>=4.63.0", - "lytekit==0.15.15", + "lytekit==0.15.17", "lytekitplugins-pods==0.6.2", "typing-extensions>=4.12.0", "apscheduler>=3.10.0", diff --git a/src/latch_cli/docker_utils/__init__.py b/src/latch_cli/docker_utils/__init__.py index 1bc3b73a3..d3999a79b 100644 --- a/src/latch_cli/docker_utils/__init__.py +++ b/src/latch_cli/docker_utils/__init__.py @@ -48,11 +48,6 @@ class DockerfileBuilder: direnv: Optional[Path] = None def get_prologue(self): - if self.wf_type == WorkflowType.snakemake: - library_name = '"latch[snakemake]"' - else: - library_name = "latch" - self.commands.append( DockerCmdBlock( comment="Prologue", @@ -79,21 +74,25 @@ def get_prologue(self): "env LANG='en_US.UTF-8'", "", "arg DEBIAN_FRONTEND=noninteractive", - "", - "# Latch SDK", - "# DO NOT REMOVE", - f"run pip install {library_name}=={self.config.latch_version}", - "run mkdir /opt/latch", ], order=DockerCmdBlockOrder.precopy, ) ) def get_epilogue(self): + if self.wf_type == WorkflowType.snakemake: + library_name = '"latch[snakemake]"' + else: + library_name = "latch" + self.commands.append( DockerCmdBlock( comment="Epilogue", commands=[ + "", + "# Latch SDK", + "# DO NOT REMOVE", + f"run pip install {library_name}=={self.config.latch_version}", "", "# Latch workflow registration metadata", "# DO NOT CHANGE", diff --git a/src/latch_cli/snakemake/v2/workflow.py 
b/src/latch_cli/snakemake/v2/workflow.py index 32102242d..cef56f1d9 100644 --- a/src/latch_cli/snakemake/v2/workflow.py +++ b/src/latch_cli/snakemake/v2/workflow.py @@ -94,6 +94,8 @@ def snakemake_runtime(pvc_name: str, {parameters}): str(config_path), "--executor", "latch", + "--default-storage-provider", + "latch", "--jobs", "1000", ] From b11de4a322fa579177ef108f9931969857f0c0d2 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 31 Jan 2025 15:52:38 -0800 Subject: [PATCH 05/18] ver Signed-off-by: Ayush Kamat --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 423eaf1a3..fd831fedd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.55.3.a1" +version = "2.55.3.a2" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [ @@ -75,8 +75,8 @@ classifiers = [ pandas = ["pandas>=2.0.0"] snakemake = [ "snakemake", - "snakemake-storage-plugin-latch==0.1.8", - "snakemake-executor-plugin-latch==0.1.10", + "snakemake-storage-plugin-latch==0.1.10", + "snakemake-executor-plugin-latch==0.1.8", ] [project.scripts] From 5b42825d4156dab69b939ff0511e77e75e550485 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 28 Feb 2025 10:19:58 -0800 Subject: [PATCH 06/18] dont break 3.9 Signed-off-by: Ayush Kamat --- src/latch_cli/snakemake/config/utils.py | 44 ++++++++--------------- uv.lock | 48 ------------------------- 2 files changed, 14 insertions(+), 78 deletions(-) diff --git a/src/latch_cli/snakemake/config/utils.py b/src/latch_cli/snakemake/config/utils.py index d347908a1..2ec4ac25b 100644 --- a/src/latch_cli/snakemake/config/utils.py +++ b/src/latch_cli/snakemake/config/utils.py @@ -1,30 +1,25 @@ from __future__ import annotations +import sys from dataclasses import fields, is_dataclass, make_dataclass from enum import Enum -from types 
import UnionType -from typing import ( - Annotated, - Any, - Dict, - List, - Optional, - Type, - Union, - get_args, - get_origin, -) +from typing import Annotated, Any, Dict, List, Optional, Type, Union, get_args, get_origin from flytekit.core.annotation import FlyteAnnotation from typing_extensions import TypeAlias, TypeGuard -from latch.types.directory import LatchDir, LatchOutputDir +from latch.types.directory import LatchDir from latch.types.file import LatchFile from latch_cli.utils import identifier_from_str JSONValue: TypeAlias = Union[int, str, bool, float, None, List["JSONValue"], "JSONDict"] JSONDict: TypeAlias = Dict[str, "JSONValue"] +if sys.version_info >= (3, 10): + from types import UnionType +else: + UnionType = Union + # ayush: yoinked from console valid_extensions = { "bed", @@ -104,9 +99,7 @@ } -def parse_type( - v: JSONValue, name: Optional[str] = None, *, infer_files: bool = False -) -> Type: +def parse_type(v: JSONValue, name: Optional[str] = None, *, infer_files: bool = False) -> Type: if v is None: return str @@ -139,9 +132,7 @@ def parse_type( fields: Dict[str, Type] = {} for k, x in v.items(): - fields[identifier_from_str(k)] = parse_type( - x, f"{name}_{k}", infer_files=infer_files - ) + fields[identifier_from_str(k)] = parse_type(x, f"{name}_{k}", infer_files=infer_files) return make_dataclass(identifier_from_str(name), fields.items()) @@ -295,9 +286,7 @@ def get_preamble(typ: type[Any] | str, *, defined_names: set[str] | None = None) return "" if get_origin(typ) in {Union, UnionType, list, dict}: - return "".join([ - get_preamble(t, defined_names=defined_names) for t in get_args(typ) - ]) + return "".join([get_preamble(t, defined_names=defined_names) for t in get_args(typ)]) if typ.__name__ in defined_names: return "" @@ -309,9 +298,7 @@ def get_preamble(typ: type[Any] | str, *, defined_names: set[str] | None = None) assert is_dataclass(typ), typ - preamble = "".join([ - get_preamble(f.type, defined_names=defined_names) for f in 
fields(typ) - ]) + preamble = "".join([get_preamble(f.type, defined_names=defined_names) for f in fields(typ)]) return "".join([preamble, dataclass_repr(typ)]) @@ -350,8 +337,7 @@ def validate_snakemake_type(name: str, t: Type, param: Any) -> None: args = get_args(t) if len(args) == 0: raise ValueError( - "Generic Lists are not supported - please specify a subtype," - " e.g. List[LatchFile]" + "Generic Lists are not supported - please specify a subtype, e.g. List[LatchFile]" ) list_typ = args[0] for i, val in enumerate(param): @@ -360,8 +346,6 @@ def validate_snakemake_type(name: str, t: Type, param: Any) -> None: else: assert is_dataclass(t) for field in fields(t): - validate_snakemake_type( - f"{name}.{field.name}", field.type, getattr(param, field.name) - ) + validate_snakemake_type(f"{name}.{field.name}", field.type, getattr(param, field.name)) for i, val in enumerate(param): validate_snakemake_type(f"{name}[{i}]", list_typ, val) diff --git a/uv.lock b/uv.lock index 06580d1a2..c257400f6 100644 --- a/uv.lock +++ b/uv.lock @@ -1006,13 +1006,7 @@ wheels = [ [[package]] name = "latch" -<<<<<<< HEAD -version = "2.54.0a8" -||||||| 5493f7e -version = "2.54.10" -======= version = "2.56.10" ->>>>>>> aa8a34f82fa2d04b7c2576c97c9a559532668df3 source = { editable = "." 
} dependencies = [ { name = "aioconsole" }, @@ -1078,17 +1072,9 @@ requires-dist = [ { name = "gql", specifier = ">=3.5.0,<4.0.0" }, { name = "graphql-core", specifier = "==3.2.3" }, { name = "kubernetes", specifier = ">=24.2.0" }, -<<<<<<< HEAD - { name = "lytekit", specifier = "==0.15.14" }, - { name = "lytekitplugins-pods", specifier = "==0.6.2" }, -||||||| 5493f7e - { name = "lytekit", specifier = "==0.15.13" }, - { name = "lytekitplugins-pods", specifier = "==0.6.2" }, -======= { name = "latch-persistence", specifier = ">=0.1.5" }, { name = "lytekit", specifier = "==0.15.28" }, { name = "lytekitplugins-pods", specifier = "==0.7.4" }, ->>>>>>> aa8a34f82fa2d04b7c2576c97c9a559532668df3 { name = "orjson", specifier = ">=3.10.12" }, { name = "pandas", marker = "extra == 'pandas'", specifier = ">=2.0.0" }, { name = "paramiko", specifier = ">=3.4.0" }, @@ -1122,21 +1108,6 @@ docs = [ ] [[package]] -<<<<<<< HEAD -name = "latch-persistence" -version = "0.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8e/e0/a6f8427a9b1e6d9bce808b6ad7acfc98e67f08d5f6e9b4311090a2df1c1a/latch_persistence-0.1.0.tar.gz", hash = "sha256:527b01a5986871cb954bb69fd1d5134402304b7529dedf7096ac97ab3dd6e827", size = 9380 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/92/d1669cbaa7184dc73523e6874e4f2ab79fb7104a621597e9fdff82db35b2/latch_persistence-0.1.0-py3-none-any.whl", hash = "sha256:ab6fff7fe7c48f960caf55474810b465803f920eb6245c80c73ec8e3dd333046", size = 5371 }, -] - -[[package]] -||||||| 5493f7e -======= name = "latch-persistence" version = "0.1.5" source = { registry = "https://pypi.org/simple" } @@ -1149,7 +1120,6 @@ wheels = [ ] [[package]] ->>>>>>> aa8a34f82fa2d04b7c2576c97c9a559532668df3 name = "lyteidl" version = "0.2.1" source = { registry = "https://pypi.org/simple" } @@ -1165,13 +1135,7 @@ wheels = [ [[package]] name = "lytekit" -<<<<<<< HEAD -version = 
"0.15.14" -||||||| 5493f7e -version = "0.15.13" -======= version = "0.15.28" ->>>>>>> aa8a34f82fa2d04b7c2576c97c9a559532668df3 source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -1209,21 +1173,9 @@ dependencies = [ { name = "wheel" }, { name = "wrapt" }, ] -<<<<<<< HEAD -sdist = { url = "https://files.pythonhosted.org/packages/6a/15/c205b4adfcfe65fd01b21365d7f02f9c8e25e6a8554a26419f30275ad374/lytekit-0.15.14.tar.gz", hash = "sha256:92c7662a3cb8d38fe670397183ae517d2b2e175ad43c3949bbc3d1363b604046", size = 292268 } -||||||| 5493f7e -sdist = { url = "https://files.pythonhosted.org/packages/0a/54/20266d5b9bfe6fa56fe5a0366f8bb550e18e4ecb635ed8e7b27774a297f9/lytekit-0.15.13.tar.gz", hash = "sha256:147449e255d3bbc13d6f7a39f32f18c891295f38f9bc7e0f9754f546c8aa3a69", size = 295586 } -======= sdist = { url = "https://files.pythonhosted.org/packages/21/14/9fc90dd03dbf3921036e830083adc2dcd88c60690b451d5857f7583bce2a/lytekit-0.15.28.tar.gz", hash = "sha256:37fb835cc6bb6f3ad9b8e493b6c751c29ba50541cc94f831ba267132556a9da5", size = 293243 } ->>>>>>> aa8a34f82fa2d04b7c2576c97c9a559532668df3 wheels = [ -<<<<<<< HEAD - { url = "https://files.pythonhosted.org/packages/d0/13/a90708989e90b274f3d1ac13425ad6d7de3ab77d58cbe1c6ae1d00c878d5/lytekit-0.15.14-py3-none-any.whl", hash = "sha256:9e207b532df63d644c7d17a878337cb160b190b7322de9a7407857a1b104e185", size = 385453 }, -||||||| 5493f7e - { url = "https://files.pythonhosted.org/packages/ea/3f/d9bbd2b97eddeae3e7a86abde1f7444cca9b6a578a71f6fa8a3a53c791f6/lytekit-0.15.13-py3-none-any.whl", hash = "sha256:4565a4df2a3cad2aadc02f325923eddf55672921884a839ce7cfeb10ba4cec38", size = 389398 }, -======= { url = "https://files.pythonhosted.org/packages/1d/39/2d0c24472751aa0ded66e043f59c6db51d64a292deb43e78b4e441d87615/lytekit-0.15.28-py3-none-any.whl", hash = "sha256:7f33f71bf7f6497a6ea355540cf7c2b82ff5b9a2dbd5810eae475d7a0abcbc25", size = 385950 }, ->>>>>>> aa8a34f82fa2d04b7c2576c97c9a559532668df3 ] [[package]] 
From ed5088fae4c6eb0a51901011966ab71be5495582 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 28 Feb 2025 13:27:46 -0800 Subject: [PATCH 07/18] save state Signed-off-by: Ayush Kamat --- packages/latch-snakemake/.python-version | 1 + .../latch-snakemake/README.md | 0 packages/latch-snakemake/pyproject.toml | 30 +++ .../src/latch_snakemake/__init__.py | 83 ++++++++ .../src/latch_snakemake/py.typed | 0 .../src/latch_snakemake}/utils.py | 0 .../src/latch_snakemake}/workflow.py | 0 pyproject.toml | 2 +- src/latch/types/metadata/snakemake_v2.py | 1 + src/latch_cli/main.py | 191 +++--------------- 10 files changed, 141 insertions(+), 167 deletions(-) create mode 100644 packages/latch-snakemake/.python-version rename src/latch_cli/snakemake/v2/__init__.py => packages/latch-snakemake/README.md (100%) create mode 100644 packages/latch-snakemake/pyproject.toml create mode 100644 packages/latch-snakemake/src/latch_snakemake/__init__.py create mode 100644 packages/latch-snakemake/src/latch_snakemake/py.typed rename {src/latch_cli/snakemake/v2 => packages/latch-snakemake/src/latch_snakemake}/utils.py (100%) rename {src/latch_cli/snakemake/v2 => packages/latch-snakemake/src/latch_snakemake}/workflow.py (100%) diff --git a/packages/latch-snakemake/.python-version b/packages/latch-snakemake/.python-version new file mode 100644 index 000000000..2c0733315 --- /dev/null +++ b/packages/latch-snakemake/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/src/latch_cli/snakemake/v2/__init__.py b/packages/latch-snakemake/README.md similarity index 100% rename from src/latch_cli/snakemake/v2/__init__.py rename to packages/latch-snakemake/README.md diff --git a/packages/latch-snakemake/pyproject.toml b/packages/latch-snakemake/pyproject.toml new file mode 100644 index 000000000..d0f06bebc --- /dev/null +++ b/packages/latch-snakemake/pyproject.toml @@ -0,0 +1,30 @@ +[project] +name = "latch-snakemake" +version = "2.57.0" +description = "Add your description here" +readme = "README.md" 
+authors = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] +requires-python = ">=3.11" +dependencies = [ + "latch==2.57.0", + "snakemake", + "snakemake-storage-plugin-latch==0.1.10", + "snakemake-executor-plugin-latch==0.1.8", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.pyright] +typeCheckingMode = "strict" + +pythonVersion = "3.11" + +reportUnknownArgumentType = "none" +reportUnknownLambdaType = "none" +reportUnknownMemberType = "none" +reportUnknownParameterType = "none" +reportUnknownVariableType = "none" + +reportUnusedImport = "error" diff --git a/packages/latch-snakemake/src/latch_snakemake/__init__.py b/packages/latch-snakemake/src/latch_snakemake/__init__.py new file mode 100644 index 000000000..890f64d3d --- /dev/null +++ b/packages/latch-snakemake/src/latch_snakemake/__init__.py @@ -0,0 +1,83 @@ +from pathlib import Path +from typing import Optional + +import click + +from latch_cli.main import main +from latch_cli.services.register.utils import import_module_by_path + +from .workflow import get_entrypoint_content + + +@main.group() +def snakemake(): + """Manage snakemake-specific commands""" + + +@snakemake.command("generate-entrypoint") +@click.argument("pkg-root", nargs=1, type=click.Path(exists=True, path_type=Path)) +@click.option( + "--metadata-root", + type=click.Path(exists=True, path_type=Path, file_okay=False), + help="Path to a directory containing a python package defining a SnakemakeV2Metadata " + "object. If not provided, will default to searching the package root for a directory called " + "`latch_metadata`.", +) +@click.option( + "--snakefile", + required=False, + type=click.Path(exists=True, path_type=Path, dir_okay=False), + help="Path to the Snakefile to register. 
If not provided, will default to searching the package " + "root for a file named `Snakefile`.", +) +def sm_generate_entrypoint( + pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path] +): + """Generate a `wf/entrypoint.py` file from a Snakemake workflow""" + + dest = pkg_root / "wf" / "entrypoint.py" + dest.parent.mkdir(exist_ok=True) + + if dest.exists() and not click.confirm( + f"Workflow entrypoint already exists at `{dest}`. Overwrite?" + ): + return + + if metadata_root is None: + metadata_root = pkg_root / "latch_metadata" + + metadata_path = metadata_root / "__init__.py" + if metadata_path.exists(): + click.echo(f"Using metadata file {click.style(metadata_path, italic=True)}") + import_module_by_path(metadata_path) + else: + click.secho( + f"Unable to find file `{metadata_path}` with a `SnakemakeV2Metadata` object " + "defined. If you have a custom metadata root please provide a path " + "to it using the `--metadata-root` option", + fg="red", + ) + raise click.exceptions.Exit(1) + + import latch.types.metadata.snakemake_v2 as metadata + + if metadata._snakemake_v2_metadata is None: + click.secho( + "Failed to generate entrypoint. Make sure the python package at path " + f"`{metadata_path}` defines a `SnakemakeV2Metadata` object.", + fg="red", + ) + raise click.exceptions.Exit(1) + + if snakefile is None: + snakefile = pkg_root / "Snakefile" + + if not snakefile.exists(): + click.secho( + f"Unable to find a Snakefile at `{snakefile}`. If your Snakefile is " + "in a different location please provide an explicit path to it " + "using the `--snakefile` option." 
+ ) + raise click.exceptions.Exit(1) + + dest.write_text(get_entrypoint_content(pkg_root, metadata_path, snakefile)) diff --git a/packages/latch-snakemake/src/latch_snakemake/py.typed b/packages/latch-snakemake/src/latch_snakemake/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/latch_cli/snakemake/v2/utils.py b/packages/latch-snakemake/src/latch_snakemake/utils.py similarity index 100% rename from src/latch_cli/snakemake/v2/utils.py rename to packages/latch-snakemake/src/latch_snakemake/utils.py diff --git a/src/latch_cli/snakemake/v2/workflow.py b/packages/latch-snakemake/src/latch_snakemake/workflow.py similarity index 100% rename from src/latch_cli/snakemake/v2/workflow.py rename to packages/latch-snakemake/src/latch_snakemake/workflow.py diff --git a/pyproject.toml b/pyproject.toml index 65ba13c85..18f7f0d32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ maintainers = [ readme = "README.md" license = { file = "LICENSE" } +requires-python = ">=3.9" dependencies = [ "kubernetes>=24.2.0", @@ -53,7 +54,6 @@ dependencies = [ "orjson>=3.10.12", "latch-persistence>=0.1.5", ] -requires-python = ">=3.9" classifiers = [ "Development Status :: 4 - Beta", diff --git a/src/latch/types/metadata/snakemake_v2.py b/src/latch/types/metadata/snakemake_v2.py index 7cc300d8d..20d917f5c 100644 --- a/src/latch/types/metadata/snakemake_v2.py +++ b/src/latch/types/metadata/snakemake_v2.py @@ -1,3 +1,4 @@ +# todo(ayush): consolidate this into latch-snakemake from __future__ import annotations from dataclasses import dataclass, field diff --git a/src/latch_cli/main.py b/src/latch_cli/main.py index f0faebf33..142e97d0d 100644 --- a/src/latch_cli/main.py +++ b/src/latch_cli/main.py @@ -91,10 +91,7 @@ def main(): @main.command("login") @click.option( - "--connection", - type=str, - default=None, - help="Specific AuthO connection name e.g. for SSO.", + "--connection", type=str, default=None, help="Specific AuthO connection name e.g. for SSO." 
) def login(connection: Optional[str]): """Manually login to Latch.""" @@ -172,9 +169,7 @@ def init( @main.command("dockerfile") -@click.argument( - "pkg_root", type=click.Path(exists=True, file_okay=False, path_type=Path) -) +@click.argument("pkg_root", type=click.Path(exists=True, file_okay=False, path_type=Path)) @click.option( "-s", "--snakemake", @@ -271,9 +266,7 @@ def dockerfile( workflow_type = WorkflowType.nextflow base_image = BaseImageOptions.nextflow - config = get_or_create_workflow_config( - pkg_root=pkg_root, base_image_type=base_image - ) + config = get_or_create_workflow_config(pkg_root=pkg_root, base_image_type=base_image) builder = DockerfileBuilder( pkg_root, @@ -375,10 +368,7 @@ def generate_metadata( config_file = Path("nextflow_schema.json") generate_metadata( - config_file, - metadata_root, - skip_confirmation=yes, - generate_defaults=not no_defaults, + config_file, metadata_root, skip_confirmation=yes, generate_defaults=not no_defaults ) else: from latch_cli.snakemake.config.parser import generate_metadata @@ -405,12 +395,7 @@ def generate_metadata( @main.command("develop") @click.argument("pkg_root", nargs=1, type=click.Path(exists=True, path_type=Path)) @click.option( - "--yes", - "-y", - is_flag=True, - default=False, - type=bool, - help="Skip the confirmation dialog.", + "--yes", "-y", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
) @click.option("--image", "-i", type=str, help="Image to use for develop session.") @click.option( @@ -462,23 +447,16 @@ def local_development( from latch_cli.services.local_dev import local_development local_development( - pkg_root.resolve(), - skip_confirm_dialog=yes, - size=TaskSize.small_task, - image=image, + pkg_root.resolve(), skip_confirm_dialog=yes, size=TaskSize.small_task, image=image ) else: from latch_cli.services.local_dev_old import local_development - local_development( - pkg_root.resolve(), snakemake, wf_version, metadata_root, disable_sync - ) + local_development(pkg_root.resolve(), snakemake, wf_version, metadata_root, disable_sync) @main.command("exec") -@click.option( - "--execution-id", "-e", type=str, help="Optional execution ID to inspect." -) +@click.option("--execution-id", "-e", type=str, help="Optional execution ID to inspect.") @click.option("--egn-id", "-g", type=str, help="Optional task execution ID to inspect.") @click.option( "--container-index", @@ -487,9 +465,7 @@ def local_development( help="Optional container index to inspect (only used for Map Tasks)", ) @requires_login -def execute( - execution_id: Optional[str], egn_id: Optional[str], container_index: Optional[int] -): +def execute(execution_id: Optional[str], egn_id: Optional[str], container_index: Optional[int]): """Drops the user into an interactive shell from within a task.""" from latch_cli.services.k8s.execute import exec @@ -527,12 +503,7 @@ def execute( ), ) @click.option( - "-y", - "--yes", - is_flag=True, - default=False, - type=bool, - help="Skip the confirmation dialog.", + "-y", "--yes", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." ) @click.option( "--open", @@ -565,9 +536,7 @@ def execute( is_flag=True, default=False, type=bool, - help=( - "Whether or not to cache snakemake tasks. Ignored if --snakefile is not provided." - ), + help=("Whether or not to cache snakemake tasks. 
Ignored if --snakefile is not provided."), ) @click.option( "--nf-script", @@ -637,9 +606,7 @@ def register( @main.command("launch") @click.argument("params_file", nargs=1, type=click.Path(exists=True)) @click.option( - "--version", - default=None, - help="The version of the workflow to launch. Defaults to latest.", + "--version", default=None, help="The version of the workflow to launch. Defaults to latest." ) @requires_login def launch(params_file: Path, version: Union[str, None] = None): @@ -655,16 +622,13 @@ def launch(params_file: Path, version: Union[str, None] = None): version = "latest" click.secho( - f"Successfully launched workflow named {wf_name} with version {version}.", - fg="green", + f"Successfully launched workflow named {wf_name} with version {version}.", fg="green" ) @main.command("get-params") @click.argument("wf_name", nargs=1) -@click.option( - "--version", default=None, help="The version of the workflow. Defaults to latest." -) +@click.option("--version", default=None, help="The version of the workflow. Defaults to latest.") @requires_login def get_params(wf_name: Union[str, None], version: Union[str, None] = None): """Generate a python parameter map for a workflow.""" @@ -685,9 +649,7 @@ def get_params(wf_name: Union[str, None], version: Union[str, None] = None): @main.command("get-wf") @click.option( - "--name", - default=None, - help="The name of the workflow to list. Will display all versions", + "--name", default=None, help="The name of the workflow to list. 
Will display all versions" ) @requires_login def get_wf(name: Union[str, None] = None): @@ -707,9 +669,7 @@ def get_wf(name: Union[str, None] = None): version_padding = max(version_padding, version_len) # TODO(ayush): make this much better - click.secho( - f"ID{id_padding * ' '}\tName{name_padding * ' '}\tVersion{version_padding * ' '}" - ) + click.secho(f"ID{id_padding * ' '}\tName{name_padding * ' '}\tVersion{version_padding * ' '}") for wf in wfs: click.secho( f"{wf[0]}{(id_padding - len(str(wf[0]))) * ' '}\t{wf[1]}{(name_padding - len(wf[1])) * ' '}\t{wf[2]}{(version_padding - len(wf[2])) * ' '}" @@ -772,13 +732,9 @@ def get_executions(): default=False, show_default=True, ) +@click.option("--cores", help="Manually specify number of cores to parallelize over", type=int) @click.option( - "--cores", help="Manually specify number of cores to parallelize over", type=int -) -@click.option( - "--chunk-size-mib", - help="Manually specify the upload chunk size in MiB. Must be >= 5", - type=int, + "--chunk-size-mib", help="Manually specify the upload chunk size in MiB. Must be >= 5", type=int ) @requires_login def cp( @@ -871,12 +827,7 @@ def ls(paths: Tuple[str], group_directories_first: bool): @main.command("rmr") @click.argument("remote_path", nargs=1, type=str) @click.option( - "-y", - "--yes", - is_flag=True, - default=False, - type=bool, - help="Skip the confirmation dialog.", + "-y", "--yes", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." ) @click.option( "--no-glob", @@ -922,27 +873,18 @@ def mkdir(remote_directory: str): @click.argument("srcs", nargs=-1) @click.argument("dst", nargs=1) @click.option( - "--delete", - help="Delete extraneous files from destination.", - is_flag=True, - default=False, + "--delete", help="Delete extraneous files from destination.", is_flag=True, default=False ) @click.option( "--ignore-unsyncable", - help=( - "Synchronize even if some source paths do not exist or refer to special files." 
- ), + help=("Synchronize even if some source paths do not exist or refer to special files."), is_flag=True, default=False, ) @click.option("--cores", help="Number of cores to use for parallel syncing.", type=int) @requires_login def sync( - srcs: List[str], - dst: str, - delete: bool, - ignore_unsyncable: bool, - cores: Optional[int] = None, + srcs: List[str], dst: str, delete: bool, ignore_unsyncable: bool, cores: Optional[int] = None ): """ Update the contents of a remote directory with local data. @@ -995,10 +937,7 @@ def version(pkg_root: Path): help="Set execution profile for Nextflow workflow", ) def nf_generate_entrypoint( - pkg_root: Path, - metadata_root: Optional[Path], - nf_script: Path, - execution_profile: Optional[str], + pkg_root: Path, metadata_root: Optional[Path], nf_script: Path, execution_profile: Optional[str] ): """Generate a `wf/entrypoint.py` file from a Nextflow workflow""" @@ -1037,9 +976,7 @@ def nf_generate_entrypoint( @nextflow.command("attach") -@click.option( - "--execution-id", "-e", type=str, help="Optional execution ID to inspect." -) +@click.option("--execution-id", "-e", type=str, help="Optional execution ID to inspect.") @requires_login def attach(execution_id: Optional[str]): """Drops the user into an interactive shell to inspect the workdir of a nextflow execution.""" @@ -1049,83 +986,6 @@ def attach(execution_id: Optional[str]): attach(execution_id) -@main.group() -def snakemake(): - """Manage snakemake-specific commands""" - - -@snakemake.command("generate-entrypoint") -@click.argument("pkg-root", nargs=1, type=click.Path(exists=True, path_type=Path)) -@click.option( - "--metadata-root", - type=click.Path(exists=True, path_type=Path, file_okay=False), - help="Path to a directory containing a python package defining a SnakemakeV2Metadata " - "object. 
If not provided, will default to searching the package root for a directory called " - "`latch_metadata`.", -) -@click.option( - "--snakefile", - required=False, - type=click.Path(exists=True, path_type=Path, dir_okay=False), - help="Path to the Snakefile to register. If not provided, will default to searching the package " - "root for a file named `Snakefile`.", -) -def sm_generate_entrypoint( - pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path] -): - """Generate a `wf/entrypoint.py` file from a Snakemake workflow""" - - from latch_cli.services.register.utils import import_module_by_path - from latch_cli.snakemake.v2.workflow import get_entrypoint_content - - dest = pkg_root / "wf" / "entrypoint.py" - dest.parent.mkdir(exist_ok=True) - - if dest.exists() and not click.confirm( - f"Workflow entrypoint already exists at `{dest}`. Overwrite?" - ): - return - - if metadata_root is None: - metadata_root = pkg_root / "latch_metadata" - - metadata_path = metadata_root / "__init__.py" - if metadata_path.exists(): - click.echo(f"Using metadata file {click.style(metadata_path, italic=True)}") - import_module_by_path(metadata_path) - else: - click.secho( - f"Unable to find file `{metadata_path}` with a `SnakemakeV2Metadata` object " - "defined. If you have a custom metadata root please provide a path " - "to it using the `--metadata-root` option", - fg="red", - ) - raise click.exceptions.Exit(1) - - import latch.types.metadata.snakemake_v2 as metadata - - if metadata._snakemake_v2_metadata is None: - click.secho( - "Failed to generate entrypoint. Make sure the python package at path " - f"`{metadata_path}` defines a `SnakemakeV2Metadata` object.", - fg="red", - ) - raise click.exceptions.Exit(1) - - if snakefile is None: - snakefile = pkg_root / "Snakefile" - - if not snakefile.exists(): - click.secho( - f"Unable to find a Snakefile at `{snakefile}`. 
If your Snakefile is " - "in a different location please provide an explicit path to it " - "using the `--snakefile` option." - ) - raise click.exceptions.Exit(1) - - dest.write_text(get_entrypoint_content(pkg_root, metadata_path, snakefile)) - - """ POD COMMANDS """ @@ -1160,8 +1020,7 @@ def stop_pod(pod_id: Optional[int] = None): err_str = f"Error reading Pod ID from `{id_path}`" click.secho( - f"{err_str} -- please provide a Pod ID as a command line argument.", - fg="red", + f"{err_str} -- please provide a Pod ID as a command line argument.", fg="red" ) return From 5c335f0f04d42629e70bd286122c19ab157a9552 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Mon, 3 Mar 2025 10:11:12 -0800 Subject: [PATCH 08/18] make latch-snakemake a shell package Signed-off-by: Ayush Kamat --- src/latch/types/metadata/snakemake_v2.py | 1 - src/latch_cli/main.py | 191 ++++++++++++++++++++--- src/latch_cli/snakemake/v2/__init__.py | 0 src/latch_cli/snakemake/v2/utils.py | 32 ++++ src/latch_cli/snakemake/v2/workflow.py | 163 +++++++++++++++++++ 5 files changed, 361 insertions(+), 26 deletions(-) create mode 100644 src/latch_cli/snakemake/v2/__init__.py create mode 100644 src/latch_cli/snakemake/v2/utils.py create mode 100644 src/latch_cli/snakemake/v2/workflow.py diff --git a/src/latch/types/metadata/snakemake_v2.py b/src/latch/types/metadata/snakemake_v2.py index 20d917f5c..7cc300d8d 100644 --- a/src/latch/types/metadata/snakemake_v2.py +++ b/src/latch/types/metadata/snakemake_v2.py @@ -1,4 +1,3 @@ -# todo(ayush): consolidate this into latch-snakemake from __future__ import annotations from dataclasses import dataclass, field diff --git a/src/latch_cli/main.py b/src/latch_cli/main.py index 142e97d0d..f0faebf33 100644 --- a/src/latch_cli/main.py +++ b/src/latch_cli/main.py @@ -91,7 +91,10 @@ def main(): @main.command("login") @click.option( - "--connection", type=str, default=None, help="Specific AuthO connection name e.g. for SSO." 
+ "--connection", + type=str, + default=None, + help="Specific AuthO connection name e.g. for SSO.", ) def login(connection: Optional[str]): """Manually login to Latch.""" @@ -169,7 +172,9 @@ def init( @main.command("dockerfile") -@click.argument("pkg_root", type=click.Path(exists=True, file_okay=False, path_type=Path)) +@click.argument( + "pkg_root", type=click.Path(exists=True, file_okay=False, path_type=Path) +) @click.option( "-s", "--snakemake", @@ -266,7 +271,9 @@ def dockerfile( workflow_type = WorkflowType.nextflow base_image = BaseImageOptions.nextflow - config = get_or_create_workflow_config(pkg_root=pkg_root, base_image_type=base_image) + config = get_or_create_workflow_config( + pkg_root=pkg_root, base_image_type=base_image + ) builder = DockerfileBuilder( pkg_root, @@ -368,7 +375,10 @@ def generate_metadata( config_file = Path("nextflow_schema.json") generate_metadata( - config_file, metadata_root, skip_confirmation=yes, generate_defaults=not no_defaults + config_file, + metadata_root, + skip_confirmation=yes, + generate_defaults=not no_defaults, ) else: from latch_cli.snakemake.config.parser import generate_metadata @@ -395,7 +405,12 @@ def generate_metadata( @main.command("develop") @click.argument("pkg_root", nargs=1, type=click.Path(exists=True, path_type=Path)) @click.option( - "--yes", "-y", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
+ "--yes", + "-y", + is_flag=True, + default=False, + type=bool, + help="Skip the confirmation dialog.", ) @click.option("--image", "-i", type=str, help="Image to use for develop session.") @click.option( @@ -447,16 +462,23 @@ def local_development( from latch_cli.services.local_dev import local_development local_development( - pkg_root.resolve(), skip_confirm_dialog=yes, size=TaskSize.small_task, image=image + pkg_root.resolve(), + skip_confirm_dialog=yes, + size=TaskSize.small_task, + image=image, ) else: from latch_cli.services.local_dev_old import local_development - local_development(pkg_root.resolve(), snakemake, wf_version, metadata_root, disable_sync) + local_development( + pkg_root.resolve(), snakemake, wf_version, metadata_root, disable_sync + ) @main.command("exec") -@click.option("--execution-id", "-e", type=str, help="Optional execution ID to inspect.") +@click.option( + "--execution-id", "-e", type=str, help="Optional execution ID to inspect." +) @click.option("--egn-id", "-g", type=str, help="Optional task execution ID to inspect.") @click.option( "--container-index", @@ -465,7 +487,9 @@ def local_development( help="Optional container index to inspect (only used for Map Tasks)", ) @requires_login -def execute(execution_id: Optional[str], egn_id: Optional[str], container_index: Optional[int]): +def execute( + execution_id: Optional[str], egn_id: Optional[str], container_index: Optional[int] +): """Drops the user into an interactive shell from within a task.""" from latch_cli.services.k8s.execute import exec @@ -503,7 +527,12 @@ def execute(execution_id: Optional[str], egn_id: Optional[str], container_index: ), ) @click.option( - "-y", "--yes", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
+ "-y", + "--yes", + is_flag=True, + default=False, + type=bool, + help="Skip the confirmation dialog.", ) @click.option( "--open", @@ -536,7 +565,9 @@ def execute(execution_id: Optional[str], egn_id: Optional[str], container_index: is_flag=True, default=False, type=bool, - help=("Whether or not to cache snakemake tasks. Ignored if --snakefile is not provided."), + help=( + "Whether or not to cache snakemake tasks. Ignored if --snakefile is not provided." + ), ) @click.option( "--nf-script", @@ -606,7 +637,9 @@ def register( @main.command("launch") @click.argument("params_file", nargs=1, type=click.Path(exists=True)) @click.option( - "--version", default=None, help="The version of the workflow to launch. Defaults to latest." + "--version", + default=None, + help="The version of the workflow to launch. Defaults to latest.", ) @requires_login def launch(params_file: Path, version: Union[str, None] = None): @@ -622,13 +655,16 @@ def launch(params_file: Path, version: Union[str, None] = None): version = "latest" click.secho( - f"Successfully launched workflow named {wf_name} with version {version}.", fg="green" + f"Successfully launched workflow named {wf_name} with version {version}.", + fg="green", ) @main.command("get-params") @click.argument("wf_name", nargs=1) -@click.option("--version", default=None, help="The version of the workflow. Defaults to latest.") +@click.option( + "--version", default=None, help="The version of the workflow. Defaults to latest." +) @requires_login def get_params(wf_name: Union[str, None], version: Union[str, None] = None): """Generate a python parameter map for a workflow.""" @@ -649,7 +685,9 @@ def get_params(wf_name: Union[str, None], version: Union[str, None] = None): @main.command("get-wf") @click.option( - "--name", default=None, help="The name of the workflow to list. Will display all versions" + "--name", + default=None, + help="The name of the workflow to list. 
Will display all versions", ) @requires_login def get_wf(name: Union[str, None] = None): @@ -669,7 +707,9 @@ def get_wf(name: Union[str, None] = None): version_padding = max(version_padding, version_len) # TODO(ayush): make this much better - click.secho(f"ID{id_padding * ' '}\tName{name_padding * ' '}\tVersion{version_padding * ' '}") + click.secho( + f"ID{id_padding * ' '}\tName{name_padding * ' '}\tVersion{version_padding * ' '}" + ) for wf in wfs: click.secho( f"{wf[0]}{(id_padding - len(str(wf[0]))) * ' '}\t{wf[1]}{(name_padding - len(wf[1])) * ' '}\t{wf[2]}{(version_padding - len(wf[2])) * ' '}" @@ -732,9 +772,13 @@ def get_executions(): default=False, show_default=True, ) -@click.option("--cores", help="Manually specify number of cores to parallelize over", type=int) @click.option( - "--chunk-size-mib", help="Manually specify the upload chunk size in MiB. Must be >= 5", type=int + "--cores", help="Manually specify number of cores to parallelize over", type=int +) +@click.option( + "--chunk-size-mib", + help="Manually specify the upload chunk size in MiB. Must be >= 5", + type=int, ) @requires_login def cp( @@ -827,7 +871,12 @@ def ls(paths: Tuple[str], group_directories_first: bool): @main.command("rmr") @click.argument("remote_path", nargs=1, type=str) @click.option( - "-y", "--yes", is_flag=True, default=False, type=bool, help="Skip the confirmation dialog." 
+ "-y", + "--yes", + is_flag=True, + default=False, + type=bool, + help="Skip the confirmation dialog.", ) @click.option( "--no-glob", @@ -873,18 +922,27 @@ def mkdir(remote_directory: str): @click.argument("srcs", nargs=-1) @click.argument("dst", nargs=1) @click.option( - "--delete", help="Delete extraneous files from destination.", is_flag=True, default=False + "--delete", + help="Delete extraneous files from destination.", + is_flag=True, + default=False, ) @click.option( "--ignore-unsyncable", - help=("Synchronize even if some source paths do not exist or refer to special files."), + help=( + "Synchronize even if some source paths do not exist or refer to special files." + ), is_flag=True, default=False, ) @click.option("--cores", help="Number of cores to use for parallel syncing.", type=int) @requires_login def sync( - srcs: List[str], dst: str, delete: bool, ignore_unsyncable: bool, cores: Optional[int] = None + srcs: List[str], + dst: str, + delete: bool, + ignore_unsyncable: bool, + cores: Optional[int] = None, ): """ Update the contents of a remote directory with local data. @@ -937,7 +995,10 @@ def version(pkg_root: Path): help="Set execution profile for Nextflow workflow", ) def nf_generate_entrypoint( - pkg_root: Path, metadata_root: Optional[Path], nf_script: Path, execution_profile: Optional[str] + pkg_root: Path, + metadata_root: Optional[Path], + nf_script: Path, + execution_profile: Optional[str], ): """Generate a `wf/entrypoint.py` file from a Nextflow workflow""" @@ -976,7 +1037,9 @@ def nf_generate_entrypoint( @nextflow.command("attach") -@click.option("--execution-id", "-e", type=str, help="Optional execution ID to inspect.") +@click.option( + "--execution-id", "-e", type=str, help="Optional execution ID to inspect." 
+) @requires_login def attach(execution_id: Optional[str]): """Drops the user into an interactive shell to inspect the workdir of a nextflow execution.""" @@ -986,6 +1049,83 @@ def attach(execution_id: Optional[str]): attach(execution_id) +@main.group() +def snakemake(): + """Manage snakemake-specific commands""" + + +@snakemake.command("generate-entrypoint") +@click.argument("pkg-root", nargs=1, type=click.Path(exists=True, path_type=Path)) +@click.option( + "--metadata-root", + type=click.Path(exists=True, path_type=Path, file_okay=False), + help="Path to a directory containing a python package defining a SnakemakeV2Metadata " + "object. If not provided, will default to searching the package root for a directory called " + "`latch_metadata`.", +) +@click.option( + "--snakefile", + required=False, + type=click.Path(exists=True, path_type=Path, dir_okay=False), + help="Path to the Snakefile to register. If not provided, will default to searching the package " + "root for a file named `Snakefile`.", +) +def sm_generate_entrypoint( + pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path] +): + """Generate a `wf/entrypoint.py` file from a Snakemake workflow""" + + from latch_cli.services.register.utils import import_module_by_path + from latch_cli.snakemake.v2.workflow import get_entrypoint_content + + dest = pkg_root / "wf" / "entrypoint.py" + dest.parent.mkdir(exist_ok=True) + + if dest.exists() and not click.confirm( + f"Workflow entrypoint already exists at `{dest}`. Overwrite?" + ): + return + + if metadata_root is None: + metadata_root = pkg_root / "latch_metadata" + + metadata_path = metadata_root / "__init__.py" + if metadata_path.exists(): + click.echo(f"Using metadata file {click.style(metadata_path, italic=True)}") + import_module_by_path(metadata_path) + else: + click.secho( + f"Unable to find file `{metadata_path}` with a `SnakemakeV2Metadata` object " + "defined. 
If you have a custom metadata root please provide a path " + "to it using the `--metadata-root` option", + fg="red", + ) + raise click.exceptions.Exit(1) + + import latch.types.metadata.snakemake_v2 as metadata + + if metadata._snakemake_v2_metadata is None: + click.secho( + "Failed to generate entrypoint. Make sure the python package at path " + f"`{metadata_path}` defines a `SnakemakeV2Metadata` object.", + fg="red", + ) + raise click.exceptions.Exit(1) + + if snakefile is None: + snakefile = pkg_root / "Snakefile" + + if not snakefile.exists(): + click.secho( + f"Unable to find a Snakefile at `{snakefile}`. If your Snakefile is " + "in a different location please provide an explicit path to it " + "using the `--snakefile` option." + ) + raise click.exceptions.Exit(1) + + dest.write_text(get_entrypoint_content(pkg_root, metadata_path, snakefile)) + + """ POD COMMANDS """ @@ -1020,7 +1160,8 @@ def stop_pod(pod_id: Optional[int] = None): err_str = f"Error reading Pod ID from `{id_path}`" click.secho( - f"{err_str} -- please provide a Pod ID as a command line argument.", fg="red" + f"{err_str} -- please provide a Pod ID as a command line argument.", + fg="red", ) return diff --git a/src/latch_cli/snakemake/v2/__init__.py b/src/latch_cli/snakemake/v2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/latch_cli/snakemake/v2/utils.py b/src/latch_cli/snakemake/v2/utils.py new file mode 100644 index 000000000..28a955c9b --- /dev/null +++ b/src/latch_cli/snakemake/v2/utils.py @@ -0,0 +1,32 @@ +from dataclasses import fields, is_dataclass +from enum import Enum +from typing import Any +from urllib.parse import urlparse + +from latch.types.directory import LatchDir +from latch.types.file import LatchFile + + +def get_config_val(val: Any): + if isinstance(val, list): + return [get_config_val(x) for x in val] + if isinstance(val, dict): + return {k: get_config_val(v) for k, v in val.items()} + if isinstance(val, (LatchFile, LatchDir)): + if 
val.remote_path is None: + return str(val.path) + + parsed = urlparse(val.remote_path) + domain = parsed.netloc + if domain == "": + domain = "inferred" + + return f"/ldata/{domain}{parsed.path}" + if isinstance(val, (int, float, bool, type(None))): + return val + if is_dataclass(val): + return {f.name: get_config_val(getattr(val, f.name)) for f in fields(val)} + if isinstance(val, Enum): + return val.value + + return str(val) diff --git a/src/latch_cli/snakemake/v2/workflow.py b/src/latch_cli/snakemake/v2/workflow.py new file mode 100644 index 000000000..cef56f1d9 --- /dev/null +++ b/src/latch_cli/snakemake/v2/workflow.py @@ -0,0 +1,163 @@ +from pathlib import Path + +import latch.types.metadata.snakemake_v2 as snakemake +from latch_cli.snakemake.config.utils import get_preamble, type_repr + +_template = """\ +import json +import os +import shutil +import subprocess +import sys +import typing +import typing_extensions +from dataclasses import dataclass +from enum import Enum +from pathlib import Path + +import requests + +from latch.resources.tasks import custom_task, snakemake_runtime_task +from latch.resources.workflow import workflow +from latch.types.directory import LatchDir, LatchOutputDir +from latch.types.file import LatchFile +from latch_cli.snakemake.v2.utils import get_config_val +from latch_cli.services.register.utils import import_module_by_path + +import_module_by_path(Path({metadata_path})) + +import latch.types.metadata.snakemake_v2 as smv2 + + +{preambles} +@custom_task(cpu=0.25, memory=0.5, storage_gib=1) +def initialize() -> str: + token = os.environ.get("FLYTE_INTERNAL_EXECUTION_ID") + if token is None: + raise RuntimeError("failed to get execution token") + + headers = {{"Authorization": f"Latch-Execution-Token {{token}}"}} + + print("Provisioning shared storage volume... 
", end="") + resp = requests.post( + "http://nf-dispatcher-service.flyte.svc.cluster.local/provision-storage-ofs", + headers=headers, + json={{ + "storage_expiration_hours": 0, + "version": 2, + "snakemake": True, + }}, + ) + resp.raise_for_status() + print("Done.") + + return resp.json()["name"] + +@snakemake_runtime_task(cpu=1, memory=2, storage_gib=50) +def snakemake_runtime(pvc_name: str, {parameters}): + print(f"Using shared filesystem: {{pvc_name}}") + + shared = Path("/snakemake-workdir") + snakefile = shared / {snakefile_path} + + config = {{{config_builders}}} + + config_path = (shared / "__latch.config.json").resolve() + config_path.write_text(json.dumps(config, indent=2)) + + ignore_list = [ + "latch", + ".latch", + ".git", + "nextflow", + ".nextflow", + ".snakemake", + "results", + "miniconda", + "anaconda3", + "mambaforge", + ] + + shutil.copytree( + Path("/root"), + shared, + ignore=lambda src, names: ignore_list, + ignore_dangling_symlinks=True, + dirs_exist_ok=True, + ) + + cmd = [ + "snakemake", + "--snakefile", + str(snakefile), + "--configfile", + str(config_path), + "--executor", + "latch", + "--default-storage-provider", + "latch", + "--jobs", + "1000", + ] + + print("Launching Snakemake Runtime") + print(" ".join(cmd), flush=True) + + failed = False + try: + subprocess.run(cmd, cwd=shared, check=True) + except subprocess.CalledProcessError: + failed = True + finally: + if not failed: + return + + sys.exit(1) + + +@workflow(smv2._snakemake_v2_metadata) +def {workflow_name}({parameters}): + \"\"\" + Sample Description + \"\"\" + + snakemake_runtime(pvc_name=initialize(), {assignments}) +""" + + +def get_entrypoint_content(pkg_root: Path, metadata_path: Path, snakefile_path: Path) -> str: + metadata = snakemake._snakemake_v2_metadata + assert metadata is not None + + defined_names: set[str] = set() + preambles: list[str] = [] + + defaults: list[str] = [] + no_defaults: list[str] = [] + config_builders: list[str] = [] + assignments: list[str] = [] 
+ + for name, param in metadata.parameters.items(): + assert param.type is not None + + param_str = f"{name}: {type_repr(param.type)}" + if param.default is None: + no_defaults.append(param_str) + else: + param_str = f"{param_str} = {param.default!r}" + defaults.append(param_str) + + config_builders.append(f"{name!r}: get_config_val({name})") + assignments.append(f"{name}={name}") + + preambles.append(get_preamble(param.type, defined_names=defined_names)) + + return _template.format( + metadata_path=repr(str(metadata_path.relative_to(pkg_root))), + preambles="".join(preambles), + parameters=", ".join(no_defaults + defaults), + snakefile_path=repr(str(snakefile_path.relative_to(pkg_root))), + config_builders=", ".join(config_builders), + workflow_name=metadata.name, + assignments=", ".join(assignments), + ) From ea526d0b5529047e9707b27c4d144f4afb2bc4a3 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 7 Mar 2025 13:57:25 -0800 Subject: [PATCH 09/18] save state Signed-off-by: Ayush Kamat --- .../src/latch_snakemake/__init__.py | 83 --------- .../src/latch_snakemake/utils.py | 32 ---- .../src/latch_snakemake/workflow.py | 163 ------------------ 3 files changed, 278 deletions(-) delete mode 100644 packages/latch-snakemake/src/latch_snakemake/utils.py delete mode 100644 packages/latch-snakemake/src/latch_snakemake/workflow.py diff --git a/packages/latch-snakemake/src/latch_snakemake/__init__.py b/packages/latch-snakemake/src/latch_snakemake/__init__.py index 890f64d3d..e69de29bb 100644 --- a/packages/latch-snakemake/src/latch_snakemake/__init__.py +++ b/packages/latch-snakemake/src/latch_snakemake/__init__.py @@ -1,83 +0,0 @@ -from pathlib import Path -from typing import Optional - -import click - -from latch_cli.main import main -from latch_cli.services.register.utils import import_module_by_path - -from .workflow import get_entrypoint_content - - -@main.group() -def snakemake(): - """Manage snakemake-specific commands""" - - 
-@snakemake.command("generate-entrypoint") -@click.argument("pkg-root", nargs=1, type=click.Path(exists=True, path_type=Path)) -@click.option( - "--metadata-root", - type=click.Path(exists=True, path_type=Path, file_okay=False), - help="Path to a directory containing a python package defining a SnakemakeV2Metadata " - "object. If not provided, will default to searching the package root for a directory called " - "`latch_metadata`.", -) -@click.option( - "--snakefile", - required=False, - type=click.Path(exists=True, path_type=Path, dir_okay=False), - help="Path to the Snakefile to register. If not provided, will default to searching the package " - "root for a file named `Snakefile`.", -) -def sm_generate_entrypoint( - pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path] -): - """Generate a `wf/entrypoint.py` file from a Snakemake workflow""" - - dest = pkg_root / "wf" / "entrypoint.py" - dest.parent.mkdir(exist_ok=True) - - if dest.exists() and not click.confirm( - f"Workflow entrypoint already exists at `{dest}`. Overwrite?" - ): - return - - if metadata_root is None: - metadata_root = pkg_root / "latch_metadata" - - metadata_path = metadata_root / "__init__.py" - if metadata_path.exists(): - click.echo(f"Using metadata file {click.style(metadata_path, italic=True)}") - import_module_by_path(metadata_path) - else: - click.secho( - f"Unable to find file `{metadata_path}` with a `SnakemakeV2Metadata` object " - "defined. If you have a custom metadata root please provide a path " - "to it using the `--metadata-root` option", - fg="red", - ) - raise click.exceptions.Exit(1) - - import latch.types.metadata.snakemake_v2 as metadata - - if metadata._snakemake_v2_metadata is None: - click.secho( - "Failed to generate entrypoint. 
Make sure the python package at path " - f"`{metadata_path}` defines a `SnakemakeV2Metadata` object.", - fg="red", - ) - raise click.exceptions.Exit(1) - - if snakefile is None: - snakefile = pkg_root / "Snakefile" - - if not snakefile.exists(): - click.secho( - f"Unable to find a Snakefile at `{snakefile}`. If your Snakefile is " - "in a different location please provide an explicit path to it " - "using the `--snakefile` option." - ) - raise click.exceptions.Exit(1) - - dest.write_text(get_entrypoint_content(pkg_root, metadata_path, snakefile)) diff --git a/packages/latch-snakemake/src/latch_snakemake/utils.py b/packages/latch-snakemake/src/latch_snakemake/utils.py deleted file mode 100644 index 28a955c9b..000000000 --- a/packages/latch-snakemake/src/latch_snakemake/utils.py +++ /dev/null @@ -1,32 +0,0 @@ -from dataclasses import fields, is_dataclass -from enum import Enum -from typing import Any -from urllib.parse import urlparse - -from latch.types.directory import LatchDir -from latch.types.file import LatchFile - - -def get_config_val(val: Any): - if isinstance(val, list): - return [get_config_val(x) for x in val] - if isinstance(val, dict): - return {k: get_config_val(v) for k, v in val.items()} - if isinstance(val, (LatchFile, LatchDir)): - if val.remote_path is None: - return str(val.path) - - parsed = urlparse(val.remote_path) - domain = parsed.netloc - if domain == "": - domain = "inferred" - - return f"/ldata/{domain}{parsed.path}" - if isinstance(val, (int, float, bool, type(None))): - return val - if is_dataclass(val): - return {f.name: get_config_val(getattr(val, f.name)) for f in fields(val)} - if isinstance(val, Enum): - return val.value - - return str(val) diff --git a/packages/latch-snakemake/src/latch_snakemake/workflow.py b/packages/latch-snakemake/src/latch_snakemake/workflow.py deleted file mode 100644 index cef56f1d9..000000000 --- a/packages/latch-snakemake/src/latch_snakemake/workflow.py +++ /dev/null @@ -1,163 +0,0 @@ -from pathlib import 
Path - -import latch.types.metadata.snakemake_v2 as snakemake -from latch_cli.snakemake.config.utils import get_preamble, type_repr - -_template = """\ -import json -import os -import shutil -import subprocess -import sys -import typing -import typing_extensions -from dataclasses import dataclass -from enum import Enum -from pathlib import Path - -import requests - -from latch.resources.tasks import custom_task, snakemake_runtime_task -from latch.resources.workflow import workflow -from latch.types.directory import LatchDir, LatchOutputDir -from latch.types.file import LatchFile -from latch_cli.snakemake.v2.utils import get_config_val -from latch_cli.services.register.utils import import_module_by_path - -import_module_by_path(Path({metadata_path})) - -import latch.types.metadata.snakemake_v2 as smv2 - - -{preambles} -@custom_task(cpu=0.25, memory=0.5, storage_gib=1) -def initialize() -> str: - token = os.environ.get("FLYTE_INTERNAL_EXECUTION_ID") - if token is None: - raise RuntimeError("failed to get execution token") - - headers = {{"Authorization": f"Latch-Execution-Token {{token}}"}} - - print("Provisioning shared storage volume... 
", end="") - resp = requests.post( - "http://nf-dispatcher-service.flyte.svc.cluster.local/provision-storage-ofs", - headers=headers, - json={{ - "storage_expiration_hours": 0, - "version": 2, - "snakemake": True, - }}, - ) - resp.raise_for_status() - print("Done.") - - return resp.json()["name"] - -@snakemake_runtime_task(cpu=1, memory=2, storage_gib=50) -def snakemake_runtime(pvc_name: str, {parameters}): - print(f"Using shared filesystem: {{pvc_name}}") - - shared = Path("/snakemake-workdir") - snakefile = shared / {snakefile_path} - - config = {{{config_builders}}} - - config_path = (shared / "__latch.config.json").resolve() - config_path.write_text(json.dumps(config, indent=2)) - - ignore_list = [ - "latch", - ".latch", - ".git", - "nextflow", - ".nextflow", - ".snakemake", - "results", - "miniconda", - "anaconda3", - "mambaforge", - ] - - shutil.copytree( - Path("/root"), - shared, - ignore=lambda src, names: ignore_list, - ignore_dangling_symlinks=True, - dirs_exist_ok=True, - ) - - cmd = [ - "snakemake", - "--snakefile", - str(snakefile), - "--configfile", - str(config_path), - "--executor", - "latch", - "--default-storage-provider", - "latch", - "--jobs", - "1000", - ] - - print("Launching Snakemake Runtime") - print(" ".join(cmd), flush=True) - - failed = False - try: - subprocess.run(cmd, cwd=shared, check=True) - except subprocess.CalledProcessError: - failed = True - finally: - if not failed: - return - - sys.exit(1) - - -@workflow(smv2._snakemake_v2_metadata) -def {workflow_name}({parameters}): - \"\"\" - Sample Description - \"\"\" - - snakemake_runtime(pvc_name=initialize(), {assignments}) -""" - - -def get_entrypoint_content(pkg_root: Path, metadata_path: Path, snakefile_path: Path) -> str: - metadata = snakemake._snakemake_v2_metadata - assert metadata is not None - - defined_names: set[str] = set() - preambles: list[str] = [] - - defaults: list[str] = [] - no_defaults: list[str] = [] - config_builders: list[str] = [] - assignments: list[str] = [] 
- - for name, param in metadata.parameters.items(): - assert param.type is not None - - param_str = f"{name}: {type_repr(param.type)}" - if param.default is None: - no_defaults.append(param_str) - else: - param_str = f"{param_str} = {param.default!r}" - defaults.append(param_str) - - config_builders.append(f"{name!r}: get_config_val({name})") - assignments.append(f"{name}={name}") - - preambles.append(get_preamble(param.type, defined_names=defined_names)) - - return _template.format( - metadata_path=repr(str(metadata_path.relative_to(pkg_root))), - preambles="".join(preambles), - parameters=", ".join(no_defaults + defaults), - snakefile_path=repr(str(snakefile_path.relative_to(pkg_root))), - config_builders=", ".join(config_builders), - workflow_name=metadata.name, - assignments=", ".join(assignments), - ) From cb9cda7be3cb8439c9ba2b3af5f82b388772e31b Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Mon, 10 Mar 2025 09:58:09 -0700 Subject: [PATCH 10/18] latch-snakemkae Signed-off-by: Ayush Kamat --- packages/latch-snakemake/pyproject.toml | 4 ++-- pyproject.toml | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/packages/latch-snakemake/pyproject.toml b/packages/latch-snakemake/pyproject.toml index d0f06bebc..e45b9c391 100644 --- a/packages/latch-snakemake/pyproject.toml +++ b/packages/latch-snakemake/pyproject.toml @@ -1,12 +1,12 @@ [project] name = "latch-snakemake" -version = "2.57.0" +version = "2.57.2" description = "Add your description here" readme = "README.md" authors = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] requires-python = ">=3.11" dependencies = [ - "latch==2.57.0", + "latch==2.57.2", "snakemake", "snakemake-storage-plugin-latch==0.1.10", "snakemake-executor-plugin-latch==0.1.8", diff --git a/pyproject.toml b/pyproject.toml index a3d18fbce..96bd0e9e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,11 +74,7 @@ classifiers = [ [project.optional-dependencies] pandas = ["pandas>=2.0.0"] -snakemake = [ - 
"snakemake", - "snakemake-storage-plugin-latch==0.1.10", - "snakemake-executor-plugin-latch==0.1.8", -] +snakemake = ["snakemake>=7.18.0,<7.30.2", "pulp>=2.0,<2.8"] [project.scripts] latch = "latch_cli.main:main" From be458fdc0c29591b55457b04ebfbfdd869bbc6a6 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 23 May 2025 15:01:49 -0700 Subject: [PATCH 11/18] del shell for now Signed-off-by: Ayush Kamat --- packages/latch-snakemake/.python-version | 1 - packages/latch-snakemake/README.md | 0 packages/latch-snakemake/pyproject.toml | 30 ------------------- .../src/latch_snakemake/__init__.py | 0 .../src/latch_snakemake/py.typed | 0 pyproject.toml | 6 +++- 6 files changed, 5 insertions(+), 32 deletions(-) delete mode 100644 packages/latch-snakemake/.python-version delete mode 100644 packages/latch-snakemake/README.md delete mode 100644 packages/latch-snakemake/pyproject.toml delete mode 100644 packages/latch-snakemake/src/latch_snakemake/__init__.py delete mode 100644 packages/latch-snakemake/src/latch_snakemake/py.typed diff --git a/packages/latch-snakemake/.python-version b/packages/latch-snakemake/.python-version deleted file mode 100644 index 2c0733315..000000000 --- a/packages/latch-snakemake/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.11 diff --git a/packages/latch-snakemake/README.md b/packages/latch-snakemake/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/packages/latch-snakemake/pyproject.toml b/packages/latch-snakemake/pyproject.toml deleted file mode 100644 index e45b9c391..000000000 --- a/packages/latch-snakemake/pyproject.toml +++ /dev/null @@ -1,30 +0,0 @@ -[project] -name = "latch-snakemake" -version = "2.57.2" -description = "Add your description here" -readme = "README.md" -authors = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] -requires-python = ">=3.11" -dependencies = [ - "latch==2.57.2", - "snakemake", - "snakemake-storage-plugin-latch==0.1.10", - "snakemake-executor-plugin-latch==0.1.8", -] - 
-[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.pyright] -typeCheckingMode = "strict" - -pythonVersion = "3.11" - -reportUnknownArgumentType = "none" -reportUnknownLambdaType = "none" -reportUnknownMemberType = "none" -reportUnknownParameterType = "none" -reportUnknownVariableType = "none" - -reportUnusedImport = "error" diff --git a/packages/latch-snakemake/src/latch_snakemake/__init__.py b/packages/latch-snakemake/src/latch_snakemake/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/packages/latch-snakemake/src/latch_snakemake/py.typed b/packages/latch-snakemake/src/latch_snakemake/py.typed deleted file mode 100644 index e69de29bb..000000000 diff --git a/pyproject.toml b/pyproject.toml index 96bd0e9e3..a3d18fbce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,11 @@ classifiers = [ [project.optional-dependencies] pandas = ["pandas>=2.0.0"] -snakemake = ["snakemake>=7.18.0,<7.30.2", "pulp>=2.0,<2.8"] +snakemake = [ + "snakemake", + "snakemake-storage-plugin-latch==0.1.10", + "snakemake-executor-plugin-latch==0.1.8", +] [project.scripts] latch = "latch_cli.main:main" From 4eda8866c2c97fa972041ce8ac674454beed4d9e Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 23 May 2025 15:28:54 -0700 Subject: [PATCH 12/18] morge + new release Signed-off-by: Ayush Kamat --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 49319d67f..272d2cec1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.62.1" +version = "2.62.1.a1" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [ @@ -78,7 +78,7 @@ pandas = ["pandas>=2.0.0"] snakemake = [ "snakemake", "snakemake-storage-plugin-latch==0.1.10", - "snakemake-executor-plugin-latch==0.1.8", + 
"snakemake-executor-plugin-latch==0.1.9", ] [project.scripts] From f7d9fb9c68e47164ecca5e99703adbd18321758b Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 23 May 2025 15:36:50 -0700 Subject: [PATCH 13/18] dumb Signed-off-by: Ayush Kamat --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 272d2cec1..0761f18c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.62.1.a1" +version = "2.62.1.a2" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [ @@ -77,7 +77,7 @@ classifiers = [ pandas = ["pandas>=2.0.0"] snakemake = [ "snakemake", - "snakemake-storage-plugin-latch==0.1.10", + "snakemake-storage-plugin-latch==0.1.11", "snakemake-executor-plugin-latch==0.1.9", ] From b0e3f87340bfbd5306f28a016c53797a66e6002f Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 26 Sep 2025 12:56:07 -0700 Subject: [PATCH 14/18] updates Signed-off-by: Ayush Kamat --- src/latch/registry/upstream_types/values.py | 20 +- src/latch/resources/workflow.py | 25 +-- src/latch/utils.py | 34 ++- src/latch_cli/nextflow/config.py | 28 +-- src/latch_cli/snakemake/config/parser.py | 232 ++++++-------------- src/latch_cli/snakemake/config/utils.py | 96 ++++---- src/latch_cli/snakemake/v2/utils.py | 3 +- src/latch_cli/snakemake/v2/workflow.py | 44 +--- src/latch_cli/utils/__init__.py | 46 ++-- 9 files changed, 194 insertions(+), 334 deletions(-) diff --git a/src/latch/registry/upstream_types/values.py b/src/latch/registry/upstream_types/values.py index e1b79027a..f236cdf6b 100644 --- a/src/latch/registry/upstream_types/values.py +++ b/src/latch/registry/upstream_types/values.py @@ -3,6 +3,8 @@ from typing_extensions import Self, TypeAlias +from latch.utils import Singleton + class InvalidValue(TypedDict): rawValue: str @@ -62,9 +64,7 @@ class 
PrimitiveUnresolvedBlobValueValid(TypedDict): valid: Literal[True] -PrimitiveUnresolvedBlobValue: TypeAlias = Union[ - PrimitiveUnresolvedBlobValueValid, InvalidValue -] +PrimitiveUnresolvedBlobValue: TypeAlias = Union[PrimitiveUnresolvedBlobValueValid, InvalidValue] class LinkValue(TypedDict): @@ -108,23 +108,11 @@ class UnionValue(TypedDict): DBValue: TypeAlias = Union[PrimitiveValue, ArrayValue, UnionValue] -@dataclass(frozen=True) -class EmptyCell: +class EmptyCell(Singleton): """Empty Registry :class:`Record` value. Singleton. - The constructor returns a referentially identical instance each call. That is, - `EmptyCell() is EmptyCell()` - Used to distinguish explicit `None` values from missing values. """ - _singleton: ClassVar[Optional["EmptyCell"]] = None - - def __new__(cls) -> Self: - if cls._singleton is None: - cls._singleton = super().__new__(cls) - - return cls._singleton - Value: TypeAlias = Union[DBValue, EmptyCell] diff --git a/src/latch/resources/workflow.py b/src/latch/resources/workflow.py index e0f741f17..0812514d1 100644 --- a/src/latch/resources/workflow.py +++ b/src/latch/resources/workflow.py @@ -11,12 +11,7 @@ from flytekit.core.interface import transform_function_to_interface from flytekit.core.workflow import PythonFunctionWorkflow -from latch.types.metadata import ( - LatchAuthor, - LatchMetadata, - LatchParameter, - NextflowMetadata, -) +from latch.types.metadata import LatchAuthor, LatchMetadata, LatchParameter, NextflowMetadata from latch_cli.utils import best_effort_display_name @@ -44,9 +39,7 @@ def _inject_metadata(f: Callable, metadata: LatchMetadata) -> None: # this weird Union thing is to ensure backwards compatibility, # so that when users call @workflow without any arguments or # parentheses, the workflow still serializes as expected -def workflow( - metadata: Union[LatchMetadata, Callable], -) -> Union[PythonFunctionWorkflow, Callable]: +def workflow(metadata: Union[LatchMetadata, Callable]) -> Union[PythonFunctionWorkflow, 
Callable]: if isinstance(metadata, Callable): f = metadata if f.__doc__ is None or "__metadata__:" not in f.__doc__: @@ -107,9 +100,7 @@ def decorator(f: Callable): raise click.exceptions.Exit(1) arg_origin = get_origin(args[0]) - valid = is_dataclass(args[0]) or ( - arg_origin is not None and is_dataclass(arg_origin) - ) + valid = is_dataclass(args[0]) or (arg_origin is not None and is_dataclass(arg_origin)) if not valid: click.secho( f"parameter marked as samplesheet is not valid: {name} " @@ -148,9 +139,13 @@ def decorator(f: Callable): return decorator -def nextflow_workflow( - metadata: NextflowMetadata, -) -> Callable[[Callable], PythonFunctionWorkflow]: +def nextflow_workflow(metadata: NextflowMetadata) -> Callable[[Callable], PythonFunctionWorkflow]: + metadata._non_standard["unpack_records"] = True + + return workflow(metadata) + + +def snakemake_workflow(metadata: NextflowMetadata) -> Callable[[Callable], PythonFunctionWorkflow]: metadata._non_standard["unpack_records"] = True return workflow(metadata) diff --git a/src/latch/utils.py b/src/latch/utils.py index 23d5bb334..a6656261e 100644 --- a/src/latch/utils.py +++ b/src/latch/utils.py @@ -1,9 +1,12 @@ import itertools import os -from typing import Dict, TypedDict +from dataclasses import dataclass +from typing import ClassVar, Dict, Optional, TypedDict import gql import jwt +from typing_extensions import Self + from latch_sdk_config.user import user_config from latch_sdk_gql.execute import execute @@ -112,9 +115,7 @@ def get_workspaces() -> Dict[str, WSInfo]: owned_org_teams = [x["teamInfosByOrgId"]["nodes"] for x in res["orgInfos"]["nodes"]] owned_org_teams = list(itertools.chain(*owned_org_teams)) - member_org_teams = [ - x["org"]["teamInfosByOrgId"]["nodes"] for x in res["orgMembers"]["nodes"] - ] + member_org_teams = [x["org"]["teamInfosByOrgId"]["nodes"] for x in res["orgMembers"]["nodes"]] member_org_teams = list(itertools.chain(*member_org_teams)) default_account = ( @@ -130,11 +131,7 @@ def 
get_workspaces() -> Dict[str, WSInfo]: ) for x in owned_teams + member_teams - + ( - [res["teamInfoByAccountId"]] - if res["teamInfoByAccountId"] is not None - else [] - ) + + ([res["teamInfoByAccountId"]] if res["teamInfoByAccountId"] is not None else []) + owned_org_teams + member_org_teams } @@ -167,7 +164,7 @@ def current_workspace() -> str: } } } - """), + """) )["accountInfoCurrent"] ws = res["id"] @@ -180,3 +177,20 @@ def current_workspace() -> str: class NotFoundError(ValueError): ... + + +@dataclass(frozen=True) +class Singleton: + """Base class for singleton objects. + + The constructor returns a referentially identical instance each call. That is, + `Singleton() is Singleton()` + """ + + _singleton: ClassVar[Optional[Self]] = None + + def __new__(cls) -> Self: + if cls._singleton is None: + cls._singleton = super().__new__(cls) + + return cls._singleton diff --git a/src/latch_cli/nextflow/config.py b/src/latch_cli/nextflow/config.py index 2c07fd821..204e1a24d 100644 --- a/src/latch_cli/nextflow/config.py +++ b/src/latch_cli/nextflow/config.py @@ -16,19 +16,11 @@ from latch.types.directory import LatchDir from latch.types.file import LatchFile from latch.types.samplesheet_item import SamplesheetItem -from latch_cli.snakemake.config.utils import get_preamble -from latch_cli.utils import best_effort_display_name, identifier_from_str +from ..snakemake.config.utils import get_preamble +from ..utils import best_effort_display_name, best_effort_title_case, identifier_from_str from .parse_schema import NfType, parse_schema -underscores = re.compile(r"_+") -spaces = re.compile(r"\s+") - - -def best_effort_title_case(s: str) -> str: - return identifier_from_str(spaces.sub("", underscores.sub(" ", s).title())) - - T = TypeVar("T") @@ -118,14 +110,10 @@ def get_python_type_inner( else: defaults.append((field_name, field_type, field_obj)) - return make_dataclass( - f"{best_effort_title_case(param_name)}Type", no_defaults + defaults - ) + return 
make_dataclass(f"{best_effort_title_case(param_name)}Type", no_defaults + defaults) if typ["type"] == "samplesheet": - dc = get_python_type( - param_name, {**typ, "type": "object", "properties": typ["schema"]} - ) + dc = get_python_type(param_name, {**typ, "type": "object", "properties": typ["schema"]}) return list[SamplesheetItem[dc]] assert typ["type"] == "enum", f"unsupported type {typ['typ']!r}" @@ -157,9 +145,7 @@ def get_python_type( return Optional[inner] -def generate_flow( - raw_schema_content: dict[str, object], parsed: dict[str, NfType] -) -> str: +def generate_flow(raw_schema_content: dict[str, object], parsed: dict[str, NfType]) -> str: if "$defs" not in raw_schema_content: return "generated_flow = None" @@ -220,9 +206,7 @@ def generate_flow( return f"generated_flow = [{', '.join(flow_elements)}]" -def generate_metadata( - schema_path: Path, metadata_root: Path, *, skip_confirmation: bool = False -): +def generate_metadata(schema_path: Path, metadata_root: Path, *, skip_confirmation: bool = False): raw_schema_content: dict[str, object] = json.loads(schema_path.read_text()) display_name: Optional[str] = raw_schema_content.get("title") diff --git a/src/latch_cli/snakemake/config/parser.py b/src/latch_cli/snakemake/config/parser.py index 35a8df546..440db5b3d 100644 --- a/src/latch_cli/snakemake/config/parser.py +++ b/src/latch_cli/snakemake/config/parser.py @@ -1,16 +1,17 @@ -from dataclasses import fields, is_dataclass +from dataclasses import Field, field, fields, is_dataclass, make_dataclass from pathlib import Path -from typing import Dict, List, Tuple, Type, TypeVar, get_args, get_origin +from typing import Annotated, TypeVar, Union, get_args, get_origin import click import yaml -from typing_extensions import Annotated from latch.types.directory import LatchDir from latch.types.file import LatchFile +from latch.utils import Singleton from latch_cli.snakemake.utils import reindent from latch_cli.utils import best_effort_display_name, 
identifier_from_str +from ...utils import best_effort_title_case, exit from .utils import ( JSONValue, get_preamble, @@ -24,29 +25,20 @@ T = TypeVar("T") -def parse_config( - config_path: Path, - *, - infer_files: bool = False, -) -> Dict[str, Tuple[Type[T], T]]: +class NoValue(Singleton): ... + + +def parse_config(config_path: Path) -> dict[str, tuple[type[T], Union[T, NoValue]]]: if not config_path.exists(): - click.secho( - f"No config file found at {config_path}.", - fg="red", - ) - raise click.exceptions.Exit(1) + raise exit(f"No config file found at {config_path}.") if config_path.is_dir(): - click.secho( - f"Path {config_path} points to a directory.", - fg="red", - ) - raise click.exceptions.Exit(1) + raise exit(f"Path {config_path} points to a directory.") try: res: JSONValue = yaml.safe_load(config_path.read_text()) except yaml.YAMLError as e: - click.secho( + raise exit( reindent( f""" Error loading config from {config_path}: @@ -54,86 +46,28 @@ def parse_config( {e} """, 0, - ), - fg="red", - ) - raise click.exceptions.Exit(1) from e + ) + ) from e - if not isinstance(res, dict): - # ayush: this case doesn't matter bc a non-dict .yaml file isn't valid snakemake - return {"snakemake_parameter": (parse_type(res, infer_files=infer_files), res)} + assert isinstance(res, dict) - parsed: Dict[str, Type] = {} + parsed: dict[str, tuple[type[T], T]] = {} for k, v in res.items(): try: - typ = parse_type(v, k, infer_files=infer_files) + typ = parse_type(v, k) except ValueError as e: - click.secho( - f"WARNING: Skipping parameter {k}. Failed to parse type: {e}.", - fg="yellow", - ) + click.secho(f"WARNING: Skipping parameter {k}. 
Failed to parse type: {e}.", fg="yellow") continue - val, default = parse_value(typ, v) - parsed[k] = (typ, (val, default)) - - return parsed - - -def file_metadata_str(typ: Type, value: JSONValue, level: int = 0) -> str: - if get_origin(typ) is Annotated: - args = get_args(typ) - assert len(args) > 0 - return file_metadata_str(args[0], value, level) - - if is_primitive_type(typ): - return "" - - if typ in {LatchFile, LatchDir}: - return reindent( - f"""\ - SnakemakeFileMetadata( - path={repr(value)}, - config=True, - ),\n""", - level, - ) - - metadata: List[str] = [] - if is_list_type(typ): - template = """ - [ - __metadata__],\n""" - - args = get_args(typ) - assert len(args) > 0 - for val in value: - metadata_str = file_metadata_str(get_args(typ)[0], val, level + 1) - if metadata_str == "": - continue - metadata.append(metadata_str) - else: - template = """ - { - __metadata__},\n""" - - assert is_dataclass(typ) - for field in fields(typ): - metadata_str = file_metadata_str( - field.type, getattr(value, field.name), level - ) - if metadata_str == "": - continue - metadata_str = f"{repr(identifier_from_str(field.name))}: {metadata_str}" - metadata.append(reindent(metadata_str, level + 1)) + default = NoValue() + try: + default = parse_value(typ, v) + except AssertionError as e: + click.secho(f"WARNING: Unable to parse default for parameter {k}: {e}.", fg="yellow") - if len(metadata) == 0: - return "" + parsed[k] = (typ, default) - return reindent( - template, - level, - ).replace("__metadata__", "".join(metadata), level + 1) + return parsed # todo(ayush): print informative stuff here ala register @@ -143,40 +77,26 @@ def generate_metadata( *, skip_confirmation: bool = False, generate_defaults: bool = False, - infer_files: bool = False, ): - parsed = parse_config(config_path, infer_files=infer_files) - - preambles: List[str] = [] - params: List[str] = [] - file_metadata: List[str] = [] + parsed = parse_config(config_path) - for k, (typ, (val, default)) in 
parsed.items(): - preambles.append(get_preamble(typ)) + no_defaults: list[tuple[str, type, Field[object]]] = [] + defaults: list[tuple[str, type, Field[object]]] = [] - param_str = reindent( - f"""\ - {repr(identifier_from_str(k))}: SnakemakeParameter( - display_name={repr(best_effort_display_name(k))}, - type={type_repr(typ)}, - __default__),""", - 0, - ) + for k, (typ, default) in parsed.items(): + name = identifier_from_str(k) - default_str = "" - if generate_defaults and default is not None: - default_str = f" default={repr(default)},\n" + if not generate_defaults or default is NoValue(): + no_defaults.append((name, typ, field())) + continue - param_str = param_str.replace("__default__", default_str) + if isinstance(default, (list, dict, LatchFile, LatchDir)): + defaults.append((name, typ, field(default_factory=lambda: default))) + continue - param_str = reindent(param_str, 1) - params.append(param_str) + defaults.append((name, typ, field(default=default))) - metadata_str = file_metadata_str(typ, val) - if metadata_str == "": - continue - metadata_str = f"{repr(identifier_from_str(k))}: {metadata_str}" - file_metadata.append(reindent(metadata_str, 1)) + generated_args_type = make_dataclass("SnakemakeArgsType", no_defaults + defaults) if metadata_root.is_file(): if not click.confirm(f"A file exists at `{metadata_root}`. Delete it?"): @@ -187,47 +107,28 @@ def generate_metadata( metadata_root.mkdir(exist_ok=True) metadata_path = metadata_root / Path("__init__.py") - old_metadata_path = Path("latch_metadata.py") - - if old_metadata_path.exists() and not metadata_path.exists(): - if click.confirm( - "Found legacy `latch_metadata.py` file in current directory. This is" - " deprecated and will be ignored in future releases. Move to" - f" `{metadata_path}`? 
(This will not change file contents)" - ): - old_metadata_path.rename(metadata_path) - elif old_metadata_path.exists() and metadata_path.exists(): - click.secho( - "Warning: Found both `latch_metadata.py` and" - f" `{metadata_path}` in current directory." - " `latch_metadata.py` will be ignored.", - fg="yellow", - ) if not metadata_path.exists(): metadata_path.write_text( reindent( r""" - from latch.types.metadata import SnakemakeMetadata, LatchAuthor, EnvironmentConfig - from latch.types.directory import LatchDir + from latch.types.metadata import LatchAuthor + from latch.types.metadata.snakemake_v2 import SnakemakeV2Metadata, SnakemakeParameter - from .parameters import generated_parameters, file_metadata + from .generated import SnakemakeArgsType - SnakemakeMetadata( - output_dir=LatchDir("latch:///your_output_directory"), + class WorkflowArgsType(SnakemakeArgsType): + # add custom parameters here + ... + + SnakemakeV2Metadata( display_name="Your Workflow Name", author=LatchAuthor( name="Your Name", ), - env_config=EnvironmentConfig( - use_conda=False, - use_container=False, - ), - cores=4, - # Add more parameters - parameters=generated_parameters, - file_metadata=file_metadata, - + parameters={ + "args": SnakemakeParameter(type=WorkflowArgsType) + }, ) """, 0, @@ -235,7 +136,7 @@ def generate_metadata( ) click.secho(f"Generated `{metadata_path}`.", fg="green") - params_path = metadata_root / Path("parameters.py") + params_path = metadata_root / Path("generated.py") if ( params_path.exists() and not skip_confirmation @@ -245,35 +146,30 @@ def generate_metadata( params_path.write_text( reindent( - r""" - from dataclasses import dataclass + rf""" + # This file is auto-generated, PLEASE DO NOT EDIT DIRECTLY! To update, run + # + # $ latch generate-metadata --snakemake {config_path} + # + # Add any custom logic or parameters in `latch_metadata/__init__.py`. 
+ import typing - import typing_extensions + from dataclasses import dataclass, field + from enum import Enum + import typing_extensions from flytekit.core.annotation import FlyteAnnotation - from latch.types.metadata import SnakemakeParameter, SnakemakeFileParameter, SnakemakeFileMetadata - from latch.types.file import LatchFile + from latch.ldata.path import LPath from latch.types.directory import LatchDir + from latch.types.file import LatchFile + from latch.types.metadata import Params, Section, Spoiler, Text + from latch.types.samplesheet_item import SamplesheetItem __preambles__ - # Import these into your `__init__.py` file: - # - # from .parameters import generated_parameters, file_metadata - - generated_parameters = { - __params__ - } - - file_metadata = { - __file_metadata__} - """, 0, - ) - .replace("__preambles__", "".join(preambles)) - .replace("__params__", "\n".join(params)) - .replace("__file_metadata__", "".join(file_metadata)) + ).replace("__preambles__", get_preamble(generated_args_type)) ) click.secho(f"Generated `{params_path}`.", fg="green") diff --git a/src/latch_cli/snakemake/config/utils.py b/src/latch_cli/snakemake/config/utils.py index f41546bdb..9644dd71a 100644 --- a/src/latch_cli/snakemake/config/utils.py +++ b/src/latch_cli/snakemake/config/utils.py @@ -1,9 +1,10 @@ from __future__ import annotations +import re import sys from dataclasses import MISSING, Field, fields, is_dataclass, make_dataclass from enum import Enum -from typing import Annotated, Any, Callable, Dict, List, Optional, Type, Union, get_args, get_origin +from typing import Annotated, Any, Callable, TypeVar, Union, get_args, get_origin from flytekit.core.annotation import FlyteAnnotation from typing_extensions import TypeAlias, TypeGuard @@ -14,8 +15,8 @@ from latch.types.samplesheet_item import SamplesheetItem from latch_cli.utils import identifier_from_str -JSONValue: TypeAlias = Union[int, str, bool, float, None, List["JSONValue"], "JSONDict"] -JSONDict: TypeAlias = 
Dict[str, "JSONValue"] +JSONValue: TypeAlias = Union[int, str, bool, float, None, list["JSONValue"], "JSONDict"] +JSONDict: TypeAlias = dict[str, "JSONValue"] if sys.version_info >= (3, 10): from types import UnionType @@ -101,48 +102,71 @@ } -def parse_type(v: JSONValue, name: Optional[str] = None, *, infer_files: bool = False) -> Type: +expr = re.compile( + r""" + ^( + (latch://.*) + | (s3://.*) + | ( + /? + ([^/]/)+ + [^/]* + ) + )$ + """, + re.VERBOSE, +) + + +def is_file_like(s: str) -> bool: + if expr.match(s): + return True + + return any(s.endswith(x) for x in valid_extensions) + + +def parse_type(v: JSONValue, name: str) -> type: if v is None: return str - if infer_files and isinstance(v, str): - if any([v.endswith(ext) for ext in valid_extensions]): - return LatchFile - elif v.endswith("/"): + if isinstance(v, str) and is_file_like(v): + if v.endswith("/"): return LatchDir + return LatchFile + if is_primitive_value(v): return type(v) if isinstance(v, list): - parsed_types = tuple(parse_type(x, name, infer_files=infer_files) for x in v) + parsed_types = tuple(parse_type(x, name) for x in v) if len(set(parsed_types)) != 1: raise ValueError( "Generic Lists are not supported - please" f" ensure that all elements in {name} are of the same type" ) + typ = parsed_types[0] - if typ in {LatchFile, LatchDir}: - return Annotated[List[typ], FlyteAnnotation({"size": len(v)})] - return List[typ] - assert isinstance(v, dict) + return list[typ] - if name is None: - name = "SnakemakeRecord" + assert isinstance(v, dict) - fields: Dict[str, Type] = {} + fields: dict[str, type] = {} for k, x in v.items(): - fields[identifier_from_str(k)] = parse_type(x, f"{name}_{k}", infer_files=infer_files) + fields[identifier_from_str(k)] = parse_type(x, f"{name}_{k}") return make_dataclass(identifier_from_str(name), fields.items()) -# returns raw value and generated default -def parse_value(t: Type, v: JSONValue): +T = TypeVar("T") + + +def parse_value(t: type[T], v: JSONValue) -> T: if 
v is None: - return None, None + assert t is type(None) + return None if get_origin(t) is Annotated: args = get_args(t) @@ -150,12 +174,11 @@ def parse_value(t: Type, v: JSONValue): return parse_value(args[0], v) if t in {LatchFile, LatchDir}: - # ayush: autogenerated defaults don't make sense for files/dirs since their - # value in the config is their local path - return v, None + assert isinstance(v, str) + return t(v) if is_primitive_value(v): - return v, v + return v if isinstance(v, list): assert get_origin(t) is list @@ -164,29 +187,26 @@ def parse_value(t: Type, v: JSONValue): assert len(args) > 0 sub_type = args[0] - res = [parse_value(sub_type, x) for x in v] - return [x[0] for x in res], [x[1] for x in res] + return [parse_value(sub_type, x) for x in v] assert isinstance(v, dict), v assert is_dataclass(t), t - ret = {} defaults = {} fs = {identifier_from_str(f.name): f for f in fields(t)} for k, x in v.items(): sanitized = identifier_from_str(k) assert sanitized in fs, sanitized - val, default = parse_value(fs[sanitized].type, x) - ret[sanitized] = val + default = parse_value(fs[sanitized].type, x) defaults[sanitized] = default - return t(**ret), t(**defaults) + return t(**defaults) def is_primitive_type( - typ: Type, -) -> TypeGuard[Union[Type[None], Type[str], Type[bool], Type[int], Type[float]]]: + typ: type, +) -> TypeGuard[Union[type[None], type[str], type[bool], type[int], type[float]]]: return typ in {type(None), str, bool, int, float} @@ -194,7 +214,7 @@ def is_primitive_value(val: object) -> TypeGuard[Union[None, str, bool, int, flo return is_primitive_type(type(val)) -def is_list_type(typ: Type) -> TypeGuard[Type[List]]: +def is_list_type(typ: type) -> TypeGuard[type[list[object]]]: return get_origin(typ) is list @@ -247,7 +267,7 @@ def type_repr(t: type[Any] | str, *, add_namespace: bool = False) -> str: return ( f"typing_extensions.Annotated[{type_repr(args[0], add_namespace=add_namespace)}," - f" FlyteAnnotation({repr(args[1].data)})]" + f" 
FlyteAnnotation({args[1].data!r})]" ) return type_repr(args[0], add_namespace=add_namespace) @@ -284,7 +304,7 @@ def field_repr(f: Field[object]) -> str: return f"{f.name}: {type_repr(f.type)}{suffix}" -def dataclass_repr(typ: Type) -> str: +def dataclass_repr(typ: type) -> str: assert is_dataclass(typ) lines = ["@dataclass", f"class {typ.__name__}:"] @@ -294,12 +314,12 @@ def dataclass_repr(typ: Type) -> str: return "\n".join(lines) + "\n\n\n" -def enum_repr(typ: Type) -> str: +def enum_repr(typ: type) -> str: assert issubclass(typ, Enum), typ lines = [f"class {typ.__name__}(Enum):"] for name, val in typ._member_map_.items(): - lines.append(f" {name} = {repr(val.value)}") + lines.append(f" {name} = {val.value!r}") return "\n".join(lines) + "\n\n\n" @@ -343,7 +363,7 @@ def get_preamble(typ: type[Any] | str, *, defined_names: set[str] | None = None) return "".join([preamble, dataclass_repr(typ)]) -def validate_snakemake_type(name: str, t: Type, param: Any) -> None: +def validate_snakemake_type(name: str, t: type, param: Any) -> None: if t is type(None) and param is not None: raise ValueError("parameter of type `NoneType` must be None") diff --git a/src/latch_cli/snakemake/v2/utils.py b/src/latch_cli/snakemake/v2/utils.py index 28a955c9b..0e9431f6e 100644 --- a/src/latch_cli/snakemake/v2/utils.py +++ b/src/latch_cli/snakemake/v2/utils.py @@ -1,13 +1,12 @@ from dataclasses import fields, is_dataclass from enum import Enum -from typing import Any from urllib.parse import urlparse from latch.types.directory import LatchDir from latch.types.file import LatchFile -def get_config_val(val: Any): +def get_config_val(val: object): if isinstance(val, list): return [get_config_val(x) for x in val] if isinstance(val, dict): diff --git a/src/latch_cli/snakemake/v2/workflow.py b/src/latch_cli/snakemake/v2/workflow.py index cef56f1d9..af257d710 100644 --- a/src/latch_cli/snakemake/v2/workflow.py +++ b/src/latch_cli/snakemake/v2/workflow.py @@ -1,7 +1,6 @@ from pathlib import Path 
import latch.types.metadata.snakemake_v2 as snakemake -from latch_cli.snakemake.config.utils import get_preamble, type_repr _template = """\ import json @@ -18,18 +17,17 @@ import requests from latch.resources.tasks import custom_task, snakemake_runtime_task -from latch.resources.workflow import workflow +from latch.resources.workflow import snakemake_workflow from latch.types.directory import LatchDir, LatchOutputDir from latch.types.file import LatchFile from latch_cli.snakemake.v2.utils import get_config_val from latch_cli.services.register.utils import import_module_by_path -import_module_by_path(Path({metadata_path})) +latch_metadata = import_module_by_path(Path({metadata_path})) import latch.types.metadata.snakemake_v2 as smv2 -{preambles} @custom_task(cpu=0.25, memory=0.5, storage_gib=1) def initialize() -> str: token = os.environ.get("FLYTE_INTERNAL_EXECUTION_ID") @@ -54,13 +52,13 @@ def initialize() -> str: return resp.json()["name"] @snakemake_runtime_task(cpu=1, memory=2, storage_gib=50) -def snakemake_runtime(pvc_name: str, {parameters}): +def snakemake_runtime(pvc_name: str, args: latch_metadata.WorkflowArgsType): print(f"Using shared filesystem: {{pvc_name}}") shared = Path("/snakemake-workdir") snakefile = shared / {snakefile_path} - config = {{{config_builders}}} + config = get_config_val(args) config_path = (shared / "__latch.config.json").resolve() config_path.write_text(json.dumps(config, indent=2)) @@ -115,13 +113,14 @@ def snakemake_runtime(pvc_name: str, {parameters}): sys.exit(1) -@workflow(smv2._snakemake_v2_metadata) -def {workflow_name}({parameters}): +@snakemake_workflow(smv2._snakemake_v2_metadata) +def { +}(args: latch_metadata.WorkflowArgsType): \"\"\" Sample Description \"\"\" - snakemake_runtime(pvc_name=initialize(), {assignments}) + snakemake_runtime(pvc_name=initialize(), args=args) """ @@ -129,35 +128,8 @@ def get_entrypoint_content(pkg_root: Path, metadata_path: Path, snakefile_path: metadata = snakemake._snakemake_v2_metadata 
assert metadata is not None - defined_names: set[str] = set() - preambles: list[str] = [] - - defaults: list[str] = [] - no_defaults: list[str] = [] - config_builders: list[str] = [] - assignments: list[str] = [] - - for name, param in metadata.parameters.items(): - assert param.type is not None - - param_str = f"{name}: {type_repr(param.type)}" - if param.default is None: - no_defaults.append(param_str) - else: - param_str = f"{param_str} = {param.default!r}" - defaults.append(param_str) - - config_builders.append(f"{name!r}: get_config_val({name})") - assignments.append(f"{name}={name}") - - preambles.append(get_preamble(param.type, defined_names=defined_names)) - return _template.format( metadata_path=repr(str(metadata_path.relative_to(pkg_root))), - preambles="".join(preambles), - parameters=", ".join(no_defaults + defaults), snakefile_path=repr(str(snakefile_path.relative_to(pkg_root))), - config_builders=", ".join(config_builders), workflow_name=metadata.name, - assignments=", ".join(assignments), ) diff --git a/src/latch_cli/utils/__init__.py b/src/latch_cli/utils/__init__.py index 3548e6853..2137d70e7 100644 --- a/src/latch_cli/utils/__init__.py +++ b/src/latch_cli/utils/__init__.py @@ -96,8 +96,7 @@ def sub_from_jwt(token: str) -> str: sub = payload["sub"] except KeyError: raise ValueError( - "Provided token lacks a user sub in the data payload" - " and is not a valid token." + "Provided token lacks a user sub in the data payload and is not a valid token." 
) return sub @@ -156,9 +155,7 @@ def human_readable_time(t_seconds: float) -> str: def hash_directory(dir_path: Path, *, silent: bool = False) -> str: # todo(maximsmol): store per-file hashes to show which files triggered a version change if not silent: - click.secho( - "Calculating workflow version based on file content hash", bold=True - ) + click.secho("Calculating workflow version based on file content hash", bold=True) click.secho(" Disable with --disable-auto-version/-d", italic=True, dim=True) m = hashlib.new("sha256") @@ -203,8 +200,7 @@ def hash_directory(dir_path: Path, *, silent: bool = False) -> str: if not stat.S_ISREG(p_stat.st_mode): if not silent: click.secho( - f"{p.relative_to(dir_path.resolve())} is not a regular file." - " Ignoring contents", + f"{p.relative_to(dir_path.resolve())} is not a regular file. Ignoring contents", fg="yellow", bold=True, ) @@ -225,9 +221,7 @@ def hash_directory(dir_path: Path, *, silent: bool = False) -> str: return m.hexdigest() -def generate_temporary_ssh_credentials( - ssh_key_path: Path, *, add_to_agent: bool = True -) -> str: +def generate_temporary_ssh_credentials(ssh_key_path: Path, *, add_to_agent: bool = True) -> str: # check if there is already a valid key at that path, and if so, use that # otherwise, if its not valid, remove it if ssh_key_path.exists(): @@ -242,14 +236,10 @@ def generate_temporary_ssh_credentials( raise # if both files are valid and their fingerprints match, use them instead of generating a new pair - click.secho( - f"Found existing key pair at {ssh_key_path}.", dim=True, italic=True - ) + click.secho(f"Found existing key pair at {ssh_key_path}.", dim=True, italic=True) except: click.secho( - f"Found malformed key-pair at {ssh_key_path}. Overwriting.", - dim=True, - italic=True, + f"Found malformed key-pair at {ssh_key_path}. 
Overwriting.", dim=True, italic=True ) ssh_key_path.unlink(missing_ok=True) @@ -363,13 +353,8 @@ def generate(self): self._public_key = generate_temporary_ssh_credentials(self._ssh_key_path) def cleanup(self): - if ( - self._ssh_key_path.exists() - and self._ssh_key_path.with_suffix(".pub").exists() - ): - subprocess.run( - ["ssh-add", "-d", self._ssh_key_path], check=True, capture_output=True - ) + if self._ssh_key_path.exists() and self._ssh_key_path.with_suffix(".pub").exists(): + subprocess.run(["ssh-add", "-d", self._ssh_key_path], check=True, capture_output=True) self._ssh_key_path.unlink(missing_ok=True) self._ssh_key_path.with_suffix(".pub").unlink(missing_ok=True) @@ -442,10 +427,7 @@ def check_exists_and_rename(old: Path, new: Path): return if new.is_file(): - print( - f"Warning: {old} is a directory but {new} is not. {new} will be" - " overwritten." - ) + print(f"Warning: {old} is a directory but {new} is not. {new} will be overwritten.") shutil.rmtree(new) os.renames(old, new) return @@ -455,7 +437,17 @@ def check_exists_and_rename(old: Path, new: Path): underscores = re.compile(r"_+") +spaces = re.compile(r"\s+") def best_effort_display_name(x: str) -> str: return underscores.sub(" ", x).title().strip() + + +def best_effort_title_case(s: str) -> str: + return identifier_from_str(spaces.sub("", underscores.sub(" ", s).title())) + + +def exit(msg: str, *, exit_code: int = 1) -> click.exceptions.Exit: + click.secho(msg, fg="red") + return click.exceptions.Exit(exit_code) From 5b6b4b707ebf76f74147aac05d9eb5c49143fd91 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 26 Sep 2025 14:24:29 -0700 Subject: [PATCH 15/18] various fixums Signed-off-by: Ayush Kamat --- pyproject.toml | 2 +- src/latch/types/metadata/snakemake_v2.py | 6 ++-- src/latch_cli/centromere/ast_parsing.py | 13 ++++----- src/latch_cli/main.py | 6 +--- src/latch_cli/snakemake/config/parser.py | 35 ++++++++++++++++++++++-- src/latch_cli/snakemake/config/utils.py | 20 +++++++------- 
src/latch_cli/snakemake/v2/workflow.py | 3 +- 7 files changed, 53 insertions(+), 32 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5c499fea2..d8e42a43c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/**/py.typed", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.67.5" +version = "2.67.5.a1" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] diff --git a/src/latch/types/metadata/snakemake_v2.py b/src/latch/types/metadata/snakemake_v2.py index 7cc300d8d..44f53f59b 100644 --- a/src/latch/types/metadata/snakemake_v2.py +++ b/src/latch/types/metadata/snakemake_v2.py @@ -2,16 +2,14 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any import click from latch_cli.utils import identifier_suffix_from_str from .latch import LatchMetadata - -if TYPE_CHECKING: - from .snakemake import SnakemakeParameter +from .snakemake import SnakemakeParameter # noqa: TCH001 @dataclass(frozen=True) diff --git a/src/latch_cli/centromere/ast_parsing.py b/src/latch_cli/centromere/ast_parsing.py index 1df006cec..db9c746da 100644 --- a/src/latch_cli/centromere/ast_parsing.py +++ b/src/latch_cli/centromere/ast_parsing.py @@ -41,7 +41,7 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 # 3. 
save fully qualified name for tasks (need to parse based on import graph) for decorator in node.decorator_list: if isinstance(decorator, ast.Name): - if decorator.id in {"workflow", "nextflow_workflow"}: + if decorator.id in {"workflow", "nextflow_workflow", "snakemake_workflow"}: self.flyte_objects.append(FlyteObject("workflow", fqn)) elif decorator.id in task_decorators: self.flyte_objects.append(FlyteObject("task", fqn)) @@ -53,10 +53,11 @@ def visit_FunctionDef(self, node: ast.FunctionDef): # noqa: N802 if func.id not in task_decorators and func.id not in { "workflow", "nextflow_workflow", + "snakemake_workflow", }: continue - if func.id in {"workflow", "nextflow_workflow"}: + if func.id in {"workflow", "nextflow_workflow", "snakemake_workflow"}: self.flyte_objects.append(FlyteObject("workflow", fqn)) continue @@ -105,9 +106,7 @@ def get_flyte_objects(module: Path) -> list[FlyteObject]: if file.suffix != ".py": continue - module_name = str(file.with_suffix("").relative_to(module.parent)).replace( - os.sep, "." 
- ) + module_name = str(file.with_suffix("").relative_to(module.parent)).replace(os.sep, ".") v = Visitor(file, module_name) @@ -115,9 +114,7 @@ def get_flyte_objects(module: Path) -> list[FlyteObject]: parsed = ast.parse(file.read_text(), filename=file) except SyntaxError as e: traceback.print_exc() - click.secho( - "\nRegistration failed due to a syntax error (see above)", fg="red" - ) + click.secho("\nRegistration failed due to a syntax error (see above)", fg="red") raise click.exceptions.Exit(1) from e v.visit(parsed) diff --git a/src/latch_cli/main.py b/src/latch_cli/main.py index 4e7668e52..6d903fc16 100644 --- a/src/latch_cli/main.py +++ b/src/latch_cli/main.py @@ -426,11 +426,7 @@ def generate_metadata( raise click.exceptions.Exit(1) generate_metadata( - config_file, - metadata_root, - skip_confirmation=yes, - infer_files=not no_infer_files, - generate_defaults=not no_defaults, + config_file, metadata_root, skip_confirmation=yes, generate_defaults=not no_defaults ) diff --git a/src/latch_cli/snakemake/config/parser.py b/src/latch_cli/snakemake/config/parser.py index 440db5b3d..f2d0b9d3d 100644 --- a/src/latch_cli/snakemake/config/parser.py +++ b/src/latch_cli/snakemake/config/parser.py @@ -3,7 +3,11 @@ from typing import Annotated, TypeVar, Union, get_args, get_origin import click +import google.protobuf.json_format as gpjson import yaml +from flytekit.core.annotation import FlyteAnnotation +from flytekit.core.context_manager import FlyteContextManager +from flytekit.core.type_engine import TypeEngine from latch.types.directory import LatchDir from latch.types.file import LatchFile @@ -70,6 +74,17 @@ def parse_config(config_path: Path) -> dict[str, tuple[type[T], Union[T, NoValue return parsed +# doing bare lambda: variable_name doesn't work because we call the lambda to get its return value +# and print it so if its something of the form lambda: variable_name, the call will always result +# in the latest value of variable_name, as opposed to the value of 
variable_name at the time the +# lambda was created +def get_lambda(value: object): + def inner(): + return value + + return inner + + # todo(ayush): print informative stuff here ala register def generate_metadata( config_path: Path, @@ -83,15 +98,31 @@ def generate_metadata( no_defaults: list[tuple[str, type, Field[object]]] = [] defaults: list[tuple[str, type, Field[object]]] = [] + ctx = FlyteContextManager.current_context() + for k, (typ, default) in parsed.items(): name = identifier_from_str(k) + annotations: dict[str, object] = { + "display_name": best_effort_display_name(k), + "output": name == "outdir", + } + annotated_typ = Annotated[typ, FlyteAnnotation(annotations)] + if not generate_defaults or default is NoValue(): - no_defaults.append((name, typ, field())) + no_defaults.append((name, annotated_typ, field())) continue + annotations["default"] = gpjson.MessageToDict( + TypeEngine.to_literal(ctx, default, typ, TypeEngine.to_literal_type(typ)).to_flyte_idl() + ) + if isinstance(default, (list, dict, LatchFile, LatchDir)): - defaults.append((name, typ, field(default_factory=lambda: default))) + defaults.append((name, annotated_typ, field(default_factory=get_lambda(default)))) + continue + + if is_dataclass(default): + defaults.append((name, annotated_typ, field(default_factory=default))) continue defaults.append((name, typ, field(default=default))) diff --git a/src/latch_cli/snakemake/config/utils.py b/src/latch_cli/snakemake/config/utils.py index 9644dd71a..c18262691 100644 --- a/src/latch_cli/snakemake/config/utils.py +++ b/src/latch_cli/snakemake/config/utils.py @@ -13,7 +13,7 @@ from latch.types.directory import LatchDir from latch.types.file import LatchFile from latch.types.samplesheet_item import SamplesheetItem -from latch_cli.utils import identifier_from_str +from latch_cli.utils import best_effort_title_case, identifier_from_str JSONValue: TypeAlias = Union[int, str, bool, float, None, list["JSONValue"], "JSONDict"] JSONDict: TypeAlias = 
dict[str, "JSONValue"] @@ -118,19 +118,22 @@ ) -def is_file_like(s: str) -> bool: - if expr.match(s): +def is_file_like(name: str, value: str) -> bool: + if name == "outdir": return True - return any(s.endswith(x) for x in valid_extensions) + if expr.match(value): + return True + + return any(value.endswith(x) for x in valid_extensions) def parse_type(v: JSONValue, name: str) -> type: if v is None: return str - if isinstance(v, str) and is_file_like(v): - if v.endswith("/"): + if isinstance(v, str) and is_file_like(name, v): + if v.endswith("/") or name == "outdir": return LatchDir return LatchFile @@ -157,7 +160,7 @@ def parse_type(v: JSONValue, name: str) -> type: for k, x in v.items(): fields[identifier_from_str(k)] = parse_type(x, f"{name}_{k}") - return make_dataclass(identifier_from_str(name), fields.items()) + return make_dataclass(best_effort_title_case(f"{name}_type"), fields.items()) T = TypeVar("T") @@ -262,9 +265,6 @@ def type_repr(t: type[Any] | str, *, add_namespace: bool = False) -> str: args = get_args(t) assert len(args) > 1 if isinstance(args[1], FlyteAnnotation): - if "output" in args[1].data: - return "LatchOutputDir" - return ( f"typing_extensions.Annotated[{type_repr(args[0], add_namespace=add_namespace)}," f" FlyteAnnotation({args[1].data!r})]" diff --git a/src/latch_cli/snakemake/v2/workflow.py b/src/latch_cli/snakemake/v2/workflow.py index af257d710..0594bcf53 100644 --- a/src/latch_cli/snakemake/v2/workflow.py +++ b/src/latch_cli/snakemake/v2/workflow.py @@ -114,8 +114,7 @@ def snakemake_runtime(pvc_name: str, args: latch_metadata.WorkflowArgsType): @snakemake_workflow(smv2._snakemake_v2_metadata) -def { -}(args: latch_metadata.WorkflowArgsType): +def {workflow_name}(args: latch_metadata.WorkflowArgsType): \"\"\" Sample Description \"\"\" From 1970ed750fad2d59548641823eaa325f28a20596 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 26 Sep 2025 15:27:54 -0700 Subject: [PATCH 16/18] final thingd Signed-off-by: Ayush Kamat --- 
pyproject.toml | 4 ++-- src/latch_cli/docker_utils/__init__.py | 8 ++------ src/latch_cli/main.py | 24 ++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d8e42a43c..13e7df000 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/**/py.typed", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.67.5.a1" +version = "2.67.5.a2" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] @@ -31,7 +31,7 @@ dependencies = [ "scp>=0.14.0", "boto3>=1.26.0", "tqdm>=4.63.0", - "lytekit==0.15.29", + "lytekit==0.15.30", "lytekitplugins-pods==0.7.4", "typing-extensions>=4.12.0", "apscheduler>=3.10.0", diff --git a/src/latch_cli/docker_utils/__init__.py b/src/latch_cli/docker_utils/__init__.py index 6693440d1..1d05edb25 100644 --- a/src/latch_cli/docker_utils/__init__.py +++ b/src/latch_cli/docker_utils/__init__.py @@ -378,9 +378,7 @@ def generate(self, *, dest: Optional[Path] = None, overwrite: bool = False): click.secho(f"Successfully generated dockerfile `{dest}`", fg="green") -def generate_dockerignore( - dest: Path, *, wf_type: WorkflowType, overwrite: bool = False -) -> None: +def generate_dockerignore(dest: Path, *, wf_type: WorkflowType, overwrite: bool = False) -> None: if dest.exists(): if dest.is_dir(): click.secho(f".dockerignore already exists at `{dest}` and is a directory.", fg="red") @@ -401,9 +399,7 @@ def generate_dockerignore( click.secho(f"Successfully generated .dockerignore `{dest}`", fg="green") -def get_default_dockerfile( - pkg_root: Path, *, wf_type: WorkflowType, overwrite: bool = False -): +def get_default_dockerfile(pkg_root: Path, *, wf_type: WorkflowType, overwrite: bool = False): default_dockerfile = pkg_root / "Dockerfile" config = get_or_create_workflow_config( diff --git a/src/latch_cli/main.py 
b/src/latch_cli/main.py index 6d903fc16..6ecd4f830 100644 --- a/src/latch_cli/main.py +++ b/src/latch_cli/main.py @@ -17,6 +17,7 @@ from latch.ldata._transfer.progress import Progress as _Progress # noqa: PLC2701 from latch.utils import current_workspace from latch_cli.click_utils import EnumChoice +from latch_cli.docker_utils import DockerfileBuilder from latch_cli.exceptions.handler import CrashHandler from latch_cli.services.cp.autocomplete import complete as cp_complete from latch_cli.services.cp.autocomplete import remote_complete @@ -30,7 +31,7 @@ get_local_package_version, hash_directory, ) -from latch_cli.workflow_config import BaseImageOptions +from latch_cli.workflow_config import BaseImageOptions, get_or_create_workflow_config from latch_sdk_gql.execute import execute as gql_execute latch_cli.click_utils.patch() @@ -1329,6 +1330,10 @@ def snakemake(): """Manage snakemake-specific commands""" +# todo(ayush): allow providing destinations for +# - config path +# - dockerfile path +# - entrypoint output @snakemake.command("generate-entrypoint") @click.argument("pkg-root", nargs=1, type=click.Path(exists=True, path_type=Path)) @click.option( @@ -1345,8 +1350,16 @@ def snakemake(): help="Path to the Snakefile to register. 
If not provided, will default to searching the package " "root for a file named `Snakefile`.", ) +@click.option( + "--no-dockerfile", + "-D", + is_flag=True, + default=False, + type=bool, + help="Disable automatically generating a Dockerfile.", +) def sm_generate_entrypoint( - pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path] + pkg_root: Path, metadata_root: Optional[Path], snakefile: Optional[Path], no_dockerfile: bool ): """Generate a `wf/entrypoint.py` file from a Snakemake workflow""" @@ -1398,7 +1411,14 @@ def sm_generate_entrypoint( ) raise click.exceptions.Exit(1) + if not no_dockerfile: + config = get_or_create_workflow_config( + pkg_root / ".latch/config", base_image_type=BaseImageOptions.default + ) + DockerfileBuilder(pkg_root, config, wf_type=WorkflowType.snakemake).generate() + dest.write_text(get_entrypoint_content(pkg_root, metadata_path, snakefile)) + click.secho(f"Successfully generated entrypoint file `{dest}`", fg="green") """ From 6046e125c619aea6a0307624ffa6d60fa5992406 Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 26 Sep 2025 15:29:42 -0700 Subject: [PATCH 17/18] new version Signed-off-by: Ayush Kamat --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 13e7df000..83bb13342 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/**/py.typed", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.67.5.a2" +version = "2.67.5.a3" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] From ad5a829bd38326baadb21a460d433f9b339980cb Mon Sep 17 00:00:00 2001 From: Ayush Kamat Date: Fri, 26 Sep 2025 16:13:31 -0700 Subject: [PATCH 18/18] durrrr Signed-off-by: Ayush Kamat --- pyproject.toml | 2 +- src/latch_cli/snakemake/config/parser.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 83bb13342..3fe4f7d8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["src/**/*.py", "src/**/py.typed", "src/latch_cli/services/init/*"] [project] name = "latch" -version = "2.67.5.a3" +version = "2.67.5.a4" description = "The Latch SDK" authors = [{ name = "Kenny Workman", email = "kenny@latch.bio" }] maintainers = [{ name = "Ayush Kamat", email = "ayush@latch.bio" }] diff --git a/src/latch_cli/snakemake/config/parser.py b/src/latch_cli/snakemake/config/parser.py index f2d0b9d3d..b238b5f15 100644 --- a/src/latch_cli/snakemake/config/parser.py +++ b/src/latch_cli/snakemake/config/parser.py @@ -125,7 +125,7 @@ def generate_metadata( defaults.append((name, annotated_typ, field(default_factory=default))) continue - defaults.append((name, typ, field(default=default))) + defaults.append((name, annotated_typ, field(default=default))) generated_args_type = make_dataclass("SnakemakeArgsType", no_defaults + defaults)