diff --git a/.devcontainer/license_header.txt b/.devcontainer/license_header.txt index e193141..39f431d 100644 --- a/.devcontainer/license_header.txt +++ b/.devcontainer/license_header.txt @@ -1,5 +1,4 @@ -Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -for the German Human Genome-Phenome Archive (GHGA) +Copyright 2021 - 2022 German Cancer Research Center (DKFZ) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/Dockerfile b/Dockerfile index 19f5604..1d7687d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/LICENSE b/LICENSE index edf280c..34ed0f0 100644 --- a/LICENSE +++ b/LICENSE @@ -186,8 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL - for the German Human Genome-Phenome Archive (GHGA) + Copyright 2021 - 2022 German Cancer Research Center (DKFZ) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 836f544..e1905c1 100644 --- a/README.md +++ b/README.md @@ -1,143 +1,35 @@ -# Microservice Repository Template - -This repo is a template for creating a new microservice. - -The directories, files, and their structure herein are recommendations -from the GHGA Dev Team. - -## Naming Conventions -The github repository contains only lowercase letters, numbers, and hyphens "-", -e.g.: `my-microservice` - -The python package (and thus the source repository) contains underscores "_" -instead of hyphens, e.g.: `exec_manager` - -The command-line script that is used to run the service, the docker repository -(published to docker hub), and the helm chart (not part of this repository) use the -same pattern as the repository name, e.g.: `my-microservice` -## Adapt to your service -This is just a template and needs some adaption to your specific use case. - -Please search for **"please adapt"** comments. They will indicate all locations -that need modification. Once the adaptions are in place, please remove these # -comments. - -The following should serve as a template for the final repo's README, -please adapt it accordingly (e.g. replace all occurences of `my-microservice` or `exec_manager` with the final package name and don't forget to adapt the links): - ---- - -**\# please adapt the links of following badges:** -![tests](https://github.com/ghga-de/my-microservice/actions/workflows/unit_and_int_tests.yaml/badge.svg) -[![Coverage Status](https://coveralls.io/repos/github/ghga-de/my-microservice/badge.svg?branch=main)](https://coveralls.io/github/ghga-de/my-microservice?branch=main) -# My-Microservice - -A description explaining the use case of this service. - -## Documentation: - -An extensive documentation can be found [here](...) (coming soon). - -## Quick Start -### Installation -We recommend using the provided Docker container. 
- -A pre-build version is available at [docker hub](https://hub.docker.com/repository/docker/ghga/my-microservice): -```bash -# Please feel free to choose the version as needed: -docker pull ghga/my-microservice: -``` - -Or you can build the container yourself from the [`./Dockerfile`](./Dockerfile): -```bash -# Execute in the repo's root dir: -# (Please feel free to adapt the name/tag.) -docker build -t ghga/my-microservice: . -``` - -For production-ready deployment, we recommend using Kubernetes, however, -for simple use cases, you could execute the service using docker -on a single server: -```bash -# The entrypoint is preconfigured: -docker run -p 8080:8080 ghga/my-microservice: -``` - -If you prefer not to use containers, you may install the service from source: -```bash -# Execute in the repo's root dir: -pip install . - -# to run the service: -my-microservice -``` - -### Configuration: -The [`./example-config.yaml`](./example-config.yaml) gives an overview of the available configuration options. -Please adapt it and choose one of the following options for injecting it into the service: -- specify the path to via the `exec_manager_CONFIG_YAML` env variable -- rename it to `.exec_manager.yaml` and place it into one of the following locations: - - the current working directory were you are execute the service (on unix: `./.exec_manager.yaml`) - - your home directory (on unix: `~/.exec_manager.yaml`) - -The config yaml will be automatically parsed by the service. - -**Important: If you are using containers, the locations refer to paths within the container.** - -All parameters mentioned in the [`./example-config.yaml`](./example-config.yaml) -could also be set using environment variables or file secrets. - -For naming the environment variables, just prefix the parameter name with `exec_manager_`, -e.g. for the `host` set an environment variable named `exec_manager_HOST` -(you may use both upper or lower cases, however, it is standard to define all env -variables in upper cases). - -To using file secrets please refer to the -[corresponding section](https://pydantic-docs.helpmanual.io/usage/settings/#secret-support) -of the pydantic documentation. - - -## Development -For setting up the development environment, we rely on the -[devcontainer feature](https://code.visualstudio.com/docs/remote/containers) of vscode -in combination with Docker Compose. - -To use it, you have to have Docker Compose as well as vscode with its "Remote - Containers" extension (`ms-vscode-remote.remote-containers`) installed. -Then open this repository in vscode and run the command -`Remote-Containers: Reopen in Container` from the vscode "Command Palette". - -This will give you a full-fledged, pre-configured development environment including: -- infrastructural dependencies of the service (databases, etc.) 
-- all relevant vscode extensions pre-installed
-- pre-configured linting and auto-formating
-- a pre-configured debugger
-- automatic license-header insertion
-
-Moreover, inside the devcontainer, there are two convenience commands available
-(please type them in the integrated terminal of vscode):
-- `dev_install` - install the service with all development dependencies,
-installs pre-commit, and applies any migration scripts to the test database
-(please run that if you are starting the devcontainer for the first time
-or if you added any python dependencies to the [`./setup.cfg`](./setup.cfg))
-- `dev_launcher` - starts the service with the development config yaml
-(located in the `./.devcontainer/` dir)
-
-If you prefer not to use vscode, you could get a similar setup (without the editor specific features)
-by running the following commands:
-``` bash
-# Execute in the repo's root dir:
-cd ./.devcontainer
-
-# build and run the environment with docker-compose
-docker-compose up
-
-# attach to the main container:
-# (you can open multiple shell sessions like this)
-docker exec -it devcontainer_app_1 /bin/bash
+# Execution Manager for WorkflUX
+
+The execution manager manages the execution of jobs that are run with workflUX. A workflow can be executed in one of three ways: via Python, Bash, or WES.
+
+## Execution Profiles
+Currently, only the Python exec profile is available; the Bash and WES exec profiles will be added in the future. An execution consists of four steps: prepare, exec, eval, and finalize. Only the exec step is required; the others are optional.
+- __prepare:__
+This step is executed before the actual workflow execution. It can, for example, load required Python or conda environments.
+- __exec:__
+This step executes the actual workflow and is the only required step. At the end of this step, the job status should be updated based on the exit code of the workflow execution.
+- __eval:__
+This step can evaluate the success of the workflow execution. The exit code from the exec step should be used to set the new job status (FAILED or SUCCEEDED).
+- __finalize:__
+This step is executed at the end of the whole job execution. It can be used to clean up temporary files.
+
+### Python
+For the Python exec profile, you have to implement the exec method of the PythonJob class: create a new Python file containing a class that inherits from PythonJob and implements at least the exec method.
+Then create a YAML file that looks like the one below:
+```yaml
+EXEC_PROFILES:
+  NAMEOFEXECPROFILE:
+    type: python
+    max_retries: 2 # please adapt this number
+    py_module: ./python_script_with_implemented_methods.py
+    py_class: ClassOfImplementedMethods
 ```
+```max_retries``` is the maximum number of retries when the execution (consisting of the four steps) fails.
 
 ## License
 This repository is free to use and modify according to the [Apache 2.0 License](./LICENSE).
diff --git a/exec_manager/__init__.py b/exec_manager/__init__.py
index 7589079..8faf810 100644
--- a/exec_manager/__init__.py
+++ b/exec_manager/__init__.py
@@ -1,5 +1,4 @@
-# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL
-# for the German Human Genome-Phenome Archive (GHGA)
+# Copyright 2021 - 2022 German Cancer Research Center (DKFZ)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Short description of package""" # Please adapt to package
-
+"""
+A package managing execution of jobs in a way that is agnostic to
+- the workflow execution environment
+- the language used to describe the workflow
+"""
 __version__ = "0.1.0"
diff --git a/exec_manager/__main__.py b/exec_manager/__main__.py
index 01ff8ce..4643630 100644
--- a/exec_manager/__main__.py
+++ b/exec_manager/__main__.py
@@ -1,5 +1,4 @@
-# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL
-# for the German Human Genome-Phenome Archive (GHGA)
+# Copyright 2021 - 2022 German Cancer Research Center (DKFZ)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/exec_manager/dao/__init__.py b/exec_manager/dao/__init__.py
new file mode 100644
index 0000000..82e3792
--- /dev/null
+++ b/exec_manager/dao/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2021 - 2022 German Cancer Research Center (DKFZ)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Package containing DAO classes."""
diff --git a/exec_manager/dao/db_models.py b/exec_manager/dao/db_models.py
index 805180e..e504f60 100644
--- a/exec_manager/dao/db_models.py
+++ b/exec_manager/dao/db_models.py
@@ -1,5 +1,4 @@
-# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL
-# for the German Human Genome-Phenome Archive (GHGA)
+# Copyright 2021 - 2022 German Cancer Research Center (DKFZ)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,26 +14,32 @@
 """Defines all database specific ORM models"""
 
-from sqlalchemy import JSON, Boolean, Column, Integer, String
+import uuid
+
+from sqlalchemy import JSON, Column, String
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm.decl_api import DeclarativeMeta
 
 Base: DeclarativeMeta = declarative_base()
+metadata = Base.metadata
+
+
+# this function is necessary to avoid an IntegrityError
+def generate_uuid_str() -> str:
+    """Generates a UUID and returns it as a string.
-class ExampleObjectA(Base):
-    """An example object stored in the DB"""
+
+    Returns:
+        str: the newly generated UUID, used as the job id
+    """
+    return str(uuid.uuid4())
 
-    __tablename__ = "visas"
-    id = Column(Integer, primary_key=True)
-    name = Column(String, nullable=False)
-    some_json_details = Column(JSON, nullable=False)
 
+class DBJob(Base):
+    """A job object stored in the DB"""
 
-class ExampleObjectB(Base):
-    """Another example object stored in the DB"""
+    __tablename__ = "job"
 
-    __tablename__ = "table_b"
-    id = Column(Integer, primary_key=True)
-    name = Column(String, nullable=False)
-    active = Column(Boolean, nullable=False)
+    job_id = Column(String, default=generate_uuid_str, primary_key=True)
+    job_status = Column(String, nullable=False)
+    exec_profile = Column(JSON, nullable=False)
+    workflow = Column(JSON, nullable=False)
+    inputs = Column(JSON, nullable=False)
diff --git a/exec_manager/dao/job_dao.py b/exec_manager/dao/job_dao.py
new file mode 100644
index 0000000..85b8ee1
--- /dev/null
+++ b/exec_manager/dao/job_dao.py
@@ -0,0 +1,185 @@
+# Copyright 2021 - 2022 German Cancer Research Center (DKFZ)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""module for job dao"""
+
+from abc import ABC, abstractmethod
+from uuid import UUID
+
+from sqlalchemy import create_engine, insert, select, update
+
+from exec_manager.dao.db_models import DBJob, metadata
+from exec_manager.exec_profiles import ExecProfile, ExecProfileType
+from exec_manager.jobs import Job, JobStatusType, PythonJob
+from exec_manager.utils import WfLangType
+
+
+class JobDAO(ABC):
+    """abstract class for job dao
+
+    Methods:
+        create (UUID): inserts a job into database
+        get (Job): returns a job by its id
+        update (): updates a job by its id
+    """
+
+    @abstractmethod
+    def create(
+        self,
+        job_status: JobStatusType,
+        exec_profile: ExecProfile,
+        workflow: dict,
+        inputs: dict,
+    ) -> UUID:
+        """Inserts a job into the database.
+
+        Args:
+            job_status (JobStatusType): current status of the job;
+                initially it is JobStatusType.NOTSTARTET
+            exec_profile (ExecProfile): exec profile of this job
+            workflow (dict): the job's workflow
+            inputs (dict): the input parameters of the job
+
+        Returns:
+            UUID: job id
+        """
+        ...
+
+    @abstractmethod
+    def get(self, job_id: UUID) -> Job:
+        """Returns a job by its job id.
+
+        Args:
+            job_id (UUID): id of the job
+
+        Returns:
+            Job: job belonging to the job id
+        """
+        ...
+
+    @abstractmethod
+    def update(self, job_id: UUID, job: Job) -> None:
+        """Updates a job by its id.
+
+        Args:
+            job_id (UUID): id of the job
+            job (Job): updated job
+        """
+        ...
+
+
+class SQLJobDAO(JobDAO):
+    """class for sql job dao
+
+    Methods:
+        create (UUID): inserts a job into database
+        get (Job): returns a job by its id
+        update (): updates a job by its id
+    """
+
+    def __init__(self, db_url: str):
+        """Initialize DB."""
+        self._engine = create_engine(db_url)
+        metadata.create_all(self._engine)
+
+    def create(
+        self,
+        job_status: JobStatusType,
+        exec_profile: ExecProfile,
+        workflow: dict,
+        inputs: dict,
+    ) -> UUID:
+        """Inserts a job into the database.
+
+        Args:
+            job_status (JobStatusType): current status of the job;
+                initially it is JobStatusType.NOTSTARTET
+            exec_profile (ExecProfile): exec profile of this job
+            workflow (dict): the job's workflow
+            inputs (dict): the input parameters of the job
+
+        Returns:
+            UUID: job id
+        """
+        with self._engine.connect() as connection:
+            cursor = connection.execute(
+                insert(DBJob.__table__).values(
+                    job_status=job_status.value,
+                    exec_profile={
+                        "exec_profile_type": exec_profile.type_.value,
+                        "wf_lang_type": exec_profile.wf_lang.value,
+                    },
+                    workflow=workflow,
+                    inputs=inputs,
+                )
+            )
+            job_id = cursor.inserted_primary_key[0]
+        return job_id
+
+    def get(self, job_id: UUID) -> Job:
+        """Returns a job by its job id.
+
+        Args:
+            job_id (UUID): id of the job
+
+        Raises:
+            NotImplementedError: Bash exec profile is not implemented yet
+            NotImplementedError: WES exec profile is not implemented yet
+
+        Returns:
+            Job: job belonging to job_id
+        """
+        with self._engine.connect() as connection:
+            cursor = connection.execute(
+                # the inputs column is needed to reconstruct a PythonJob
+                select(
+                    [DBJob.job_id, DBJob.job_status, DBJob.exec_profile, DBJob.inputs]
+                ).where(DBJob.job_id == str(job_id))
+            )
+            result = cursor.fetchall()
+        job_status = JobStatusType(result[0][1])
+        exec_profile = result[0][2]
+        exec_profile = ExecProfile(
+            type_=ExecProfileType(exec_profile["exec_profile_type"]),
+            wf_lang=WfLangType(exec_profile["wf_lang_type"]),
+        )
+        if exec_profile.type_ == ExecProfileType.PYTHON:
+            inputs = result[0][3]
+            return PythonJob(job_id, job_status, exec_profile, inputs)
+        if exec_profile.type_ == ExecProfileType.BASH:
+            raise NotImplementedError(
+                "Execution profiles of type Bash not supported, yet"
+            )
+        raise NotImplementedError(
+            "Execution profiles of type WES not supported, yet"
+        )
+
+    def update(self, job_id: UUID, job: Job) -> None:
+        """Updates a job by its id.
+
+        Args:
+            job_id (UUID): id of the job
+            job (Job): updated job
+        """
+        with self._engine.connect() as connection:
+            connection.execute(
+                update(DBJob.__table__)
+                .where(DBJob.job_id == str(job_id))
+                .values(
+                    job_status=job.job_status.value,
+                    exec_profile={
+                        "exec_profile_type": job.exec_profile.type_.value,
+                        "wf_lang_type": job.exec_profile.wf_lang.value,
+                    },
+                )
+            )
diff --git a/exec_manager/exec_profiles.py b/exec_manager/exec_profiles.py
new file mode 100644
index 0000000..8223ef8
--- /dev/null
+++ b/exec_manager/exec_profiles.py
@@ -0,0 +1,45 @@
+# Copyright 2021 - 2022 German Cancer Research Center (DKFZ)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""module for execution profiles""" + +from enum import Enum + +from pydantic import BaseModel + +from exec_manager.utils import WfLangType + + +class ExecProfileType(Enum): + """Enumerate exec profile types: + - BASH: execution with bash + - PYTHON: execution with python + - WES: execution with wes + """ + + BASH = "bash" + PYTHON = "python" + WES = "wes" + + +class ExecProfile(BaseModel): + """Base model class for execution profiles. + + Args: + type_ (ExecProfileType): type of the execution profile (Python, Bash or WES) + wf_lang (WfLangType): language type of workflow (CWL, WDL, Snakemake, Nextflow) + """ + + type_: ExecProfileType + wf_lang: WfLangType diff --git a/exec_manager/job_execution.py b/exec_manager/job_execution.py new file mode 100644 index 0000000..3c7132a --- /dev/null +++ b/exec_manager/job_execution.py @@ -0,0 +1,112 @@ +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""module for job execution""" + +from typing import Callable +from uuid import UUID + +from exec_manager.dao.job_dao import SQLJobDAO +from exec_manager.exec_profiles import ExecProfile, ExecProfileType +from exec_manager.jobs import Job, JobStatusType, PythonJob + + +class PyExecSession: + """Class for PyExecSession + + Methods: + run (): runs a job + """ + + def __init__( + self, + max_retries: int = 0, + ) -> None: + """Constructs all the necessary attributes for the python exec session. + + Args: + max_retries (int, optional): number of maximum retries when the execution + fails. Defaults to 0. + """ + self._max_retries = max_retries + + def run( + self, + job_id: UUID, + job_status: JobStatusType, + exec_profile: ExecProfile, + inputs: dict, + ) -> None: + """Runs a job. + + Args: + job_id (UUID): id of the job + job_status (JobStatusType): current status of the job + (e.g. notstarted, executing, failed, ...) + exec_profile (ExecProfile): exec profile with which the job should be + executed (bash, python, WES) + inputs (dict): input parameters of the workflow + """ + counter = -1 + sql_job_dao = SQLJobDAO("sqlite+pysqlite://") + while self._max_retries > counter: + python_job = PythonJob(job_id, job_status, exec_profile, inputs) + python_job.job_status = JobStatusType.PREPARING + sql_job_dao.update(job_id, python_job) + python_job.prepare() + python_job.job_status = JobStatusType.EXECUTING + sql_job_dao.update(job_id, python_job) + python_job.exec() + python_job.job_status = JobStatusType.EVALUATING + sql_job_dao.update(job_id, python_job) + python_job.eval() + python_job.job_status = JobStatusType.FINALZING + sql_job_dao.update(job_id, python_job) + python_job.finalize() + if sql_job_dao.get(job_id).job_status == JobStatusType.SUCCEEDED: + break + counter = counter + 1 + + +def create_job( + inputs: dict, + workflow: dict, + exec_profile: ExecProfile, + create: Callable = SQLJobDAO.create, +) -> Job: + """Creates a job. 
+ + Args: + inputs (dict): input paramters of the job + workflow (dict): the job's workflow + exec_profile (ExecProfile): exec profile with which the job should be executed + (bash, python, WES) + create (Callable, optional): function that stores the job in a database. + Defaults to SQLJobDAO.create. + + Raises: + NotImplementedError: Bash execution profile not implemented yet + NotImplementedError: WES execution profile not implemented yet + + Returns: + Job: created job + """ + job_status = JobStatusType.NOTSTARTET + job_id = create(job_status, exec_profile, workflow, inputs) + if exec_profile.type_ == ExecProfileType.PYTHON: + return PythonJob(job_id, job_status, exec_profile, inputs) + if exec_profile.type_ == ExecProfileType.BASH: + raise NotImplementedError("Execution profiles of type Bash not supported, yet") + raise NotImplementedError("Execution profiles of type WES not supported, yet") diff --git a/exec_manager/jobs.py b/exec_manager/jobs.py new file mode 100644 index 0000000..0e4a2c9 --- /dev/null +++ b/exec_manager/jobs.py @@ -0,0 +1,161 @@ +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""module for jobs""" + +from abc import ABC, abstractmethod +from enum import Enum +from uuid import UUID + +from exec_manager.exec_profiles import ExecProfile + + +class JobStatusType(Enum): + """Enumerate job status types: + - NOTSTARTET: job is not started yet + - PREPARING: job is preparing + - EXECUTING: job is executing + - EVALUATING: job is evaluating + - FINALZING: job is finalizing + - CANCELED: job is canceled + - FAILED: job is failed + - SUCCEEDED: job is succeeded + """ + + NOTSTARTET = "not started" + PREPARING = "preparing" + EXECUTING = "executing" + EVALUATING = "evaluating" + FINALZING = "finalizing" + CANCELED = "canceled" + FAILED = "failed" + SUCCEEDED = "succeeded" + + +class Job(ABC): + """abstract class for job + + Args: + job_id (UUID): id of the job + job_status (JobStatusType): current status of the job (eg. notstarted, succeeded, failed) + exec_profile (ExecProfile): exec profile with which the job should be executed + + Methods: + prepare (): prepares the job + exec (): executes the job + eval (): evaluates the job + finalize (): finalizes the job + cancel (): cancels the job + + """ + + def __init__( + self, job_id: UUID, job_status: JobStatusType, exec_profile: ExecProfile + ) -> None: + """Constructs all the necessary attributes for the job object. + + Args: + job_id (UUID): id of the job + job_status (JobStatusType): current status of the job + (eg. notstarted, succeeded, failed, ...) + exec_profile (ExecProfile): exec profile with which the job should be + executed (bash, python, WES) + """ + self.job_id = job_id + self.job_status = job_status + self.exec_profile = exec_profile + + @abstractmethod + def prepare(self) -> None: + """Prepares the job.""" + ... + + @abstractmethod + def exec(self) -> None: + """Executes the job.""" + ... 
+ + @abstractmethod + def eval(self) -> None: + """Evaluates the job.""" + ... + + @abstractmethod + def finalize(self) -> None: + """Finalizes the job.""" + ... + + @abstractmethod + def cancel(self) -> None: + """Cancels the job.""" + ... + + +class PythonJob(Job): + """ + class for python job + + Args: + job_id (UUID): id of the job + job_status (JobStatusType): current status of the job (eg. notstarted, succeeded, failed) + exec_profile (ExecProfile): python exec profile + inputs (dict): input parameters of the job + + Methods: + prepare (): prepares the job + exec (): executes the job + eval (): evaluates the job + finalize (): finalizes the job + cancel (): cancels the job + """ + + def __init__( + self, + job_id: UUID, + job_status: JobStatusType, + exec_profile: ExecProfile, + inputs: dict, + ) -> None: + """Constructs all the necessary attributes for the python job object. + + Args: + job_id (UUID): id of the job + job_status (JobStatusType): current status of the job + (eg. notstarted, succeeded, failed) + exec_profile (ExecProfile): python exec profile + inputs (dict): input parameters of the job + """ + Job.__init__(self, job_id, job_status, exec_profile) + self.inputs = inputs + self.wf_lang = exec_profile.wf_lang + + def prepare(self) -> None: + """Prepares the job.""" + ... + + def exec(self) -> None: + """Executes the job.""" + ... + + def eval(self) -> None: + """Evaluates the job.""" + ... + + def finalize(self) -> None: + """Finalizes the job.""" + ... + + def cancel(self) -> None: + """Cancels the job.""" + ... diff --git a/exec_manager/utils.py b/exec_manager/utils.py new file mode 100644 index 0000000..3293609 --- /dev/null +++ b/exec_manager/utils.py @@ -0,0 +1,31 @@ +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""module for utility stuff""" + +from enum import Enum + + +class WfLangType(Enum): + """Enumerate workflow language types: + - CWL: cwl language + - WDL: wdl language + - NEXTFLOW: nextflow language + - SNAKEMAKE: snakemake language + """ + + CWL = "cwl" + WDL = "wdl" + NEXTFLOW = "nextflow" + SNAKEMAKE = "snakemake" diff --git a/scripts/get_package_name.py b/scripts/get_package_name.py index f43e467..23ec881 100755 --- a/scripts/get_package_name.py +++ b/scripts/get_package_name.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/scripts/license_checker.py b/scripts/license_checker.py index b85be3f..2608590 100755 --- a/scripts/license_checker.py +++ b/scripts/license_checker.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -93,8 +92,7 @@ # A list of all chars that may be used to introduce a comment: COMMENT_CHARS = ["#"] -AUTHOR = """Universität Tübingen, DKFZ and EMBL -for the German Human Genome-Phenome Archive (GHGA)""" +AUTHOR = """German Cancer Research Center (DKFZ)""" # The copyright notice should not date earlier than this year: MIN_YEAR = 2021 diff --git a/setup.cfg b/setup.cfg index caab8e3..07d04da 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -73,6 +72,7 @@ dev = typer==0.4.1 sqlalchemy-utils==0.38.2 sqlalchemy-stubs==0.4 + pydantic==1.9.1 # Please adapt: Only needed if you are using alembic for database versioning (Probably for PostgreSQL) db_migration = diff --git a/setup.py b/setup.py index 1766b45..f56b210 100755 --- a/setup.py +++ b/setup.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/__init__.py b/tests/__init__.py index 93766df..62f385b 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index dc01178..81a2573 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/fixtures/utils.py b/tests/fixtures/utils.py index a38f1a9..154491a 100644 --- a/tests/fixtures/utils.py +++ b/tests/fixtures/utils.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 00db53a..500ae8e 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index 8031adf..91891cb 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/fixtures/utils.py b/tests/integration/fixtures/utils.py index a38f1a9..154491a 100644 --- a/tests/integration/fixtures/utils.py +++ b/tests/integration/fixtures/utils.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index e4011ec..1c514b4 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/fixtures/__init__.py b/tests/unit/fixtures/__init__.py index dea2cf7..4d86eb7 100644 --- a/tests/unit/fixtures/__init__.py +++ b/tests/unit/fixtures/__init__.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/fixtures/utils.py b/tests/unit/fixtures/utils.py index a38f1a9..154491a 100644 --- a/tests/unit/fixtures/utils.py +++ b/tests/unit/fixtures/utils.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/test_dummy.py b/tests/unit/test_dummy.py index dbd2856..ee6d874 100644 --- a/tests/unit/test_dummy.py +++ b/tests/unit/test_dummy.py @@ -1,5 +1,4 @@ -# Copyright 2021 - 2022 Universität Tübingen, DKFZ and EMBL -# for the German Human Genome-Phenome Archive (GHGA) +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/unit/test_job_dao.py b/tests/unit/test_job_dao.py new file mode 100644 index 0000000..d091171 --- /dev/null +++ b/tests/unit/test_job_dao.py @@ -0,0 +1,231 @@ +# Copyright 2021 - 2022 German Cancer Research Center (DKFZ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from typing import Generator +from uuid import UUID + +import pytest + +from exec_manager.dao.job_dao import SQLJobDAO +from exec_manager.exec_profiles import ExecProfile, ExecProfileType +from exec_manager.jobs import JobStatusType, PythonJob +from exec_manager.utils import WfLangType + + +@pytest.fixture +def example_sqlite_job_dao() -> Generator: + """Creates sqlite engine. + + Yields: + Generator: sqlite engine + """ + engine = SQLJobDAO("sqlite+pysqlite://") + yield engine + + +@pytest.fixture +def example_job_status() -> JobStatusType: + """Creates a job status. + + Returns: + JobStatusType: job status for tests + """ + return JobStatusType.NOTSTARTET + + +@pytest.fixture +def example_exec_profile() -> ExecProfile: + """Creates an exec profile. + + Returns: + ExecProfile: exec profile for tests + """ + return ExecProfile(type_=ExecProfileType.PYTHON, wf_lang=WfLangType.CWL) + + +@pytest.fixture +def example_workflow() -> dict: + """Creates a workflow. + + Returns: + dict: workflow + """ + return {"test": 1} + + +@pytest.fixture +def example_inputs() -> dict: + """Creates inputs. + + Returns: + dict: input parameters for the workflow + """ + return {"hello": "world"} + + +@pytest.fixture +def example_job_id( + example_job_status: JobStatusType, + example_exec_profile: ExecProfile, + example_workflow: dict, + example_inputs: dict, + example_sqlite_job_dao, +) -> UUID: + """Creates a job id. + + Args: + example_job_status (JobStatusType): status of the job + example_exec_profile (ExecProfile): execution profile of the job + example_workflow (dict): job's workflow + example_inputs (dict): input parameters of the workflow + example_sqlite_job_dao (_type_): database engine + + Returns: + UUID: job id + """ + return example_sqlite_job_dao.create( + example_job_status, + example_exec_profile, + example_workflow, + example_inputs, + ) + + +@pytest.mark.usefixtures( + "example_sqlite_job_dao", + "example_job_id", + "example_exec_profile", + "example_workflow", + "example_inputs", +) +def test_create( + example_job_status: JobStatusType, + example_exec_profile: ExecProfile, + example_workflow: dict, + example_inputs: dict, + example_sqlite_job_dao, +) -> None: + """Tests the create method from SQLJobDAO class. 
+ + Args: + example_job_status (JobStatusType): status of the job + example_exec_profile (ExecProfile): execution profile of the job + example_workflow (dict): job's workflow + example_inputs (dict): input parameters of the workflow + example_sqlite_job_dao (_type_): database engine + """ + job_id = example_sqlite_job_dao.create( + example_job_status, + example_exec_profile, + example_workflow, + example_inputs, + ) + db_job = example_sqlite_job_dao.get(job_id) + assert ( + str(db_job.job_id) == str(job_id) + and db_job.job_status.value == example_job_status.value + and ( + json.dumps( + { + "exec_profile_type": db_job.exec_profile.type_.value, + "wf_lang": db_job.exec_profile.wf_lang.value, + } + ) + == json.dumps( + { + "exec_profile_type": example_exec_profile.type_.value, + "wf_lang": example_exec_profile.wf_lang.value, + } + ) + ) + ) + + +@pytest.mark.usefixtures( + "example_sqlite_job_dgao", "example_job_id", "example_exec_profile" +) +def test_get( + example_job_id: UUID, + example_job_status: JobStatusType, + example_exec_profile: ExecProfile, + example_sqlite_job_dao, +) -> None: + """Tests the get method from SQLJobDAO class. + + Args: + example_job_id (UUID): id of the job + example_job_status (JobStatusType): status of the job + example_exec_profile (ExecProfile): execution profile of the job + example_sqlite_job_dao (_type_): database engine + """ + job_id = example_job_id + db_job = example_sqlite_job_dao.get(job_id) + assert ( + str(db_job.job_id) == str(job_id) + and db_job.job_status.value == example_job_status.value + and ( + json.dumps( + { + "exec_profile_type": db_job.exec_profile.type_.value, + "wf_lang": db_job.exec_profile.wf_lang.value, + } + ) + == json.dumps( + { + "exec_profile_type": example_exec_profile.type_.value, + "wf_lang": example_exec_profile.wf_lang.value, + } + ) + ) + ) + + +@pytest.mark.usefixtures( + "example_sqlite_job_dao", "example_job_id", "example_exec_profile" +) +def test_update( + example_job_id: UUID, + example_exec_profile: ExecProfile, + example_inputs: dict, + example_sqlite_job_dao, +) -> None: + """Tests the update method from SQLJobDAO class. + + Args: + example_job_id (UUID): id of the job + example_exec_profile (ExecProfile): execution profile of the job + example_inputs (dict): input parameters of the workflow + example_sqlite_job_dao (_type_): database engine + """ + job_id = example_job_id + job = PythonJob( + job_id, JobStatusType.PREPARING, example_exec_profile, example_inputs + ) + example_sqlite_job_dao.update(job_id, job) + db_job = example_sqlite_job_dao.get(job_id) + assert ( + str(db_job.job_id) == str(job_id) + and db_job.job_status.value == JobStatusType.PREPARING.value + and ( + { + "exec_profile_type": db_job.exec_profile.type_.value, + "wf_lang": db_job.exec_profile.wf_lang.value, + } + == { + "exec_profile_type": example_exec_profile.type_.value, + "wf_lang": example_exec_profile.wf_lang.value, + } + ) + )
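
Illustration (not part of the diff above): the README describes the Python exec profile but does not show what the module referenced via `py_module`/`py_class` in the YAML might contain. The following is a minimal, hypothetical sketch, assuming that subclassing `PythonJob` and overriding only the required `exec` step is sufficient, and that the subclass sets the final job status based on the exit code as the README states. The file name, class name, and the command being launched are placeholders, not prescribed by this change set.

```python
# my_exec_profile.py -- hypothetical module to be referenced via `py_module`
# in the exec profile YAML; names and the invoked command are placeholders.
import subprocess

from exec_manager.jobs import JobStatusType, PythonJob


class MyCwlJob(PythonJob):
    """Illustrative PythonJob subclass that only overrides the required exec step."""

    def exec(self) -> None:
        # Launch the workflow with some external runner; the actual command is an
        # assumption, as the change set does not prescribe how exec starts the workflow.
        result = subprocess.run(
            ["echo", "running workflow with inputs:", str(self.inputs)],
            check=False,
        )
        # Per the README, the job status is updated based on the exit code.
        self.job_status = (
            JobStatusType.SUCCEEDED if result.returncode == 0 else JobStatusType.FAILED
        )
```

With a module like this, the exec profile YAML from the README would point `py_module` at `./my_exec_profile.py` and `py_class` at `MyCwlJob`.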