From 25015058831b2cd25814f8ef1c77b40abc7b3fc5 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Mon, 28 Apr 2025 13:53:49 -0400 Subject: [PATCH 01/11] Upgrade versioneer, add Python 3.12 to CI build --- .github/workflows/cibuild.yml | 2 +- microhapulator/_version.py | 324 +++++++--- versioneer.py | 1135 +++++++++++++++++++++++---------- 3 files changed, 1043 insertions(+), 418 deletions(-) diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index 494ddab..56b9e88 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -8,7 +8,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} diff --git a/microhapulator/_version.py b/microhapulator/_version.py index 992ceac..cea9343 100644 --- a/microhapulator/_version.py +++ b/microhapulator/_version.py @@ -4,8 +4,9 @@ # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -14,9 +15,11 @@ import re import subprocess import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. 
# setup.py/versioneer.py will grep for the variable names, so they must @@ -32,8 +35,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool -def get_config(): + +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -51,14 +61,14 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -68,24 +78,39 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + 
args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, + process = subprocess.Popen( + [command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, ) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -96,18 +121,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. 
Source tarballs conventionally unpack into a directory that includes both @@ -116,7 +143,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return { @@ -126,9 +153,8 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): "error": None, "date": None, } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print( @@ -139,41 +165,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -186,11 +219,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -199,7 +232,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -208,6 +241,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix) :] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r"\d", r): + continue if verbose: print("picking %s" % r) return { @@ -230,7 +268,9 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -241,7 +281,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -249,25 +296,65 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( + describe_out, rc = runner( GITS, - ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], cwd=root, ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -284,7 +371,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces @@ -307,24 +394,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -348,23 +438,70 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). 
+ """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -391,12 +528,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -413,7 +579,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -433,7 +599,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
@@ -453,7 +619,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return { @@ -469,10 +635,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -491,7 +661,7 @@ def render(pieces, style): } -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -511,7 +681,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): + for _ in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: return { diff --git a/versioneer.py b/versioneer.py index 64fea1c..1e3753e 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,5 +1,5 @@ -# Version: 0.18 +# Version: 0.29 """The Versioneer - like a rocketeer, but for versions. @@ -7,18 +7,14 @@ ============== * like a rocketeer, but for versions! 
-* https://github.com/warner/python-versioneer +* https://github.com/python-versioneer/python-versioneer * Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based +* License: Public Domain (Unlicense) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control @@ -27,9 +23,38 @@ ## Quick Install -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results +Versioneer provides two installation modes. The "classic" vendored mode installs +a copy of versioneer into your repository. The experimental build-time dependency mode +is intended to allow you to skip this step and simplify the process of upgrading. 
+ +### Vendored mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) + * Note that you will need to add `tomli; python_version < "3.11"` to your + build-time dependencies if you use `pyproject.toml` +* run `versioneer install --vendor` in your source tree, commit the results +* verify version information with `python setup.py version` + +### Build-time dependency mode + +* `pip install versioneer` to somewhere in your $PATH + * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is + available, so you can also use `conda install -c conda-forge versioneer` +* add a `[tool.versioneer]` section to your `pyproject.toml` or a + `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) +* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) + to the `requires` key of the `build-system` table in `pyproject.toml`: + ```toml + [build-system] + requires = ["setuptools", "versioneer[toml]"] + build-backend = "setuptools.build_meta" + ``` +* run `versioneer install --no-vendor` in your source tree, commit the results +* verify version information with `python setup.py version` ## Version Identifiers @@ -61,7 +86,7 @@ for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. +uncommitted changes). The version identifier is used for multiple purposes: @@ -166,7 +191,7 @@ Some situations are known to cause problems for Versioneer. This details the most significant ones. 
More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). +[issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects @@ -180,7 +205,7 @@ `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. + provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs @@ -194,9 +219,9 @@ Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve @@ -224,31 +249,20 @@ cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 
-Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace +* edit `setup.cfg` and `pyproject.toml`, if necessary, + to include any new configuration settings indicated by the release notes. + See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install --[no-]vendor` in your source tree, to replace `SRC/_version.py` * commit any changed files @@ -265,35 +279,70 @@ direction and include code from all supported VCS systems, reducing the number of intermediate scripts. +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . +Specifically, both are released under the "Unlicense", as described in +https://unlicense.org/. 
+ +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments -from __future__ import print_function -try: - import configparser -except ImportError: - import ConfigParser as configparser +import configparser import errno import json import os import re import subprocess import sys +from pathlib import Path +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union +from typing import NoReturn +import functools + +have_tomllib = True +if sys.version_info >= (3, 11): + import tomllib +else: + try: + import tomli as tomllib + except ImportError: + have_tomllib = False class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + versionfile_source: str + versionfile_build: Optional[str] + parentdir_prefix: Optional[str] + verbose: Optional[bool] + -def get_root(): +def get_root() -> str: """Get the project root directory. We require that all commands are run from the project root, i.e. 
the @@ -301,13 +350,23 @@ def get_root(): """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") + pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + if not ( + os.path.exists(setup_py) + or os.path.exists(pyproject_toml) + or os.path.exists(versioneer_py) + ): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " @@ -321,43 +380,62 @@ def get_root(): # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. 
- me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: + if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) + % (os.path.dirname(my_path), versioneer_py)) except NameError: pass return root -def get_config_from_root(root): +def get_config_from_root(root: str) -> VersioneerConfig: """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or + # This might raise OSError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None + root_pth = Path(root) + pyproject_toml = root_pth / "pyproject.toml" + setup_cfg = root_pth / "setup.cfg" + section: Union[Dict[str, Any], configparser.SectionProxy, None] = None + if pyproject_toml.exists() and have_tomllib: + try: + with open(pyproject_toml, 'rb') as fobj: + pp = tomllib.load(fobj) + section = pp['tool']['versioneer'] + except (tomllib.TOMLDecodeError, KeyError) as e: + print(f"Failed to load config from {pyproject_toml}: {e}") + print("Try to load it from setup.cfg") + if not section: + parser = configparser.ConfigParser() + with open(setup_cfg) as cfg_file: + parser.read_file(cfg_file) + parser.get("versioneer", "VCS") # raise error if missing + + section = parser["versioneer"] + + # `cast`` really shouldn't be used, but its simplest for the + # common VersioneerConfig users at the moment. 
We verify against + # `None` values elsewhere where it matters + cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): + cfg.VCS = section['VCS'] + cfg.style = section.get("style", "") + cfg.versionfile_source = cast(str, section.get("versionfile_source")) + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = cast(str, section.get("tag_prefix")) + if cfg.tag_prefix in ("''", '""', None): cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") + cfg.parentdir_prefix = section.get("parentdir_prefix") + if isinstance(section, configparser.SectionProxy): + # Make sure configparser translates to bool + cfg.verbose = section.getboolean("verbose") + else: + cfg.verbose = section.get("verbose") + return cfg @@ -366,37 +444,48 @@ class NotThisMethod(Exception): # these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f + HANDLERS.setdefault(vcs, {})[method] = f return f return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: 
Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -407,26 +496,25 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, process.returncode -LONG_VERSION_PY['git'] = ''' +LONG_VERSION_PY['git'] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -435,9 +523,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, import re import subprocess import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must @@ -453,8 +543,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + -def get_config(): +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -472,13 +569,13 @@ class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" -LONG_VERSION_PY = {} -HANDLERS = {} +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in 
HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -487,22 +584,35 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) - p = None - for c in commands: + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: try: - dispcmd = str([c] + args) + dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except EnvironmentError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -513,18 +623,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode + return None, process.returncode + return stdout, 
process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -533,15 +645,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% @@ -550,41 +661,48 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -597,11 +715,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d @@ -610,7 +728,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: @@ -619,6 +737,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %%s" %% r) return {"version": r, @@ -634,7 +757,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -645,8 +773,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) @@ -654,24 +789,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -688,7 +856,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces @@ -713,26 +881,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -757,23 +926,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). 
+ """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] + rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -800,12 +1017,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -822,7 +1068,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -842,7 +1088,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
@@ -862,7 +1108,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -876,10 +1122,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -894,7 +1144,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some @@ -915,7 +1165,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, @@ -942,41 +1192,48 @@ def get_versions(): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: pass return keywords @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -989,11 +1246,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1002,7 +1259,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1011,6 +1268,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue if verbose: print("picking %s" % r) return {"version": r, @@ -1026,7 +1288,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* @@ -1037,8 +1304,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1046,24 +1320,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out @@ -1080,7 +1387,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces @@ -1105,19 +1412,20 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces -def do_vcs_install(manifest_in, versionfile_source, ipy): +def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py @@ -1126,36 +1434,40 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] + files = [versionfile_source] if ipy: files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) + if "VERSIONEER_PEP518" not in globals(): + try: + my_path = __file__ + if my_path.endswith((".pyc", ".pyo")): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) present = False try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: pass if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + 
parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -1164,15 +1476,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): """ rootdirs = [] - for i in range(3): + for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level + rootdirs.append(root) + root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % @@ -1181,7 +1492,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from +# This file was generated by 'versioneer.py' (0.29) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
@@ -1198,12 +1509,12 @@ def get_versions(): """ -def versions_from_file(filename): +def versions_from_file(filename: str) -> Dict[str, Any]: """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() - except EnvironmentError: + except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) @@ -1215,9 +1526,8 @@ def versions_from_file(filename): return json.loads(mo.group(1)) -def write_to_version_file(filename, versions): +def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: """Write the given version number to the given _version.py file.""" - os.unlink(filename) contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: @@ -1226,14 +1536,14 @@ def write_to_version_file(filename, versions): print("set %s to '%s'" % (filename, versions["version"])) -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -1258,23 +1568,71 @@ def render_pep440(pieces): return rendered -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. 
Note that .dev0 sorts backwards @@ -1301,12 +1659,41 @@ def render_pep440_post(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -1323,7 +1710,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -1343,7 +1730,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
@@ -1363,7 +1750,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -1377,10 +1764,14 @@ def render(pieces, style): if style == "pep440": rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": @@ -1399,7 +1790,7 @@ class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" -def get_versions(verbose=False): +def get_versions(verbose: bool = False) -> Dict[str, Any]: """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. 
@@ -1414,7 +1805,7 @@ def get_versions(verbose=False): assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose + verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` assert cfg.versionfile_source is not None, \ "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" @@ -1475,13 +1866,17 @@ def get_versions(verbose=False): "date": None} -def get_version(): +def get_version() -> str: """Get the short version string for this project.""" return get_versions()["version"] -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" +def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None): + """Get the custom setuptools subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ if "versioneer" in sys.modules: del sys.modules["versioneer"] # this fixes the "python setup.py develop" case (also 'install' and @@ -1495,25 +1890,25 @@ def get_cmdclass(): # parent is protected against the child's "import versioneer". By # removing ourselves from sys.modules here, before the child build # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/warner/python-versioneer/issues/52 + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - cmds = {} + cmds = {} if cmdclass is None else cmdclass.copy() - # we add "version" to both distutils and setuptools - from distutils.core import Command + # we add "version" to setuptools + from setuptools import Command class cmd_version(Command): description = "report generated version string" - user_options = [] - boolean_options = [] + user_options: List[Tuple[str, str, str]] = [] + boolean_options: List[str] = [] - def initialize_options(self): + def initialize_options(self) -> None: pass - def finalize_options(self): + def finalize_options(self) -> None: pass - def run(self): + def run(self) -> None: vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) @@ -1523,7 +1918,7 @@ def run(self): print(" error: %s" % vers["error"]) cmds["version"] = cmd_version - # we override "build_py" in both distutils and setuptools + # we override "build_py" in setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py @@ -1538,18 +1933,25 @@ def run(self): # then does setup.py bdist_wheel, or sometimes setup.py install # setup.py egg_info -> ? + # pip install -e . and setuptool/editable_wheel will invoke build_py + # but the build_py command is not expected to copy any files. 
+ # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py + if 'build_py' in cmds: + _build_py: Any = cmds['build_py'] else: - from distutils.command.build_py import build_py as _build_py + from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) + if getattr(self, "editable_mode", False): + # During editable installs `.py` and data files are + # not copied to build_lib + return # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: @@ -1559,8 +1961,40 @@ def run(self): write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py + if 'build_ext' in cmds: + _build_ext: Any = cmds['build_ext'] + else: + from setuptools.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self) -> None: + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if not cfg.versionfile_build: + return + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + if not os.path.exists(target_versionfile): + print(f"Warning: {target_versionfile} does not exist, skipping " + "version update. 
This can happen if you are running build_ext " + "without first running build_py.") + return + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe + from cx_Freeze.dist import build_exe as _build_exe # type: ignore # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ @@ -1569,7 +2003,7 @@ def run(self): # ... class cmd_build_exe(_build_exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1593,12 +2027,12 @@ def run(self): if 'py2exe' in sys.modules: # py2exe enabled? try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 + from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 + from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore class cmd_py2exe(_py2exe): - def run(self): + def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() @@ -1619,14 +2053,51 @@ def run(self): }) cmds["py2exe"] = cmd_py2exe + # sdist farms its file list building out to egg_info + if 'egg_info' in cmds: + _egg_info: Any = cmds['egg_info'] + else: + from setuptools.command.egg_info import egg_info as _egg_info + + class cmd_egg_info(_egg_info): + def find_sources(self) -> None: + # egg_info.find_sources builds the manifest list and writes it + # in one shot + super().find_sources() + + # Modify the filelist and normalize it + root = get_root() + cfg = get_config_from_root(root) + self.filelist.append('versioneer.py') + if cfg.versionfile_source: + # There are rare cases where versionfile_source might not be + # included by default, so we must be explicit + 
self.filelist.append(cfg.versionfile_source) + self.filelist.sort() + self.filelist.remove_duplicates() + + # The write method is hidden in the manifest_maker instance that + # generated the filelist and was thrown away + # We will instead replicate their final normalization (to unicode, + # and POSIX-style paths) + from setuptools import unicode_utils + normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') + for f in self.filelist.files] + + manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') + with open(manifest_filename, 'w') as fobj: + fobj.write('\n'.join(normalized)) + + cmds['egg_info'] = cmd_egg_info + # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist + if 'sdist' in cmds: + _sdist: Any = cmds['sdist'] else: - from distutils.command.sdist import sdist as _sdist + from setuptools.command.sdist import sdist as _sdist class cmd_sdist(_sdist): - def run(self): + def run(self) -> None: versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old @@ -1634,7 +2105,7 @@ def run(self): self.distribution.metadata.version = versions["version"] return _sdist.run(self) - def make_release_tree(self, base_dir, files): + def make_release_tree(self, base_dir: str, files: List[str]) -> None: root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) @@ -1687,21 +2158,26 @@ def make_release_tree(self, base_dir, files): """ -INIT_PY_SNIPPET = """ +OLD_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" + +def do_setup() -> int: + """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, + except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + if isinstance(e, (OSError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: @@ -1721,62 +2197,37 @@ def do_setup(): ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + maybe_ipy: Optional[str] = ipy if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() - except EnvironmentError: + except OSError: old = "" - if INIT_PY_SNIPPET not in old: + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) + f.write(snippet) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. 
- manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") + maybe_ipy = None # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + do_vcs_install(cfg.versionfile_source, maybe_ipy) return 0 -def scan_setup_py(): +def scan_setup_py() -> int: """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False @@ -1813,10 +2264,14 @@ def scan_setup_py(): return errors +def setup_command() -> NoReturn: + """Set up Versioneer and exit with appropriate error code.""" + errors = do_setup() + errors += scan_setup_py() + sys.exit(1 if errors else 0) + + if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) + setup_command() From fcd140ab0275e19c92903a681a64fe18d3578865 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Mon, 28 Apr 2025 14:22:15 -0400 Subject: [PATCH 02/11] Integrate happer as a subpackage --- Makefile | 4 +- microhapulator/__init__.py | 1 + microhapulator/happer/__init__.py | 19 ++++ microhapulator/happer/__main__.py | 51 +++++++++ microhapulator/happer/allele.py | 105 ++++++++++++++++++ microhapulator/happer/mutablestring.py | 69 ++++++++++++ microhapulator/happer/mutate.py | 95 ++++++++++++++++ microhapulator/happer/seqio.py | 43 +++++++ microhapulator/happer/tests/__init__.py | 17 +++ .../happer/tests/data/haplo-test1.bed | 5 + microhapulator/happer/tests/data/ind1.bed | 6 + .../happer/tests/data/pico-hapl-1.bed | 3 + microhapulator/happer/tests/data/pico-refr.fa | 13 +++ .../happer/tests/data/ploidy-mismatch.bed | 3 + microhapulator/happer/tests/test_allele.py | 104 +++++++++++++++++ .../happer/tests/test_mutablestring.py | 49 ++++++++ microhapulator/happer/tests/test_mutate.py | 90 +++++++++++++++ microhapulator/happer/tests/test_seqio.py | 53 +++++++++ microhapulator/profile.py | 2 +- setup.py | 9 +- 20 files changed, 736 insertions(+), 5 deletions(-) create mode 100644 microhapulator/happer/__init__.py create mode 100644 microhapulator/happer/__main__.py create mode 100644 
microhapulator/happer/allele.py create mode 100644 microhapulator/happer/mutablestring.py create mode 100644 microhapulator/happer/mutate.py create mode 100644 microhapulator/happer/seqio.py create mode 100644 microhapulator/happer/tests/__init__.py create mode 100644 microhapulator/happer/tests/data/haplo-test1.bed create mode 100644 microhapulator/happer/tests/data/ind1.bed create mode 100644 microhapulator/happer/tests/data/pico-hapl-1.bed create mode 100644 microhapulator/happer/tests/data/pico-refr.fa create mode 100644 microhapulator/happer/tests/data/ploidy-mismatch.bed create mode 100644 microhapulator/happer/tests/test_allele.py create mode 100644 microhapulator/happer/tests/test_mutablestring.py create mode 100644 microhapulator/happer/tests/test_mutate.py create mode 100644 microhapulator/happer/tests/test_seqio.py diff --git a/Makefile b/Makefile index d80c1f3..60dd4d7 100644 --- a/Makefile +++ b/Makefile @@ -35,10 +35,10 @@ clean: ## style: check code style vs Black style: - black --line-length=99 --check microhapulator/*.py microhapulator/*/*.py setup.py + black --line-length=99 --check microhapulator/*.py microhapulator/*/*.py microhapulator/*/*/*.py setup.py snakefmt --line-length=99 --check microhapulator/workflows/*.smk ## format: autoformat Python code format: - black --line-length=99 microhapulator/*.py microhapulator/*/*.py setup.py + black --line-length=99 microhapulator/*.py microhapulator/*/*.py microhapulator/*/*/*.py setup.py snakefmt --line-length=99 microhapulator/workflows/*.smk diff --git a/microhapulator/__init__.py b/microhapulator/__init__.py index 7c73612..ca76370 100644 --- a/microhapulator/__init__.py +++ b/microhapulator/__init__.py @@ -21,3 +21,4 @@ from . import profile from . import api from . import cli +from . 
import happer diff --git a/microhapulator/happer/__init__.py b/microhapulator/happer/__init__.py new file mode 100644 index 0000000..7d194ca --- /dev/null +++ b/microhapulator/happer/__init__.py @@ -0,0 +1,19 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2018, DHS. +# +# This file is part of MicroHapulator (https://github.com/bioforensics/microhapulator) and is +# licensed under the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + +from .allele import Allele +from .mutablestring import MutableString +from . import mutate +from . import seqio +from . import tests +from . import __main__ +from .__main__ import get_parser diff --git a/microhapulator/happer/__main__.py b/microhapulator/happer/__main__.py new file mode 100644 index 0000000..25b43cc --- /dev/null +++ b/microhapulator/happer/__main__.py @@ -0,0 +1,51 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2018, DHS. +# +# This file is part of MicroHapulator (https://github.com/bioforensics/microhapulator) and is +# licensed under the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
+# ------------------------------------------------------------------------------------------------- + +import argparse +import happer +import sys + + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-v", "--version", action="version", version="happer v{}".format(happer.__version__) + ) + parser.add_argument( + "-o", + "--out", + metavar="FILE", + default="-", + help="write haplotype sequences to the specified " + "file; default is the terminal (stdout)", + ) + parser.add_argument("seqfile", help="input sequences in Fasta format") + parser.add_argument("bed", help="haplotypes annotated in BED format") + return parser + + +def main(args=None): + """Entry point for the happer CLI. + + Isolated as a method so that the CLI can be called by other Python code + (e.g. for testing), in which case the arguments are passed to the function. + If no arguments are passed to the function, parse them from the command + line. + """ + if args is None: # pragma: no cover + if len(sys.argv) == 1: + get_parser().parse_args(["-h"]) + args = get_parser().parse_args() + + versionmessage = "[happer] running version {}".format(happer.__version__) + print(versionmessage, file=sys.stderr) + happer.mutate.main(args) diff --git a/microhapulator/happer/allele.py b/microhapulator/happer/allele.py new file mode 100644 index 0000000..e8f054c --- /dev/null +++ b/microhapulator/happer/allele.py @@ -0,0 +1,105 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2018, DHS. +# +# This file is part of MicroHapulator (https://github.com/bioforensics/microhapulator) and is +# licensed under the BSD license: see LICENSE.txt. 
+# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. +# ------------------------------------------------------------------------------------------------- + + +class InvalidGenomicCoordinateError(ValueError): + pass + + +class Allele(object): + """Class for handling alleles""" + + def __init__(self, seqid, start, end, alleleseq, refrseq=None): + """Constructor for allele objects. + + Genomic intervals should use half-open 0-indexed interval notation: + that is, the first nucleotide of a sequence has an index of 0, the + first 10 nucleotides are designated by the interval [0, 10), and the + next 10 nucleotides are designated by the interval [10, 20). + + For alleles associated with indels or structural variants, the length + of the alleleseq can be different from the start and end coordinates of + the allele, which always correspond to the reference sequence. + """ + self.seqid = seqid + self.start = start + self.end = end + self.seq = alleleseq + self.refr = refrseq + if self.end < self.start: + message = "allele end cannot be before allele start; " + message += "{:s}:{:d}-{:d}".format(seqid, start, end) + raise InvalidGenomicCoordinateError(message) + + def transform(self, offset): + """Apply an offset to this allele's coordinates. + + This can be helpful when converting between global (chromosome) + coordinates and local (locus-relative) coordinates. 
+ """ + newstart = self.start + offset + newend = self.end + offset + if self.start + offset < 0: + message = "invalid allele transformation; " + message += "{:s}:{:d}-{:d}".format(self.seqid, newstart, newend) + raise InvalidGenomicCoordinateError(message) + self.start = newstart + self.end = newend + + @property + def slug(self): + return "{:s}:{:d}-{:d}".format(self.seqid, self.start, self.end) + + @property + def refrlength(self): + return self.end - self.start + + def __len__(self): + return len(self.seq) + + def __eq__(self, other): + return ( + self.seqid == other.seqid + and self.start == other.start + and self.end == other.end + and self.seq == other.seq + ) + + def __lt__(self, other): + if self.seqid != other.seqid: + return self.seqid < other.seqid + if self.start != other.start: + return self.start < other.start + if self.end != other.end: + return self.end < other.end + if len(self) != len(other): + return len(self) < len(other) + return self.seq < other.seq + + +def parse_alleles(instream): + """Read allele info from a BED-formatted input stream. + + The first 4 columns should be tab-delimited, and formatted as follows. + + chr1 7324977 7324978 A|T + chr1 7325021 7325022 C|T + chr1 7325106 7325107 G|C + + Any other columns, if present, are ignored. + """ + for line in instream: + if line.startswith("#") or line.strip() == "": + continue + seqid, start, end, alleles, *remainder = line.strip().split() + start, end = int(start), int(end) + yield tuple([Allele(seqid, start, end, a) for a in alleles.split("|")]) diff --git a/microhapulator/happer/mutablestring.py b/microhapulator/happer/mutablestring.py new file mode 100644 index 0000000..c29fea5 --- /dev/null +++ b/microhapulator/happer/mutablestring.py @@ -0,0 +1,69 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2018, DHS. 
import re


# ---- microhapulator/happer/mutablestring.py ----


class MutableString(object):
    """Mutable string class.

    A string class that supports editing of the string contents without
    creating a new copy of the string with each edit. The string is stored
    internally as a list of characters. Despite the overhead in converting
    strings to lists and back to strings when edits are done, the overall
    approach provides substantial performance improvements when, for example,
    applying mutations to a long DNA sequence.

    Borrows heavily from https://stackoverflow.com/a/10572792/459780.
    """

    def __init__(self, data):
        # One list element per item of ``data`` (one per character for a str).
        self.data = list(data)

    def __str__(self):
        return "".join(self.data)

    def __repr__(self):
        # The repr is the bare string content, identical to ``str()``.
        return str(self)

    def __eq__(self, other):
        # Compared by string content, so a plain ``str`` with the same
        # characters compares equal.
        return str(self) == str(other)

    def __add__(self, chars):
        """Addition operator.

        For consistency with other Python objects, the addition operator
        creates a new object rather than appending in place. However, this
        makes a copy of the data which goes against the performance this data
        structure optimizes for.
        """
        return MutableString(self.data + list(str(chars)))

    def __iadd__(self, chars):
        """Append the characters of ``str(chars)`` in place; return ``self``."""
        self.data.extend(str(chars))
        return self

    def __contains__(self, teststr):
        return teststr in str(self)

    def __setitem__(self, index, value):
        """Replace the character(s) at ``index`` with ``value``.

        Slice assignment splices ``value`` in, as with a list. For a single
        index, a one-character string overwrites that position. A string of
        any other length is spliced in character by character (an empty
        string removes the position), so the internal list always keeps one
        element per character and ``len()`` agrees with ``str()``.

        Raises IndexError when a single index is out of range.
        """
        if isinstance(index, slice) or not isinstance(value, str) or len(value) == 1:
            self.data[index] = value
            return
        # Normalise negative indices and raise IndexError for out-of-range
        # ones, exactly as direct list assignment would.
        position = range(len(self.data))[index]
        self.data[position : position + 1] = value

    def __getitem__(self, index):
        """Return a ``str`` for a slice, or the single stored item for an index."""
        if isinstance(index, slice):
            return "".join(self.data[index])
        return self.data[index]

    def __delitem__(self, index):
        del self.data[index]

    def __len__(self):
        return len(self.data)


# ---- microhapulator/happer/mutate.py ----


class PloidyMismatchError(ValueError):
    """Raised when genotypes for the same sequence differ in allele count."""


def populate_haplotype_index(allelestream):
    """Parse a BED file to populate an index of haplotypes.

    Returns a tuple containing the ploidy and the haplotype index.

    The index is a dictionary mapping each sequence ID to a list of haplotypes
    for the corresponding sequence. Each list then contains nested lists of
    alleles representing each haplotype. The nested lists are sorted by genomic
    position.

        haplotypes = {
            'chr1': [
                [AlleleObj1a, AlleleObj1b, ..., AlleleObj1N],  # haplotype 1
                [AlleleObj2a, AlleleObj2b, ..., AlleleObj2N],  # haplotype 2
            ],
            'chr2': [
                [AlleleObj3a, AlleleObj3b, ..., AlleleObj3N],  # haplotype 1
                [AlleleObj4a, AlleleObj4b, ..., AlleleObj4N],  # haplotype 2
            ],
            ...
        }

    ``allelestream`` is an iterable of genotypes; each genotype is a sequence
    of allele objects, one per haplotype, and the sequence ID is read from the
    ``seqid`` attribute of the first allele.

    Raises PloidyMismatchError if two genotypes for the same sequence have a
    different number of alleles, and ValueError if the stream is empty.
    """
    ploidy = None
    haplotypes = dict()
    for genotype in allelestream:
        seqid = genotype[0].seqid
        ploidy = len(genotype)
        if seqid not in haplotypes:
            haplotypes[seqid] = [list() for _ in range(ploidy)]
        elif len(haplotypes[seqid]) != ploidy:
            message = "ploidy confusion: {:d} vs {:d}".format(ploidy, len(haplotypes[seqid]))
            raise PloidyMismatchError(message)
        for haplotype, allele in zip(haplotypes[seqid], genotype):
            haplotype.append(allele)
    if ploidy is None:
        raise ValueError("no input provided to populate haplotype index")
    for seqhaps in haplotypes.values():
        for haplotype in seqhaps:
            haplotype.sort()
    # NOTE(review): ploidy is only cross-checked within a sequence ID; the
    # value returned is the allele count of the last genotype read.
    return ploidy, haplotypes


def mutate(seqstream, alleles):
    """Yield one mutated copy of every input sequence per haplotype.

    ``seqstream`` is an iterable of ``(defline, sequence)`` pairs and
    ``alleles`` is handed to ``happer.allele.parse_alleles``. For each input
    sequence, ``ploidy`` copies are made; when the first whitespace-delimited
    token of the defline has entries in the haplotype index, each copy gets
    the alleles of the corresponding haplotype written over
    ``[allele.start:allele.end]``. Yields ``(defline, sequence)`` pairs in
    which ``:hapN`` (N counted from 1) is appended to the sequence ID.
    """
    # Imported here rather than at module level so that this block can be
    # imported without the happer package being resolvable.
    import happer

    allelestream = happer.allele.parse_alleles(alleles)
    ploidy, haplotypes = populate_haplotype_index(allelestream)

    for defline, sequence in seqstream:
        haploseqs = [MutableString(sequence) for _ in range(ploidy)]
        seqid = defline.strip().split()[0]
        for haploseq, allelelist in zip(haploseqs, haplotypes.get(seqid, [])):
            # Alleles are applied in reverse of their sorted order; presumably
            # so that a replacement whose length differs from the reference
            # span does not shift the coordinates of alleles still to come.
            for allele in reversed(allelelist):
                haploseq[allele.start : allele.end] = allele.seq
        for hapnum, haploseq in enumerate(haploseqs, 1):
            newdefline = re.sub(r"^(\S+)", r"\1:hap{}".format(hapnum), defline)
            yield newdefline, str(haploseq)


def main(args):
    """Read ``args.seqfile`` and ``args.bed``; write haplotype Fasta to ``args.out``.

    All three files are opened as context managers so they are closed even if
    parsing or mutation raises part-way through.
    """
    import happer

    with open(args.seqfile, "r") as seq, open(args.bed, "r") as als, open(args.out, "w") as out:
        seqstream = happer.seqio.parse_fasta(seq)
        for defline, haploseq in mutate(seqstream, als):
            print(">", defline, sep="", file=out)
            happer.seqio.format(haploseq, out)
import sys


def parse_fasta(data):
    """Load sequences in Fasta format.

    This generator function yields a tuple containing a defline and a sequence
    for each record in the Fasta data. Stolen shamelessly from
    http://stackoverflow.com/a/7655072/459780.

    ``data`` is any iterable of lines. The leading ``>`` is stripped from each
    defline, and the lines of a record are concatenated after trailing
    whitespace is removed. Lines that appear before the first defline are
    discarded, and input containing no defline at all (including empty input)
    yields nothing instead of failing.
    """
    defline, sequence = None, []
    for line in data:
        line = line.rstrip()
        if line.startswith(">"):
            if defline is not None:
                yield (defline[1:], "".join(sequence))
            defline, sequence = line, []
        else:
            sequence.append(line)
    # Emit the final record, but only if a defline was ever seen.
    if defline is not None:
        yield (defline[1:], "".join(sequence))


def format(seq, outstream=sys.stdout, linewidth=70):
    """Print the sequence to be legible in a human-readable width.

    The sequence is written to ``outstream`` in consecutive chunks of
    ``linewidth`` characters, one chunk per line. A ``linewidth`` of zero, or
    any negative value, disables wrapping and writes the sequence on a single
    line; so does a sequence shorter than ``linewidth``.
    """
    # A non-positive width can never advance through the sequence, so it is
    # treated as "do not wrap" rather than being allowed to loop forever.
    if linewidth <= 0 or len(seq) < linewidth:
        print(seq, file=outstream)
        return
    for start in range(0, len(seq), linewidth):
        print(seq[start : start + linewidth], file=outstream)
from importlib.resources import files


def data_file(path):
    """Return the location of ``path`` inside the happer test data directory.

    The directory is resolved relative to the ``microhapulator`` package as
    ``happer/tests/data``; the result is whatever ``importlib.resources.files``
    produces when joined with ``/`` (it is not converted to ``str``).
    """
    data_dir = files("microhapulator") / "happer" / "tests" / "data"
    return data_dir / path
+TGAATAAAACCGTGCCGCCGTCAAGACATATCACCTTCTAAGTTGTGATCTCGACGAACTTCCCCGCTCG +GTGAGACAGTTTATAGGTCCGTTCTCAAACGTGTACATTTTGACCACCGTGTGTCCGGTGACGCTATCTA +TAAGCCATC diff --git a/microhapulator/happer/tests/data/ploidy-mismatch.bed b/microhapulator/happer/tests/data/ploidy-mismatch.bed new file mode 100644 index 0000000..37cae81 --- /dev/null +++ b/microhapulator/happer/tests/data/ploidy-mismatch.bed @@ -0,0 +1,3 @@ +chr1 70 71 A|C +chr1 140 141 A|G|T +chr3 20 21 G|T diff --git a/microhapulator/happer/tests/test_allele.py b/microhapulator/happer/tests/test_allele.py new file mode 100644 index 0000000..bb44eca --- /dev/null +++ b/microhapulator/happer/tests/test_allele.py @@ -0,0 +1,104 @@ +# ------------------------------------------------------------------------------------------------- +# Copyright (c) 2018, DHS. +# +# This file is part of MicroHapulator (https://github.com/bioforensics/microhapulator) and is +# licensed under the BSD license: see LICENSE.txt. +# +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and +# Development Center. 
from microhapulator.happer.allele import Allele, InvalidGenomicCoordinateError, parse_alleles
from microhapulator.happer.tests import data_file
import pytest
from random import shuffle


@pytest.fixture
def allele1():
    return Allele("contig1776", 2468, 2469, "A")


@pytest.fixture
def allele2():
    return Allele("contig1776", 2468, 2469, "CAT", refrseq="G")


@pytest.fixture
def allele3():
    return Allele("contig1812", 13579, 13580, "C", refrseq="T")


@pytest.fixture
def allele4():
    return Allele("contig1812", 13579, 13580, "G", refrseq="T")


@pytest.fixture
def allele5():
    return Allele("contig1776", 123456788, 123456789, "T")


@pytest.fixture
def allele6():
    return Allele("chr17", 1944, 1945, "GATTACA")


def test_basic(allele1):
    """Attributes of a simple single-base allele without a reference sequence."""
    assert allele1.seqid == "contig1776"
    assert allele1.start == 2468
    assert allele1.end == 2469
    assert allele1.seq == "A"
    assert allele1.refr is None
    assert len(allele1) == 1
    assert allele1.refrlength == 1


def test_bad_coord():
    """An end coordinate before the start coordinate is rejected."""
    with pytest.raises(InvalidGenomicCoordinateError) as e:
        Allele("scaffold1492", 3000, 2999, "C")
    # Check the exception's own message (``e.value``), not the repr of the
    # ExceptionInfo wrapper that ``str(e)`` produces.
    assert "allele end cannot be before allele start" in str(e.value)


def test_indel(allele2):
    """Allele length and reference length are tracked independently."""
    assert allele2.refrlength == 1
    assert len(allele2) == 3
    assert allele2.refr == "G"
    assert allele2 < Allele("contig1776", 2468, 2500, "ATG")


def test_transform(allele3, allele4):
    """Coordinates shift by the given offset; an invalid shift raises."""
    allele3.transform(100)
    assert allele3.start == 13679
    assert allele3.end == 13680

    allele4.transform(-1000)
    assert allele4.start == 12579
    assert allele4.end == 12580

    with pytest.raises(InvalidGenomicCoordinateError) as e:
        allele3.transform(-20000)
    assert "invalid allele transformation" in str(e.value)


def test_compare(allele1, allele2, allele3, allele4, allele5, allele6):
    """Equality and ordering give one stable sort order from any shuffle."""
    assert allele1 == Allele("contig1776", 2468, 2469, "A", refrseq="G")
    assert allele3 < allele4
    for _ in range(5):
        alleles = [allele2, allele4, allele3, allele1, allele6, allele5]
        shuffle(alleles)
        sa = sorted(alleles)
        print([a.slug for a in sa])
        assert sa == [allele6, allele1, allele2, allele5, allele3, allele4]


def test_parse_alleles():
    """The ind1.bed fixture parses to four two-allele genotypes."""
    infile = data_file("ind1.bed")
    with open(infile, "r") as instream:
        genotypes = list(parse_alleles(instream))
    assert len(genotypes) == 4
    assert [len(g) for g in genotypes] == [2, 2, 2, 2]
    assert genotypes[0][0].slug == "chr1:55-56"
    assert genotypes[3][1].slug == "chr1:88-89"
from microhapulator.happer import MutableString
import pytest


@pytest.mark.parametrize(
    "thestring",
    [
        "GATTACA",
        "ATGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTGA",
        "MAHERSHALALHASHBAZ",
    ],
)
def test_basic(thestring):
    """Conversion, comparison, length and read access match the plain string."""
    mutable = MutableString(thestring)
    assert str(mutable) == thestring
    assert repr(mutable) == thestring
    assert mutable == thestring
    assert len(mutable) == len(thestring)
    assert mutable[3] == thestring[3]
    assert mutable[2:6] == thestring[2:6]


def test_add_ops():
    """``+`` builds a new value; ``+=`` and slice assignment edit in place."""
    greeting = MutableString("HOWGOESIT")
    assert greeting + "MAN" == "HOWGOESITMAN"
    assert greeting + MutableString("MAN") == "HOWGOESITMAN"

    greeting += "MANS!"
    assert greeting == "HOWGOESITMANS!"
    assert "ITMANS" in greeting

    # Assigning to an empty slice inserts without replacing anything.
    greeting[9:9] = "FELLOWHU"
    assert greeting == "HOWGOESITFELLOWHUMANS!"


def test_del_ops():
    """Deleting by index and by slice removes the expected characters."""
    phrase = MutableString("Sesame seed buns")
    del phrase[15]
    assert phrase == "Sesame seed bun"

    del phrase[7:12]
    assert phrase == "Sesame bun"
from microhapulator import happer
from microhapulator.happer.mutate import PloidyMismatchError
from microhapulator.happer.tests import data_file
import pytest


def test_populate_hap_index():
    """Five diploid genotypes on chr17 give two haplotypes of five alleles."""
    with open(data_file("haplo-test1.bed"), "r") as alsfile:
        alleles = happer.allele.parse_alleles(alsfile)
        ploidy, haplotypes = happer.mutate.populate_haplotype_index(alleles)
    print(haplotypes)
    assert ploidy == 2
    assert len(haplotypes) == 1
    assert "chr17" in haplotypes
    assert len(haplotypes["chr17"]) == 2
    assert [len(hap) for hap in haplotypes["chr17"]] == [5, 5]


def test_empty_input():
    """An empty allele stream is reported as a ValueError."""
    with pytest.raises(ValueError) as ve:
        ploidy, hapl = happer.mutate.populate_haplotype_index("")
    # Inspect the exception's message, not the repr of the ExceptionInfo.
    assert "no input provided to populate haplotype index" in str(ve.value)


def test_bad_ploidy():
    """Genotypes of differing ploidy for one sequence are rejected."""
    with open(data_file("ploidy-mismatch.bed"), "r") as alsfile:
        with pytest.raises(PloidyMismatchError) as pme:
            alleles = happer.allele.parse_alleles(alsfile)
            pldy, hapl = happer.mutate.populate_haplotype_index(alleles)
    assert "ploidy confusion" in str(pme.value)


def test_mutate_simple():
    """Each reference sequence produces two haplotype sequences."""
    with open(data_file("pico-refr.fa"), "r") as seqfile:
        seqdata = [data for data in happer.seqio.parse_fasta(seqfile)]
    sequences = [seq for label, seq in seqdata]
    with open(data_file("pico-hapl-1.bed"), "r") as allelefile:
        # ``mutate`` is a generator, so it must be consumed while the allele
        # file is still open.
        mutator = happer.mutate.mutate(seqdata, allelefile)
        haploseqs = [sequence for label, sequence in mutator]
    assert len(haploseqs) == 6
    assert "CAACCTTACGATCTA" in haploseqs[0]
    assert "CAACCTTCCGATCTA" in haploseqs[1]
    assert "GGGATAGACCCGTGG" in haploseqs[0]
    assert "GGGATAGGCCCGTGG" in haploseqs[1]
    assert haploseqs[2] == sequences[1]
    assert haploseqs[3] == sequences[1]
    assert "TGGACCGCATTGCAG" in haploseqs[4]
    assert "TGGACCTCATTGCAG" in haploseqs[5]


@pytest.mark.parametrize(
    "mainmethod",
    [
        happer.mutate.main,
        happer.__main__.main,
    ],
)
def test_mutate_cli(mainmethod, tmp_path):
    """Both entry points write the six expected haplotype records."""
    outfile = tmp_path / "out.fasta"
    arglist = ["--out", outfile, data_file("pico-refr.fa"), data_file("pico-hapl-1.bed")]
    arglist = [str(arg) for arg in arglist]
    args = happer.get_parser().parse_args(arglist)
    mainmethod(args)
    # Read the result back through a context manager so the handle is closed.
    with open(outfile, "r") as outstream:
        outseqs = list(happer.seqio.parse_fasta(outstream))
    deflines = [label for label, sequence in outseqs]
    sequences = [sequence for label, sequence in outseqs]
    assert len(deflines) == 6
    assert len(sequences) == 6
    assert deflines == [
        "chr1:hap1",
        "chr1:hap2",
        "chr2:hap1",
        "chr2:hap2",
        "chr3:hap1",
        "chr3:hap2",
    ]
    assert "CAACCTTACGATCTA" in sequences[0]
    assert "TGGACCTCATTGCAG" in sequences[5]
from microhapulator import happer
from io import StringIO


def test_parse_fasta():
    """Multi-record input: deflines keep their descriptions, lines are joined."""
    fasta = ">seq1\nACGT\n>seq2 yo\nGATTACA\nGATTACA\n>seq3\tdescrip\nATGATGTGA"
    parsed = list(happer.seqio.parse_fasta(fasta.split("\n")))
    assert parsed == [
        ("seq1", "ACGT"),
        ("seq2 yo", "GATTACAGATTACA"),
        ("seq3\tdescrip", "ATGATGTGA"),
    ]


def test_parse_fasta_single():
    """A single record is yielded once the input is exhausted."""
    fasta_lines = ">contig1\nATGNNNNNNNNNTGA".split("\n")
    parsed = list(happer.seqio.parse_fasta(fasta_lines))
    assert parsed == [("contig1", "ATGNNNNNNNNNTGA")]


def test_format_seq():
    """Width 0 leaves the sequence unwrapped; positive widths wrap it."""
    seq = "CAAAATGTCAGAGAAGTGTCGGACGTAGCCGACTAAAGGATCAGAGTGATCGTTGCGCGTGAGCGCT"

    unwrapped = StringIO()
    happer.seqio.format(seq, unwrapped, 0)
    assert unwrapped.getvalue() == seq + "\n"

    wrapped40 = StringIO()
    happer.seqio.format(seq, wrapped40, 40)
    for outputline in wrapped40.getvalue().split("\n"):
        assert len(outputline) <= 40

    wrapped20 = StringIO()
    happer.seqio.format(seq, wrapped20, 20)
    formatseq = (
        "CAAAATGTCAGAGAAGTGTC\n"
        "GGACGTAGCCGACTAAAGGA\n"
        "TCAGAGTGATCGTTGCGCGT\n"
        "GAGCGCT\n"
    )
    assert wrapped20.getvalue() == formatseq
"microhapulator.pipe", "microhapulator.tests", ], @@ -43,7 +44,6 @@ include_package_data=True, install_requires=[ "biopython", - "happer>=0.1", "insilicoseq>=1.5.4,<2.0", "jsonschema>=4.0", "matplotlib>=3.0", @@ -59,7 +59,12 @@ "termgraph>=0.5", "tqdm>=4.0", ], - entry_points={"console_scripts": ["mhpl8r = microhapulator.cli:main"]}, + entry_points={ + "console_scripts": [ + "mhpl8r = microhapulator.cli:main", + "happer = microhapulator.happer.__main__:main", + ] + }, classifiers=[ "Environment :: Console", "Framework :: IPython", From b4adf349fd4fc7a8928d426aaaa9123a913749a3 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 11:32:26 -0400 Subject: [PATCH 03/11] Increase MicroHapDB min version to 0.12 --- CHANGELOG.md | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1603cf6..259b9ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Changed - Intermediate FASTQ files are now bgzip compressed to reduce storage requirements (#189). - Colons are now used instead of commas to separate SNP alleles in microhap alleles (#192). +- Implemented Python 3.12 support by integrating happer package and increasing minimum version of MicroHapDB dependency (#193). - Updated working directory organization to provide additional structure (#194). 
### Fixed diff --git a/setup.py b/setup.py index 2fd894d..c3a167c 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "insilicoseq>=1.5.4,<2.0", "jsonschema>=4.0", "matplotlib>=3.0", - "microhapdb>=0.10.1", + "microhapdb>=0.12", "multiqc>=1.14", "nbformat>=5.0,<5.6", "numpy>=1.19", From b43aca2e71559019acddd51cc49669b6e20d7678 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 11:55:08 -0400 Subject: [PATCH 04/11] Fix happer --- microhapulator/happer/__main__.py | 6 +++--- microhapulator/happer/mutate.py | 12 ++++++------ microhapulator/tests/__init__.py | 7 ++----- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/microhapulator/happer/__main__.py b/microhapulator/happer/__main__.py index 25b43cc..2a22f1b 100644 --- a/microhapulator/happer/__main__.py +++ b/microhapulator/happer/__main__.py @@ -11,14 +11,14 @@ # ------------------------------------------------------------------------------------------------- import argparse -import happer +from microhapulator import __version__, happer import sys def get_parser(): parser = argparse.ArgumentParser() parser.add_argument( - "-v", "--version", action="version", version="happer v{}".format(happer.__version__) + "-v", "--version", action="version", version="happer v{}".format(__version__) ) parser.add_argument( "-o", @@ -46,6 +46,6 @@ def main(args=None): get_parser().parse_args(["-h"]) args = get_parser().parse_args() - versionmessage = "[happer] running version {}".format(happer.__version__) + versionmessage = "[happer] running version {}".format(__version__) print(versionmessage, file=sys.stderr) happer.mutate.main(args) diff --git a/microhapulator/happer/mutate.py b/microhapulator/happer/mutate.py index 99bac48..9e9dcf9 100644 --- a/microhapulator/happer/mutate.py +++ b/microhapulator/happer/mutate.py @@ -10,8 +10,9 @@ # Development Center. 
# ------------------------------------------------------------------------------------------------- -import happer -from happer import MutableString +from . import seqio +from .allele import parse_alleles +from .mutablestring import MutableString import re @@ -66,8 +67,7 @@ def populate_haplotype_index(allelestream): def mutate(seqstream, alleles): - allelestream = happer.allele.parse_alleles(alleles) - ploidy, haplotypes = populate_haplotype_index(allelestream) + ploidy, haplotypes = populate_haplotype_index(parse_alleles(alleles)) for defline, sequence in seqstream: haploseqs = list() @@ -88,8 +88,8 @@ def main(args): seq = open(args.seqfile, "r") als = open(args.bed, "r") out = open(args.out, "w") - seqstream = happer.seqio.parse_fasta(seq) + seqstream = seqio.parse_fasta(seq) for defline, haploseq in mutate(seqstream, als): print(">", defline, sep="", file=out) - happer.seqio.format(haploseq, out) + seqio.format(haploseq, out) out.close() diff --git a/microhapulator/tests/__init__.py b/microhapulator/tests/__init__.py index da688bc..d25a290 100644 --- a/microhapulator/tests/__init__.py +++ b/microhapulator/tests/__init__.py @@ -11,11 +11,8 @@ # ------------------------------------------------------------------------------------------------- -import os -from pkg_resources import resource_filename +from importlib.resources import files def data_file(path): - pathparts = path.split("/") - relpath = os.path.join("tests", "data", *pathparts) - return resource_filename("microhapulator", relpath) + return str(files("microhapulator") / "tests" / "data" / path) From d1e5a23c00879357188afa72e9fcdae213468385 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 12:58:42 -0400 Subject: [PATCH 05/11] Troubleshoot --- microhapulator/happer/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/microhapulator/happer/__init__.py b/microhapulator/happer/__init__.py index 7d194ca..97a0d1c 100644 --- a/microhapulator/happer/__init__.py +++ 
b/microhapulator/happer/__init__.py @@ -14,6 +14,5 @@ from .mutablestring import MutableString from . import mutate from . import seqio -from . import tests from . import __main__ from .__main__ import get_parser From 241ddea13bd5f37ee9934f272b56ddc15ceb6fc8 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 13:06:11 -0400 Subject: [PATCH 06/11] Troubleshoot --- microhapulator/profile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/microhapulator/profile.py b/microhapulator/profile.py index 032ec7f..37a46a4 100644 --- a/microhapulator/profile.py +++ b/microhapulator/profile.py @@ -12,6 +12,7 @@ from collections import defaultdict +from importlib.resources import files from io import StringIO import json import jsonschema @@ -19,7 +20,6 @@ from microhapulator import open as mhopen from microhapulator.happer.mutate import mutate from numpy.random import choice -from pkg_resources import resource_filename import pandas as pd from pathlib import Path import sys @@ -32,7 +32,7 @@ class RandomMatchError(ValueError): def load_schema(): - with mhopen(resource_filename("microhapulator", "data/profile-schema.json"), "r") as fh: + with mhopen(files("microhapulator") / "data/profile-schema.json", "r") as fh: return json.load(fh) From 333b6b4c875a13fb8089bced1e6396ec42a38c12 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 13:18:51 -0400 Subject: [PATCH 07/11] Troubleshoot --- microhapulator/cli/getrefr.py | 7 +++---- microhapulator/cli/pipe.py | 8 ++++---- microhapulator/pipe/reporter.py | 6 +++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/microhapulator/cli/getrefr.py b/microhapulator/cli/getrefr.py index 1fa0e54..e046a79 100644 --- a/microhapulator/cli/getrefr.py +++ b/microhapulator/cli/getrefr.py @@ -12,8 +12,7 @@ from hashlib import sha1 -from pathlib import Path -from pkg_resources import resource_filename +from importlib.resources import files from subprocess import run import sys 
from tqdm import tqdm @@ -60,7 +59,7 @@ def subparser(subparsers): def main(args): url = "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz" checksum = "70fb7af4dff26bffdf27dbef80caf1f0359d488f" - hg38path = Path(resource_filename("microhapulator", "data/hg38.fasta.gz")) + hg38path = files("microhapulator") / "data" / "hg38.fasta.gz" if download_is_needed(url, hg38path, checksum): print("[MicroHapulator] downloading GRCh38 reference genome", file=sys.stderr) with ProgressBar(unit="B", unit_scale=True, miniters=1, desc=hg38path.name) as pb: @@ -72,7 +71,7 @@ def main(args): ) if compute_shasum(hg38path) != checksum: raise ValueError(f"checksum failed for {str(hg38path)}") - index_path = Path(resource_filename("microhapulator", "data/hg38.mmi")) + index_path = files("microhapulator") / "data" / "hg38.mmi" if index_path.is_file(): print("[MicroHapulator] Minimap2 index present, good to go!", file=sys.stderr) else: diff --git a/microhapulator/cli/pipe.py b/microhapulator/cli/pipe.py index 854364c..bc434b2 100644 --- a/microhapulator/cli/pipe.py +++ b/microhapulator/cli/pipe.py @@ -13,10 +13,10 @@ from argparse import SUPPRESS from collections import defaultdict +from importlib.resources import files from microhapulator.marker import MicrohapIndex from os import cpu_count, symlink from pathlib import Path -from pkg_resources import resource_filename from shutil import copy from snakemake import snakemake import sys @@ -43,7 +43,7 @@ def main(args): thresh_gap_alert=args.gap_alert, hspace=args.hspace, ) - snakefile = resource_filename("microhapulator", "workflows/analysis.smk") + snakefile = files("microhapulator") / "workflows" / "analysis.smk" success = snakemake( snakefile, cores=args.threads, @@ -254,13 +254,13 @@ def subparser(subparsers): ) cli.add_argument( "--hg38", - default=resource_filename("microhapulator", "data/hg38.fasta.gz"), + 
default=files("microhapulator") / "data" / "hg38.fasta.gz", help=SUPPRESS, # Hidden option for testing purposes ) cli.add_argument( "--hg38idx", - default=resource_filename("microhapulator", "data/hg38.mmi"), + default=files("microhapulator") / "data" / "hg38.mmi", help=SUPPRESS, # Hidden option for testing purposes ) diff --git a/microhapulator/pipe/reporter.py b/microhapulator/pipe/reporter.py index 07bbc4f..baf6817 100644 --- a/microhapulator/pipe/reporter.py +++ b/microhapulator/pipe/reporter.py @@ -15,13 +15,13 @@ from .qcsummary import PairedReadQCSummary, SingleEndReadQCSummary from .typestats import TypingSummary from datetime import datetime +from importlib.resources import files from jinja2 import FileSystemLoader, Environment, Template import microhapulator from microhapulator.marker import MicrohapIndex import json import pandas as pd from pathlib import Path -from pkg_resources import resource_filename class OverviewReporter: @@ -45,7 +45,7 @@ def marker_names(self): return sample_rates.index def render(self): - template_loader = FileSystemLoader(resource_filename("microhapulator", "data")) + template_loader = FileSystemLoader(files("microhapulator") / "data") env = Environment(loader=template_loader) if self.reads_are_paired: template_file = "paired.html" @@ -89,7 +89,7 @@ def marker_names(self): return sample_rates.index def render(self): - templatefile = resource_filename("microhapulator", "data/marker_details_template.html") + templatefile = files("microhapulator") / "data" / "marker_details_template.html" with open(templatefile, "r") as fh: template = Template(fh.read()) output = template.render( From ff3189fab523a0eb8fe6d5e83fe04cdc637f1850 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 13:47:52 -0400 Subject: [PATCH 08/11] Troubleshoot --- microhapulator/tests/test_pipe.py | 2 ++ microhapulator/workflows/preproc-paired.smk | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/microhapulator/tests/test_pipe.py 
b/microhapulator/tests/test_pipe.py index ae969fe..ef73d12 100644 --- a/microhapulator/tests/test_pipe.py +++ b/microhapulator/tests/test_pipe.py @@ -67,9 +67,11 @@ def test_pipe_gbr_usc10(tmp_path): args = microhapulator.cli.get_parser().parse_args(arglist) microhapulator.cli.pipe.main(args) expected = SimulatedProfile(fromfile=data_file("prof/gbr-usc10-sim.json")) + print("DEBUGger", *list((tmp_path / "analysis" / "gbr-usc" / "03typing").glob("*")), sep="\n") observed = TypingResult( fromfile=tmp_path / "analysis" / "gbr-usc" / "03typing" / "gbr-usc-type.json" ) + assert False diff = list(mhapi.diff(observed, expected)) assert len(diff) == 0 report = tmp_path / "report" / "report.html" diff --git a/microhapulator/workflows/preproc-paired.smk b/microhapulator/workflows/preproc-paired.smk index 824d4bd..a66d127 100644 --- a/microhapulator/workflows/preproc-paired.smk +++ b/microhapulator/workflows/preproc-paired.smk @@ -72,7 +72,6 @@ rule filter_ambiguous: ambig_thresh=config["ambiguous_thresh"], out_prefix="analysis/{sample}/01preprocessing/{sample}", run: - print("DEBUG", wildcards.sample, input) ambig_filter = AmbigPairedReadFilter(*input, params.out_prefix, params.ambig_thresh) ambig_filter.filter() with open(output.counts, "w") as fh: From b1c3fc4c44dbe1e9ec0b649e47075f33a113ff66 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 14:08:32 -0400 Subject: [PATCH 09/11] Troubleshoot --- microhapulator/tests/test_pipe.py | 1 - microhapulator/workflows/preproc-paired.smk | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/microhapulator/tests/test_pipe.py b/microhapulator/tests/test_pipe.py index ef73d12..44d2256 100644 --- a/microhapulator/tests/test_pipe.py +++ b/microhapulator/tests/test_pipe.py @@ -67,7 +67,6 @@ def test_pipe_gbr_usc10(tmp_path): args = microhapulator.cli.get_parser().parse_args(arglist) microhapulator.cli.pipe.main(args) expected = SimulatedProfile(fromfile=data_file("prof/gbr-usc10-sim.json")) - 
print("DEBUGger", *list((tmp_path / "analysis" / "gbr-usc" / "03typing").glob("*")), sep="\n") observed = TypingResult( fromfile=tmp_path / "analysis" / "gbr-usc" / "03typing" / "gbr-usc-type.json" ) diff --git a/microhapulator/workflows/preproc-paired.smk b/microhapulator/workflows/preproc-paired.smk index a66d127..ade8c3b 100644 --- a/microhapulator/workflows/preproc-paired.smk +++ b/microhapulator/workflows/preproc-paired.smk @@ -40,7 +40,8 @@ rule fastqc: outfiles = sorted(Path(params.outdir).glob("*.html")) for end, outfile in enumerate(outfiles, 1): outfile = Path(outfile) - linkfile = f"{params.outdir}/R{end}-fastqc.html" + # Snakemake f-strings break with Python 3.12: https://github.com/snakemake/snakemake/issues/2648 + linkfile = params.outdir + "/R" + end + "-fastqc.html" symlink(outfile.name, linkfile) From ee336cdacdd0cd00b603a4dd28358736a9844102 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 14:21:52 -0400 Subject: [PATCH 10/11] Troubleshoot --- microhapulator/tests/test_pipe.py | 1 - microhapulator/workflows/preproc-paired.smk | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/microhapulator/tests/test_pipe.py b/microhapulator/tests/test_pipe.py index 44d2256..ae969fe 100644 --- a/microhapulator/tests/test_pipe.py +++ b/microhapulator/tests/test_pipe.py @@ -70,7 +70,6 @@ def test_pipe_gbr_usc10(tmp_path): observed = TypingResult( fromfile=tmp_path / "analysis" / "gbr-usc" / "03typing" / "gbr-usc-type.json" ) - assert False diff = list(mhapi.diff(observed, expected)) assert len(diff) == 0 report = tmp_path / "report" / "report.html" diff --git a/microhapulator/workflows/preproc-paired.smk b/microhapulator/workflows/preproc-paired.smk index ade8c3b..9568619 100644 --- a/microhapulator/workflows/preproc-paired.smk +++ b/microhapulator/workflows/preproc-paired.smk @@ -41,7 +41,7 @@ rule fastqc: for end, outfile in enumerate(outfiles, 1): outfile = Path(outfile) # Snakemake f-strings break with Python 3.12: 
https://github.com/snakemake/snakemake/issues/2648 - linkfile = params.outdir + "/R" + end + "-fastqc.html" + linkfile = "{}/R{}-fastqc.html".format(params.outdir, end) symlink(outfile.name, linkfile) From fbf2aa50870ed15f4bf6a59b2c5352b2b661271a Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Wed, 30 Apr 2025 14:34:19 -0400 Subject: [PATCH 11/11] Troubleshoot --- microhapulator/workflows/analysis.smk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/microhapulator/workflows/analysis.smk b/microhapulator/workflows/analysis.smk index 0cbc3ed..7811ffc 100644 --- a/microhapulator/workflows/analysis.smk +++ b/microhapulator/workflows/analysis.smk @@ -269,7 +269,9 @@ rule plot_haplotype_calls: run: result = TypingResult(fromfile=input.result) mhapi.plot_haplotype_calls( - result, f"analysis/{wildcards.sample}/03typing/callplots", sample=wildcards.sample + result, + "analysis/{}/03typing/callplots".format(wildcards.sample), + sample=wildcards.sample, )